[llvm] [VPlan] Replace PhiR operand of ComputeFindIVResult with VPIRFlags. #174026 (PR #175461)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 11 13:20:37 PST 2026
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/175461
Replace the Phi recipe operand of ComputeFindIVResult with VPIRFlags, building on top of https://github.com/llvm/llvm-project/pull/174026.
>From b7b3df2544e65c15c24ad63dfaa44688e5a25398 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 11 Jan 2026 17:17:44 +0000
Subject: [PATCH 1/2] [VPlan] ComputeReductionResult kind flags.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 14 +++--
llvm/lib/Transforms/Vectorize/VPlan.h | 55 +++++++++++++++++--
.../Vectorize/VPlanConstruction.cpp | 42 ++++++++------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 51 +++++++++++------
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2 +-
.../LoopVectorize/AArch64/vplan-printing.ll | 8 +--
.../RISCV/vplan-vp-intrinsics-reduction.ll | 8 +--
...-order-recurrence-sink-replicate-region.ll | 2 +-
.../vplan-printing-reductions.ll | 26 ++++-----
9 files changed, 141 insertions(+), 67 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cdc6ecfa21bcb..0dc1ce02ad974 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8755,13 +8755,15 @@ void LoopVectorizationPlanner::addReductionResultComputation(
Builder.createNaryOp(VPInstruction::ComputeAnyOfResult,
{PhiR, Start, NewExitingVPV}, ExitDL);
} else {
- VPIRFlags Flags =
+ FastMathFlags FMFs =
RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind)
- ? VPIRFlags(RdxDesc.getFastMathFlags())
- : VPIRFlags();
+ ? RdxDesc.getFastMathFlags()
+ : FastMathFlags();
+ VPIRFlags Flags(RecurrenceKind, PhiR->isOrdered(), PhiR->isInLoop(),
+ FMFs);
FinalReductionResult =
Builder.createNaryOp(VPInstruction::ComputeReductionResult,
- {PhiR, NewExitingVPV}, Flags, ExitDL);
+ {NewExitingVPV}, Flags, ExitDL);
}
// If the vector reduction can be performed in a smaller type, we truncate
// then extend the loop exit value to enable InstCombine to evaluate the
@@ -8789,8 +8791,8 @@ void LoopVectorizationPlanner::addReductionResultComputation(
PhiR->setOperand(1, Extnd->getVPSingleValue());
// Update ComputeReductionResult with the truncated exiting value and
- // extend its result.
- FinalReductionResult->setOperand(1, Trunc);
+ // extend its result. Operand 0 is the first reduction part.
+ FinalReductionResult->setOperand(0, Trunc);
FinalReductionResult =
Builder.createScalarCast(ExtendOpc, FinalReductionResult, PhiTy, {});
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 83fe45bfd0bbf..234a2e30ca9ff 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -616,6 +616,7 @@ class VPIRFlags {
GEPOp,
FPMathOp,
NonNegOp,
+ ReductionOp,
Other
};
@@ -665,6 +666,18 @@ class VPIRFlags {
CmpInst::Predicate Pred;
FastMathFlagsTy FMFs;
};
+ /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
+ struct ReductionFlagsTy {
+    unsigned char Kind : 6; // RecurKind has ~26 values; 5 bits needed, 6 used for headroom
+ unsigned char IsOrdered : 1;
+ unsigned char IsInLoop : 1;
+ FastMathFlagsTy FMFs;
+
+ ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
+ FastMathFlags FMFs)
+ : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
+ IsInLoop(IsInLoop), FMFs(FMFs) {}
+ };
OperationType OpType;
@@ -678,6 +691,7 @@ class VPIRFlags {
NonNegFlagsTy NonNegFlags;
FastMathFlagsTy FMFs;
FCmpFlagsTy FCmpFlags;
+ ReductionFlagsTy ReductionFlags;
unsigned AllFlags;
};
@@ -745,6 +759,10 @@ class VPIRFlags {
VPIRFlags(GEPNoWrapFlags GEPFlags)
: OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
+ VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
+ : OpType(OperationType::ReductionOp),
+ ReductionFlags(Kind, IsOrdered, IsInLoop, FMFs) {}
+
void transferFlags(VPIRFlags &Other) {
OpType = Other.OpType;
AllFlags = Other.AllFlags;
@@ -785,6 +803,7 @@ class VPIRFlags {
NonNegFlags.NonNeg = false;
break;
case OperationType::Cmp:
+ case OperationType::ReductionOp:
case OperationType::Other:
break;
}
@@ -826,6 +845,7 @@ class VPIRFlags {
I.setNonNeg(NonNegFlags.NonNeg);
break;
case OperationType::Cmp:
+ case OperationType::ReductionOp:
case OperationType::Other:
break;
}
@@ -855,7 +875,8 @@ class VPIRFlags {
/// Returns true if the recipe has fast-math flags.
bool hasFastMathFlags() const {
- return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp;
+ return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
+ OpType == OperationType::ReductionOp;
}
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const;
@@ -897,13 +918,39 @@ class VPIRFlags {
return DisjointFlags.IsDisjoint;
}
+ RecurKind getRecurKind() const {
+ assert(OpType == OperationType::ReductionOp &&
+ "recipe doesn't have reduction flags");
+ return static_cast<RecurKind>(ReductionFlags.Kind);
+ }
+
+ bool isReductionOrdered() const {
+ assert(OpType == OperationType::ReductionOp &&
+ "recipe doesn't have reduction flags");
+ return ReductionFlags.IsOrdered;
+ }
+
+ bool isReductionInLoop() const {
+ assert(OpType == OperationType::ReductionOp &&
+ "recipe doesn't have reduction flags");
+ return ReductionFlags.IsInLoop;
+ }
+
private:
- /// Get a reference to the fast-math flags for FPMathOp or FCmp.
+ /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
FastMathFlagsTy &getFMFsRef() {
- return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
+ if (OpType == OperationType::FCmp)
+ return FCmpFlags.FMFs;
+ if (OpType == OperationType::ReductionOp)
+ return ReductionFlags.FMFs;
+ return FMFs;
}
const FastMathFlagsTy &getFMFsRef() const {
- return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
+ if (OpType == OperationType::FCmp)
+ return FCmpFlags.FMFs;
+ if (OpType == OperationType::ReductionOp)
+ return ReductionFlags.FMFs;
+ return FMFs;
}
public:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 33355f9dcd88c..4ca65e3bfb4c8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -1228,12 +1228,22 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
// If we exit early due to NaNs, compute the final reduction result based on
// the reduction phi at the beginning of the last vector iteration.
+ VPValue *BackedgeVal = RedPhiR->getBackedgeValue();
auto *RdxResult =
- findUserOf<VPInstruction::ComputeReductionResult>(RedPhiR);
+ findUserOf<VPInstruction::ComputeReductionResult>(BackedgeVal);
+
+ // Look through selects inserted for tail folding.
+ if (!RdxResult) {
+ auto *SelR = cast<VPSingleDefRecipe>(
+ *find_if(BackedgeVal->users(),
+ [PhiR = RedPhiR](VPUser *U) { return U != PhiR; }));
+ RdxResult = findUserOf<VPInstruction::ComputeReductionResult>(SelR);
+ assert(RdxResult && "must find a ComputeReductionResult");
+ }
auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
- RdxResult->getOperand(1));
- RdxResult->setOperand(1, NewSel);
+ RdxResult->getOperand(0));
+ RdxResult->setOperand(0, NewSel);
assert(!RdxResults.contains(RdxResult) && "RdxResult already used");
RdxResults.insert(RdxResult);
}
@@ -1299,10 +1309,10 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) {
// MinMaxPhiR has users outside the reduction cycle in the loop. Check if
// the only other user is a FindLastIV reduction. MinMaxPhiR must have
- // exactly 3 users: 1) the min/max operation, the compare of a FindLastIV
- // reduction and ComputeReductionResult. The comparisom must compare
- // MinMaxPhiR against the min/max operand used for the min/max reduction
- // and only be used by the select of the FindLastIV reduction.
+ // exactly 2 users: 1) the min/max operation and the compare of a FindLastIV
+ // reduction. The comparison must compare MinMaxPhiR against the min/max
+ // operand used for the min/max reduction and only be used by the select of
+ // the FindLastIV reduction.
RecurKind RdxKind = MinMaxPhiR->getRecurrenceKind();
assert(
RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind) &&
@@ -1319,8 +1329,7 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) {
if (!match(MinMaxOp, m_Intrinsic(ExpectedIntrinsicID)))
return false;
- // MinMaxOp must have 2 users: 1) MinMaxPhiR and 2) ComputeReductionResult
- // (asserted below).
+ // MinMaxOp must have 2 users: 1) MinMaxPhiR and 2) ComputeReductionResult.
assert(MinMaxOp->getNumUsers() == 2 &&
"MinMaxOp must have exactly 2 users");
VPValue *MinMaxOpValue = MinMaxOp->getOperand(0);
@@ -1339,20 +1348,17 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) {
if (MinMaxOpValue != CmpOpB)
Pred = CmpInst::getSwappedPredicate(Pred);
- // MinMaxPhiR must have exactly 3 users:
+ // MinMaxPhiR must have exactly 2 users:
// * MinMaxOp,
- // * Cmp (that's part of a FindLastIV chain),
- // * ComputeReductionResult.
- if (MinMaxPhiR->getNumUsers() != 3)
+ // * Cmp (that's part of a FindLastIV chain).
+ if (MinMaxPhiR->getNumUsers() != 2)
return false;
VPInstruction *MinMaxResult =
- findUserOf<VPInstruction::ComputeReductionResult>(MinMaxPhiR);
+ findUserOf<VPInstruction::ComputeReductionResult>(MinMaxOp);
assert(is_contained(MinMaxPhiR->users(), MinMaxOp) &&
"one user must be MinMaxOp");
- assert(MinMaxResult && "MinMaxResult must be a user of MinMaxPhiR");
- assert(is_contained(MinMaxOp->users(), MinMaxResult) &&
- "MinMaxResult must be a user of MinMaxOp (and of MinMaxPhiR");
+ assert(MinMaxResult && "MinMaxResult must be a user of MinMaxOp");
// Cmp must be used by the select of a FindLastIV chain.
VPValue *Sel = dyn_cast<VPSingleDefRecipe>(Cmp->getSingleUser());
@@ -1429,7 +1435,7 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) {
FindIVResult->getIterator());
VPBuilder B(FindIVResult);
- VPValue *MinMaxExiting = MinMaxResult->getOperand(1);
+ VPValue *MinMaxExiting = MinMaxResult->getOperand(0);
auto *FinalMinMaxCmp =
B.createICmp(CmpInst::ICMP_EQ, MinMaxExiting, MinMaxResult);
VPValue *Sentinel = FindIVResult->getOperand(2);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2c0772320c3cf..ca95a7685c1ee 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -345,6 +345,16 @@ void VPIRFlags::intersectFlags(const VPIRFlags &Other) {
case OperationType::Cmp:
assert(CmpPredicate == Other.CmpPredicate && "Cannot drop CmpPredicate");
break;
+ case OperationType::ReductionOp:
+ assert(ReductionFlags.Kind == Other.ReductionFlags.Kind &&
+ "Cannot change RecurKind");
+ assert(ReductionFlags.IsOrdered == Other.ReductionFlags.IsOrdered &&
+ "Cannot change IsOrdered");
+ assert(ReductionFlags.IsInLoop == Other.ReductionFlags.IsInLoop &&
+ "Cannot change IsInLoop");
+ getFMFsRef().NoNaNs &= Other.getFMFsRef().NoNaNs;
+ getFMFsRef().NoInfs &= Other.getFMFsRef().NoInfs;
+ break;
case OperationType::Other:
assert(AllFlags == Other.AllFlags && "Cannot drop other flags");
break;
@@ -352,7 +362,8 @@ void VPIRFlags::intersectFlags(const VPIRFlags &Other) {
}
FastMathFlags VPIRFlags::getFastMathFlags() const {
- assert((OpType == OperationType::FPMathOp || OpType == OperationType::FCmp) &&
+ assert((OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
+ OpType == OperationType::ReductionOp) &&
"recipe doesn't have fast math flags");
const FastMathFlagsTy &F = getFMFsRef();
FastMathFlags Res;
@@ -437,6 +448,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::BuildVector:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
+ case VPInstruction::ComputeReductionResult:
case VPInstruction::ExplicitVectorLength:
case VPInstruction::ExtractLastLane:
case VPInstruction::ExtractLastPart:
@@ -452,7 +464,6 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case Instruction::Store:
case VPInstruction::BranchOnCount:
case VPInstruction::BranchOnTwoConds:
- case VPInstruction::ComputeReductionResult:
case VPInstruction::ExtractLane:
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::LogicalAnd:
@@ -756,21 +767,16 @@ Value *VPInstruction::generate(VPTransformState &State) {
return Builder.CreateSelect(Cmp, ReducedIV, Start, "rdx.select");
}
case VPInstruction::ComputeReductionResult: {
- // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
- // and will be removed by breaking up the recipe further.
- auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
- // Get its reduction variable descriptor.
-
- RecurKind RK = PhiR->getRecurrenceKind();
+ RecurKind RK = getRecurKind();
+ bool IsOrdered = isReductionOrdered();
+ bool IsInLoop = isReductionInLoop();
assert(!RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
"should be handled by ComputeFindIVResult");
- // The recipe's operands are the reduction phi, followed by one operand for
- // each part of the reduction.
- unsigned UF = getNumOperands() - 1;
+ unsigned UF = getNumOperands();
VectorParts RdxParts(UF);
for (unsigned Part = 0; Part < UF; ++Part)
- RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
+ RdxParts[Part] = State.get(getOperand(Part), IsInLoop);
IRBuilderBase::FastMathFlagGuard FMFG(Builder);
if (hasFastMathFlags())
@@ -778,7 +784,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = RdxParts[0];
- if (PhiR->isOrdered()) {
+ if (IsOrdered) {
ReducedPartRdx = RdxParts[UF - 1];
} else {
// Floating-point operations should have some FMF to enable the reduction.
@@ -801,7 +807,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
// Create the reduction after the loop. Note that inloop reductions create
// the target reduction in the loop using a Reduction recipe.
- if (State.VF.isVector() && !PhiR->isInLoop()) {
+ if (State.VF.isVector() && !IsInLoop) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
@@ -2080,14 +2086,15 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
Opcode == Instruction::FRem || Opcode == Instruction::FPExt ||
Opcode == Instruction::FPTrunc || Opcode == Instruction::Select ||
Opcode == VPInstruction::WideIVStep ||
- Opcode == VPInstruction::ReductionStartVector ||
- Opcode == VPInstruction::ComputeReductionResult;
+ Opcode == VPInstruction::ReductionStartVector;
case OperationType::FCmp:
return Opcode == Instruction::FCmp;
case OperationType::NonNegOp:
return Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP;
case OperationType::Cmp:
return Opcode == Instruction::FCmp || Opcode == Instruction::ICmp;
+ case OperationType::ReductionOp:
+ return Opcode == VPInstruction::ComputeReductionResult;
case OperationType::Other:
return true;
}
@@ -2140,6 +2147,18 @@ void VPIRFlags::printFlags(raw_ostream &O) const {
if (NonNegFlags.NonNeg)
O << " nneg";
break;
+ case OperationType::ReductionOp: {
+ RecurKind RK = static_cast<RecurKind>(ReductionFlags.Kind);
+ O << " ("
+ << Instruction::getOpcodeName(RecurrenceDescriptor::getOpcode(RK));
+ if (ReductionFlags.IsInLoop)
+ O << ", in-loop";
+ if (ReductionFlags.IsOrdered)
+ O << ", ordered";
+ O << ")";
+ getFastMathFlags().print(O);
+ break;
+ }
case OperationType::Other:
break;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index be2a68ca40b93..15859ce16d42b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -371,7 +371,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
- m_VPValue(), m_VPValue(Op1))) ||
+ m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeFindIVResult>(
m_VPValue(), m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
addUniformForAllParts(cast<VPInstruction>(&R));
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
index 32ee9a0142a7b..65a1a6e64a40e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
@@ -39,7 +39,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) "target-features"="+neon,+do
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<[[ACC]]>, vp<[[REDUCE]]>
+; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result (add) vp<[[REDUCE]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VEC_TC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
@@ -96,7 +96,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) "target-features"="+neon,+do
; CHECK-NEXT: Successor(s): middle.block, vector.body
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RED_RESULT:%[0-9]+]]> = compute-reduction-result ir<[[RDX]]>, ir<[[RDX_NEXT]]>
+; CHECK-NEXT: EMIT vp<[[RED_RESULT:%[0-9]+]]> = compute-reduction-result (add) ir<[[RDX_NEXT]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
@@ -165,7 +165,7 @@ define i32 @print_partial_reduction_predication(ptr %a, ptr %b, i64 %N) "target-
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RED_RESULT:%[0-9]+]]> = compute-reduction-result ir<%accum>, vp<[[REDUCE]]>
+; CHECK-NEXT: EMIT vp<[[RED_RESULT:%[0-9]+]]> = compute-reduction-result (add) vp<[[REDUCE]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
@@ -226,7 +226,7 @@ define i32 @print_partial_reduction_ext_mul(ptr %a, ptr %b) "target-features"="+
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<[[ACC]]>, vp<[[REDUCE]]>
+; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result (add) vp<[[REDUCE]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VEC_TC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index 642cd467c839f..a3293366c17bc 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -58,7 +58,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: Successor(s): middle.block
; IF-EVL-OUTLOOP-EMPTY:
; IF-EVL-OUTLOOP-NEXT: middle.block:
-; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, vp<[[RDX_SELECT]]>
+; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result (add) vp<[[RDX_SELECT]]>
; IF-EVL-OUTLOOP-NEXT: Successor(s): ir-bb<for.end>
; IF-EVL-OUTLOOP-EMPTY:
; IF-EVL-OUTLOOP-NEXT: ir-bb<for.end>:
@@ -97,7 +97,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: Successor(s): middle.block
; IF-EVL-INLOOP-EMPTY:
; IF-EVL-INLOOP-NEXT: middle.block:
-; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]>
+; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result (add, in-loop) ir<[[ADD]]>
; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb<for.end>
; IF-EVL-INLOOP-EMPTY:
; IF-EVL-INLOOP-NEXT: ir-bb<for.end>:
@@ -131,7 +131,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: Successor(s): middle.block
; NO-VP-OUTLOOP-EMPTY:
; NO-VP-OUTLOOP-NEXT: middle.block:
-; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]>
+; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result (add) ir<[[ADD]]>
; NO-VP-OUTLOOP-NEXT: EMIT vp<[[BOC:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]>
; NO-VP-OUTLOOP-NEXT: EMIT branch-on-cond vp<[[BOC]]>
; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb<for.end>, scalar.ph
@@ -179,7 +179,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: Successor(s): middle.block
; NO-VP-INLOOP-EMPTY:
; NO-VP-INLOOP-NEXT: middle.block:
-; NO-VP-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]>
+; NO-VP-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result (add, in-loop) ir<[[ADD]]>
; NO-VP-INLOOP-NEXT: EMIT vp<[[BOC:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]>
; NO-VP-INLOOP-NEXT: EMIT branch-on-cond vp<[[BOC]]>
; NO-VP-INLOOP-NEXT: Successor(s): ir-bb<for.end>, scalar.ph
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index 93a339b48c17b..3f9e530f1a097 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -238,7 +238,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result ir<%and.red>, vp<[[SEL]]>
+; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result (and) vp<[[SEL]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index 910caf82ee759..fc63e4975c3f5 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -35,7 +35,7 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result fast ir<%red>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result (fadd, in-loop) fast ir<%red.next>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
@@ -103,7 +103,7 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RED_RES:.+]]> = compute-reduction-result fast ir<%red>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<[[RED_RES:.+]]> = compute-reduction-result (fadd, in-loop) fast ir<%red.next>
; CHECK-NEXT: CLONE store vp<[[RED_RES]]>, ir<%dst>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
@@ -177,7 +177,7 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result nnan ninf nsz ir<%sum.07>, ir<[[MULADD]]>
+; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result (fadd, in-loop) nnan ninf nsz ir<[[MULADD]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
@@ -496,7 +496,7 @@ define i32 @print_mulacc_sub(ptr %a, ptr %b) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%10> = compute-reduction-result ir<%accum>, vp<%8>
+; CHECK-NEXT: EMIT vp<%10> = compute-reduction-result (sub, in-loop) vp<%8>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<1024>, vp<%2>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
@@ -552,7 +552,7 @@ define i32 @print_mulacc_sub(ptr %a, ptr %b) {
; CHECK-NEXT: Successor(s): middle.block, vector.body
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<%accum>, ir<%add>
+; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result (sub, in-loop) ir<%add>
; CHECK-NEXT: Successor(s): ir-bb<exit>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
@@ -615,7 +615,7 @@ define i32 @print_mulacc_negated(ptr %a, ptr %b) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%10> = compute-reduction-result ir<%accum>, vp<%8>
+; CHECK-NEXT: EMIT vp<%10> = compute-reduction-result (add, in-loop) vp<%8>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<1024>, vp<%2>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
@@ -673,7 +673,7 @@ define i32 @print_mulacc_negated(ptr %a, ptr %b) {
; CHECK-NEXT: Successor(s): middle.block, vector.body
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<%accum>, ir<%add>
+; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result (add, in-loop) ir<%add>
; CHECK-NEXT: Successor(s): ir-bb<exit>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
@@ -836,7 +836,7 @@ define i32 @print_mulacc_extended_const(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]>
+; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result (add, in-loop) vp<[[RDX_NEXT]]>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
entry:
@@ -891,7 +891,7 @@ define i32 @print_mulacc_extended_const_lhs(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]>
+; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result (add, in-loop) vp<[[RDX_NEXT]]>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
entry:
@@ -947,7 +947,7 @@ define i32 @print_mulacc_not_extended_const(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX:%.+]]>, vp<[[RDX_NEXT]]>
+; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result (add, in-loop) vp<[[RDX_NEXT]]>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
entry:
@@ -1002,7 +1002,7 @@ define i64 @print_ext_mulacc_extended_const(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]>
+; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result (add, in-loop) vp<[[RDX_NEXT]]>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
entry:
@@ -1060,7 +1060,7 @@ define i64 @print_ext_mulacc_not_extended_const(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RES:%.+]]> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]>
+; CHECK-NEXT: EMIT vp<[[RES:%.+]]> = compute-reduction-result (add, in-loop) vp<[[RDX_NEXT]]>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
entry:
@@ -1118,7 +1118,7 @@ define i64 @print_ext_mul_two_uses(i64 %n, ptr %a, i16 %b, i32 %c) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%7> = compute-reduction-result ir<%res2>, vp<%5>
+; CHECK-NEXT: EMIT vp<%7> = compute-reduction-result (add, in-loop) vp<%5>
; CHECK-NEXT: EMIT vp<[[EXT_PART:%.+]]> = extract-last-part ir<%load.ext.ext>
; CHECK-NEXT: EMIT vp<%vector.recur.extract> = extract-last-lane vp<[[EXT_PART]]>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%2>, vp<%1>
>From 7abb0086b3e21d50139722dc1d7532155b918dae Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 11 Jan 2026 21:00:20 +0000
Subject: [PATCH 2/2] [VPlan] FindIVResult
---
.../Transforms/Vectorize/LoopVectorize.cpp | 14 ++---
.../Vectorize/VPlanConstruction.cpp | 13 ++---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 51 +++++++++++--------
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2 +-
.../vplan-printing-reductions.ll | 2 +-
5 files changed, 48 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0dc1ce02ad974..9fad6c188deca 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7302,7 +7302,7 @@ static Value *getStartValueFromReductionResult(VPInstruction *RdxResult) {
using namespace VPlanPatternMatch;
assert(RdxResult->getOpcode() == VPInstruction::ComputeFindIVResult &&
"RdxResult must be ComputeFindIVResult");
- VPValue *StartVPV = RdxResult->getOperand(1);
+ VPValue *StartVPV = RdxResult->getOperand(0);
match(StartVPV, m_Freeze(m_VPValue(StartVPV)));
return StartVPV->getLiveInIRValue();
}
@@ -7360,7 +7360,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
MainResumeValue = Cmp->getOperand(0);
} else if (RecurrenceDescriptor::isFindIVRecurrenceKind(Kind)) {
Value *StartV = getStartValueFromReductionResult(EpiRedResult);
- Value *SentinelV = EpiRedResult->getOperand(2)->getLiveInIRValue();
+ Value *SentinelV = EpiRedResult->getOperand(1)->getLiveInIRValue();
using namespace llvm::PatternMatch;
Value *Cmp, *OrigResumeV, *CmpOp;
[[maybe_unused]] bool IsExpectedPattern =
@@ -8746,9 +8746,11 @@ void LoopVectorizationPlanner::addReductionResultComputation(
if (RecurrenceDescriptor::isFindIVRecurrenceKind(RecurrenceKind)) {
VPValue *Start = PhiR->getStartValue();
VPValue *Sentinel = Plan->getOrAddLiveIn(RdxDesc.getSentinelValue());
+ VPIRFlags Flags(RecurrenceKind, /*IsOrdered=*/false, /*IsInLoop=*/false,
+ FastMathFlags());
FinalReductionResult =
Builder.createNaryOp(VPInstruction::ComputeFindIVResult,
- {PhiR, Start, Sentinel, NewExitingVPV}, ExitDL);
+ {Start, Sentinel, NewExitingVPV}, Flags, ExitDL);
} else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RecurrenceKind)) {
VPValue *Start = PhiR->getStartValue();
FinalReductionResult =
@@ -9294,12 +9296,12 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
auto *VPI = dyn_cast<VPInstruction>(&R);
if (!VPI || VPI->getOpcode() != VPInstruction::ComputeFindIVResult)
continue;
- VPValue *OrigStart = VPI->getOperand(1);
+ VPValue *OrigStart = VPI->getOperand(0);
if (isGuaranteedNotToBeUndefOrPoison(OrigStart->getLiveInIRValue()))
continue;
VPInstruction *Freeze =
Builder.createNaryOp(Instruction::Freeze, {OrigStart}, {}, "fr");
- VPI->setOperand(1, Freeze);
+ VPI->setOperand(0, Freeze);
if (UpdateResumePhis)
OrigStart->replaceUsesWithIf(Freeze, [Freeze](VPUser &U, unsigned) {
return Freeze != &U && isa<VPPhi>(&U);
@@ -9456,7 +9458,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]);
if (auto *I = dyn_cast<Instruction>(Cmp))
InstsToMove.push_back(I);
- Value *Sentinel = RdxResult->getOperand(2)->getLiveInIRValue();
+ Value *Sentinel = RdxResult->getOperand(1)->getLiveInIRValue();
ResumeV = Builder.CreateSelect(Cmp, Sentinel, ResumeV);
if (auto *I = dyn_cast<Instruction>(ResumeV))
InstsToMove.push_back(I);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 4ca65e3bfb4c8..0ed9d845ebf0d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -1417,7 +1417,7 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) {
// For example, this transforms
// vp<%min.result> = compute-reduction-result ir<%min.val>,
// ir<%min.val.next>
- // vp<%find.iv.result = compute-find-iv-result ir<%min.idx>, ir<0>,
+ // vp<%find.iv.result> = compute-find-iv-result ir<0>,
// SENTINEL, vp<%min.idx.next>
//
// into:
@@ -1425,10 +1425,11 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) {
// vp<min.result> = compute-reduction-result ir<%min.val>, ir<%min.val.next>
// vp<%final.min.cmp> = icmp eq ir<%min.val.next>, vp<min.result>
// vp<%final.iv> = select vp<%final.min.cmp>, ir<%min.idx.next>, SENTINEL
- // vp<%find.iv.result> = compute-find-iv-result ir<%min.idx>, ir<0>,
+ // vp<%find.iv.result> = compute-find-iv-result ir<0>,
// SENTINEL, vp<%final.iv>
VPInstruction *FindIVResult =
- findUserOf<VPInstruction::ComputeFindIVResult>(FindIVPhiR);
+ findUserOf<VPInstruction::ComputeFindIVResult>(
+ FindIVPhiR->getBackedgeValue());
assert(FindIVResult->getParent() == MinMaxResult->getParent() &&
"both results must be computed in the same block");
MinMaxResult->moveBefore(*FindIVResult->getParent(),
@@ -1438,11 +1439,11 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) {
VPValue *MinMaxExiting = MinMaxResult->getOperand(0);
auto *FinalMinMaxCmp =
B.createICmp(CmpInst::ICMP_EQ, MinMaxExiting, MinMaxResult);
- VPValue *Sentinel = FindIVResult->getOperand(2);
- VPValue *LastIVExiting = FindIVResult->getOperand(3);
+ VPValue *Sentinel = FindIVResult->getOperand(1);
+ VPValue *LastIVExiting = FindIVResult->getOperand(2);
auto *FinalIVSelect =
B.createSelect(FinalMinMaxCmp, LastIVExiting, Sentinel);
- FindIVResult->setOperand(3, FinalIVSelect);
+ FindIVResult->setOperand(2, FinalIVSelect);
}
return true;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ca95a7685c1ee..51b8ea1445d1e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -474,10 +474,9 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case Instruction::Select:
case VPInstruction::ActiveLaneMask:
case VPInstruction::ComputeAnyOfResult:
+ case VPInstruction::ComputeFindIVResult:
case VPInstruction::ReductionStartVector:
return 3;
- case VPInstruction::ComputeFindIVResult:
- return 4;
case Instruction::Call:
case Instruction::GetElementPtr:
case Instruction::PHI:
@@ -726,20 +725,14 @@ Value *VPInstruction::generate(VPTransformState &State) {
State.get(getOperand(1), VPLane(0)), OrigPhi);
}
case VPInstruction::ComputeFindIVResult: {
- // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
- // and will be removed by breaking up the recipe further.
- auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
- // Get its reduction variable descriptor.
- RecurKind RK = PhiR->getRecurrenceKind();
+ RecurKind RK = getRecurKind();
assert(RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
"Unexpected reduction kind");
- assert(!PhiR->isInLoop() &&
- "In-loop FindLastIV reduction is not supported yet");
- // The recipe's operands are the reduction phi, the start value, the
- // sentinel value, followed by one operand for each part of the reduction.
- unsigned UF = getNumOperands() - 3;
- Value *ReducedPartRdx = State.get(getOperand(3));
+ // The recipe's operands are the start value, the sentinel value, followed
+ // by one operand for each part of the reduction.
+ unsigned UF = getNumOperands() - 2;
+ Value *ReducedPartRdx = State.get(getOperand(2));
RecurKind MinMaxKind;
bool IsSigned = RecurrenceDescriptor::isSignedRecurrenceKind(RK);
if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
@@ -748,10 +741,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
MinMaxKind = IsSigned ? RecurKind::SMin : RecurKind::UMin;
for (unsigned Part = 1; Part < UF; ++Part)
ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
- State.get(getOperand(3 + Part)));
+ State.get(getOperand(2 + Part)));
- Value *Start = State.get(getOperand(1), true);
- Value *Sentinel = getOperand(2)->getLiveInIRValue();
+ Value *Start = State.get(getOperand(0), true);
+ Value *Sentinel = getOperand(1)->getLiveInIRValue();
// Reduce the vector to a scalar.
bool IsFindLast = RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK);
@@ -1390,8 +1383,9 @@ bool VPInstruction::usesFirstLaneOnly(const VPValue *Op) const {
// WidePtrAdd supports scalar and vector base addresses.
return false;
case VPInstruction::ComputeAnyOfResult:
- case VPInstruction::ComputeFindIVResult:
return Op == getOperand(1);
+ case VPInstruction::ComputeFindIVResult:
+ return Op == getOperand(0);
case VPInstruction::ExtractLane:
return Op == getOperand(0);
};
@@ -2094,7 +2088,8 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
case OperationType::Cmp:
return Opcode == Instruction::FCmp || Opcode == Instruction::ICmp;
case OperationType::ReductionOp:
- return Opcode == VPInstruction::ComputeReductionResult;
+ return Opcode == VPInstruction::ComputeReductionResult ||
+ Opcode == VPInstruction::ComputeFindIVResult;
case OperationType::Other:
return true;
}
@@ -2149,8 +2144,24 @@ void VPIRFlags::printFlags(raw_ostream &O) const {
break;
case OperationType::ReductionOp: {
RecurKind RK = static_cast<RecurKind>(ReductionFlags.Kind);
- O << " ("
- << Instruction::getOpcodeName(RecurrenceDescriptor::getOpcode(RK));
+ O << " (";
+ switch (RK) {
+ case RecurKind::FindLastIVUMax:
+ O << "find-last-iv-umax";
+ break;
+ case RecurKind::FindLastIVSMax:
+ O << "find-last-iv-smax";
+ break;
+ case RecurKind::FindFirstIVUMin:
+ O << "find-first-iv-umin";
+ break;
+ case RecurKind::FindFirstIVSMin:
+ O << "find-first-iv-smin";
+ break;
+ default:
+ O << Instruction::getOpcodeName(RecurrenceDescriptor::getOpcode(RK));
+ break;
+ }
if (ReductionFlags.IsInLoop)
O << ", in-loop";
if (ReductionFlags.IsOrdered)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 15859ce16d42b..fa2401e2e84b3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -373,7 +373,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeFindIVResult>(
- m_VPValue(), m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
+ m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
addUniformForAllParts(cast<VPInstruction>(&R));
for (unsigned Part = 1; Part != UF; ++Part)
R.addOperand(getValueForPart(Op1, Part));
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index fc63e4975c3f5..bca7813bbec9b 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -238,7 +238,7 @@ define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RDX_RES:%.+]]> = compute-find-iv-result ir<%rdx>, ir<%start>, ir<-9223372036854775808>, ir<%cond>
+; CHECK-NEXT: EMIT vp<[[RDX_RES:%.+]]> = compute-find-iv-result (find-last-iv-smax) ir<%start>, ir<-9223372036854775808>, ir<%cond>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%n>, vp<{{.+}}>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
More information about the llvm-commits
mailing list