[llvm] [VPlan] Replace RdxDesc with RecurKind in VPReductionPHIRecipe (NFC). (PR #142322)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 25 02:51:07 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/142322
From 927ed49c529be68d77c38bf5d7674aea42cf97f4 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 1 Jun 2025 22:07:35 +0100
Subject: [PATCH 1/2] [VPlan] Truncate/Extend ComputeReductionResult at
construction (NFC).
Instead of looking up the narrower reduction type via getRecurrenceType,
we can generate the needed extend directly at construction time and
re-use the truncated value from the loop.
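In short, the truncate/extend pair is now materialized once while
building the VPlan, and ComputeReductionResult consumes the truncated
value directly. A condensed sketch of the new flow, assembled from the
hunks below (names such as NewExitingVPV, RdxTy, PhiTy and
FinalReductionResult are as in the patch; this is not standalone code):

  // Truncate the exiting value to the narrow reduction type, then
  // widen it back to the phi type inside the loop region.
  auto *Trunc =
      new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy);
  Instruction::CastOps ExtendOpc =
      RdxDesc.isSigned() ? Instruction::SExt : Instruction::ZExt;
  auto *Extnd = new VPWidenCastRecipe(ExtendOpc, Trunc, PhiTy);
  // ComputeReductionResult now reduces the truncated value; its scalar
  // result is extended to PhiTy in the middle block, so generate() no
  // longer has to look up the narrow type via getRecurrenceType().
  FinalReductionResult->setOperand(1, Trunc);
  FinalReductionResult =
      Builder.createScalarCast(ExtendOpc, FinalReductionResult, PhiTy, {});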
---
.../Transforms/Vectorize/LoopVectorize.cpp | 52 +++++++++++--------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 17 ------
.../LoopVectorize/X86/cost-model.ll | 3 +-
.../epilog-vectorization-reductions.ll | 6 +--
.../LoopVectorize/reduction-small-size.ll | 9 ++--
.../scalable-reduction-inloop.ll | 4 +-
6 files changed, 36 insertions(+), 55 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5eda2003920e6..aa7e7a6625894 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7260,7 +7260,10 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
// Get the VPInstruction computing the reduction result in the middle block.
// The first operand may not be from the middle block if it is not connected
// to the scalar preheader. In that case, there's nothing to fix.
- auto *EpiRedResult = dyn_cast<VPInstruction>(EpiResumePhiR->getOperand(0));
+ VPValue *Incoming = EpiResumePhiR->getOperand(0);
+ match(Incoming, VPlanPatternMatch::m_ZExtOrSExt(
+ VPlanPatternMatch::m_VPValue(Incoming)));
+ auto *EpiRedResult = dyn_cast<VPInstruction>(Incoming);
if (!EpiRedResult ||
(EpiRedResult->getOpcode() != VPInstruction::ComputeAnyOfResult &&
EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult &&
@@ -9211,28 +9214,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
PhiR->setOperand(1, NewExitingVPV);
}
- // If the vector reduction can be performed in a smaller type, we truncate
- // then extend the loop exit value to enable InstCombine to evaluate the
- // entire expression in the smaller type.
- if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType() &&
- !RecurrenceDescriptor::isAnyOfRecurrenceKind(
- RdxDesc.getRecurrenceKind())) {
- assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
- Type *RdxTy = RdxDesc.getRecurrenceType();
- auto *Trunc =
- new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy);
- auto *Extnd =
- RdxDesc.isSigned()
- ? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy)
- : new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy);
-
- Trunc->insertAfter(NewExitingVPV->getDefiningRecipe());
- Extnd->insertAfter(Trunc);
- if (PhiR->getOperand(1) == NewExitingVPV)
- PhiR->setOperand(1, Extnd->getVPSingleValue());
- NewExitingVPV = Extnd;
- }
-
// We want code in the middle block to appear to execute on the location of
// the scalar loop's latch terminator because: (a) it is all compiler
// generated, (b) these instructions are always executed after evaluating
@@ -9271,6 +9252,31 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
Builder.createNaryOp(VPInstruction::ComputeReductionResult,
{PhiR, NewExitingVPV}, Flags, ExitDL);
}
+ // If the vector reduction can be performed in a smaller type, we truncate
+ // then extend the loop exit value to enable InstCombine to evaluate the
+ // entire expression in the smaller type.
+ if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType() &&
+ !RecurrenceDescriptor::isAnyOfRecurrenceKind(
+ RdxDesc.getRecurrenceKind())) {
+ assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
+ Type *RdxTy = RdxDesc.getRecurrenceType();
+ auto *Trunc =
+ new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy);
+ Instruction::CastOps ExtendOpc =
+ RdxDesc.isSigned() ? Instruction::SExt : Instruction::ZExt;
+ auto *Extnd = new VPWidenCastRecipe(ExtendOpc, Trunc, PhiTy);
+ Trunc->insertAfter(NewExitingVPV->getDefiningRecipe());
+ Extnd->insertAfter(Trunc);
+ if (PhiR->getOperand(1) == NewExitingVPV)
+ PhiR->setOperand(1, Extnd->getVPSingleValue());
+
+ // Update ComputeReductionResult with the truncated exiting value and
+ // extend its result.
+ FinalReductionResult->setOperand(1, Trunc);
+ FinalReductionResult =
+ Builder.createScalarCast(ExtendOpc, FinalReductionResult, PhiTy, {});
+ }
+
// Update all users outside the vector region. Also replace redundant
// ExtractLastElement.
for (auto *U : to_vector(OrigExitingVPV->users())) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 4b0b74c0ea7d8..65ece51fa9811 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -731,7 +731,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
assert(!RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
"should be handled by ComputeFindLastIVResult");
- Type *ResultTy = State.TypeAnalysis.inferScalarType(this);
// The recipe's operands are the reduction phi, followed by one operand for
// each part of the reduction.
unsigned UF = getNumOperands() - 1;
@@ -743,15 +742,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
if (hasFastMathFlags())
Builder.setFastMathFlags(getFastMathFlags());
- // If the vector reduction can be performed in a smaller type, we truncate
- // then extend the loop exit value to enable InstCombine to evaluate the
- // entire expression in the smaller type.
- // TODO: Handle this in truncateToMinBW.
- if (State.VF.isVector() && ResultTy != RdxDesc.getRecurrenceType()) {
- Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
- for (unsigned Part = 0; Part < UF; ++Part)
- RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
- }
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = RdxParts[0];
if (PhiR->isOrdered()) {
@@ -776,13 +766,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);
-
- // If the reduction can be performed in a smaller type, we need to extend
- // the reduction to the wider type before we branch to the original loop.
- if (ResultTy != RdxDesc.getRecurrenceType())
- ReducedPartRdx = RdxDesc.isSigned()
- ? Builder.CreateSExt(ReducedPartRdx, ResultTy)
- : Builder.CreateZExt(ReducedPartRdx, ResultTy);
}
return ReducedPartRdx;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 2c6fe4f5c808e..147e949808b54 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -1167,8 +1167,7 @@ define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i32> [[TMP7]] to <16 x i1>
-; CHECK-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP10]])
+; CHECK-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP21:%.*]] = zext i1 [[TMP20]] to i32
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_PH]]
; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
index 0a2bb8d5682f2..c101d6a19aa2e 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
@@ -208,8 +208,7 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i16>
-; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP9]])
+; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP6]])
; CHECK-NEXT: [[TMP11:%.*]] = zext i16 [[TMP10]] to i32
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
@@ -234,8 +233,7 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) {
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT4]], 256
; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: [[TMP22:%.*]] = trunc <4 x i32> [[TMP20]] to <4 x i16>
-; CHECK-NEXT: [[TMP23:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP22]])
+; CHECK-NEXT: [[TMP23:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP19]])
; CHECK-NEXT: [[TMP24:%.*]] = zext i16 [[TMP23]] to i32
; CHECK-NEXT: br i1 true, label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
index 796c1d116aa19..13cc1b657d231 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
@@ -25,8 +25,7 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8>
-; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP6]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP3]])
; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -104,8 +103,7 @@ define i8 @PR34687_no_undef(i1 %c, i32 %x, i32 %n) {
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i8>
-; CHECK-NEXT: [[TMP9:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP8]])
+; CHECK-NEXT: [[TMP9:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP5]])
; CHECK-NEXT: [[TMP10:%.*]] = zext i8 [[TMP9]] to i32
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -183,8 +181,7 @@ define i32 @PR35734(i32 %x, i32 %y) {
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i1>
-; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> [[TMP8]])
+; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i32
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
index 079f6b73e8861..e901d9801a143 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
@@ -43,9 +43,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP37:%.*]] = trunc <vscale x 8 x i32> [[TMP34]] to <vscale x 8 x i8>
-; CHECK-NEXT: [[TMP38:%.*]] = trunc <vscale x 8 x i32> [[TMP36]] to <vscale x 8 x i8>
-; CHECK-NEXT: [[BIN_RDX:%.*]] = add <vscale x 8 x i8> [[TMP38]], [[TMP37]]
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add <vscale x 8 x i8> [[TMP35]], [[TMP33]]
; CHECK-NEXT: [[TMP39:%.*]] = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> [[BIN_RDX]])
; CHECK-NEXT: [[TMP40:%.*]] = zext i8 [[TMP39]] to i32
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 256, [[N_VEC]]
From 4cba7deb485add5bd57ee2b88cd4925c9bf7cedf Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 30 May 2025 10:02:17 +0100
Subject: [PATCH 2/2] [VPlan] Replace RdxDesc with RecurKind in
VPReductionPHIRecipe (NFC).
Replace RdxDesc with RecurKind in VPReductionPHIRecipe, as all VPlan
analyses and codegen only require the recurrence kind. This enables
creating new VPReductionPHIRecipes directly in LV, without needing to
construct a whole RecurrenceDescriptor object.
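For illustration, a minimal sketch of what this enables, assuming a
helper placed next to VPlan.h in lib/Transforms/Vectorize (Phi, StartV
and the helper itself are hypothetical placeholders; the constructor
and getRecurrenceKind() are the ones added in the VPlan.h hunk below):

  #include "VPlan.h"
  #include "llvm/Analysis/IVDescriptors.h" // RecurKind

  using namespace llvm;

  // Build a reduction phi recipe from just the recurrence kind, without
  // materializing a RecurrenceDescriptor.
  static VPReductionPHIRecipe *makeAddReductionPhi(PHINode *Phi,
                                                   VPValue &StartV) {
    return new VPReductionPHIRecipe(Phi, RecurKind::Add, StartV,
                                    /*IsInLoop=*/false,
                                    /*IsOrdered=*/false);
  }

Consumers then query the kind directly, e.g.
RecurKind RK = PhiR->getRecurrenceKind();, instead of going through
getRecurrenceDescriptor().getRecurrenceKind().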
Depends on
https://github.com/llvm/llvm-project/pull/141860
https://github.com/llvm/llvm-project/pull/141932
https://github.com/llvm/llvm-project/pull/142290
https://github.com/llvm/llvm-project/pull/142291
---
.../Transforms/Vectorize/LoopVectorize.cpp | 27 +++++++------------
llvm/lib/Transforms/Vectorize/VPlan.h | 21 +++++++++------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 12 ++++-----
.../Transforms/Vectorize/VPlanTransforms.cpp | 3 +--
4 files changed, 28 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index aa7e7a6625894..ea65549663356 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7272,8 +7272,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
auto *EpiRedHeaderPhi =
cast<VPReductionPHIRecipe>(EpiRedResult->getOperand(0));
- const RecurrenceDescriptor &RdxDesc =
- EpiRedHeaderPhi->getRecurrenceDescriptor();
+ RecurKind Kind = EpiRedHeaderPhi->getRecurrenceKind();
Value *MainResumeValue;
if (auto *VPI = dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue())) {
assert((VPI->getOpcode() == VPInstruction::Broadcast ||
@@ -7282,8 +7281,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
} else
MainResumeValue = EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
- if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
- RdxDesc.getRecurrenceKind())) {
+ if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind)) {
Value *StartV = EpiRedResult->getOperand(1)->getLiveInIRValue();
(void)StartV;
auto *Cmp = cast<ICmpInst>(MainResumeValue);
@@ -7293,8 +7291,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
"AnyOf expected to start by comparing main resume value to original "
"start value");
MainResumeValue = Cmp->getOperand(0);
- } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
- RdxDesc.getRecurrenceKind())) {
+ } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind)) {
Value *StartV = getStartValueFromReductionResult(EpiRedResult);
Value *SentinelV = EpiRedResult->getOperand(2)->getLiveInIRValue();
using namespace llvm::PatternMatch;
@@ -9045,8 +9042,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
continue;
- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- RecurKind Kind = RdxDesc.getRecurrenceKind();
+ RecurKind Kind = PhiR->getRecurrenceKind();
assert(
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
!RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind) &&
@@ -9152,6 +9148,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (CM.blockNeedsPredicationForAnyReason(CurrentLinkI->getParent()))
CondOp = RecipeBuilder.getBlockInMask(CurrentLink->getParent());
+ const RecurrenceDescriptor &RdxDesc = Legal->getReductionVars().lookup(
+ cast<PHINode>(PhiR->getUnderlyingInstr()));
// Non-FP RdxDescs will have all fast math flags set, so clear them.
FastMathFlags FMFs = isa<FPMathOperator>(CurrentLinkI)
? RdxDesc.getFastMathFlags()
@@ -9182,7 +9180,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (!PhiR)
continue;
- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+ const RecurrenceDescriptor &RdxDesc = Legal->getReductionVars().lookup(
+ cast<PHINode>(PhiR->getUnderlyingInstr()));
Type *PhiTy = PhiR->getUnderlyingValue()->getType();
// If tail is folded by masking, introduce selects between the phi
// and the users outside the vector region of each reduction, at the
@@ -9825,14 +9824,9 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
}));
ResumeV = cast<PHINode>(ReductionPhi->getUnderlyingInstr())
->getIncomingValueForBlock(L->getLoopPreheader());
- const RecurrenceDescriptor &RdxDesc =
- ReductionPhi->getRecurrenceDescriptor();
- RecurKind RK = RdxDesc.getRecurrenceKind();
+ RecurKind RK = ReductionPhi->getRecurrenceKind();
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
Value *StartV = RdxResult->getOperand(1)->getLiveInIRValue();
- assert(RdxDesc.getRecurrenceStartValue() == StartV &&
- "start value from ComputeAnyOfResult must match");
-
// VPReductionPHIRecipes for AnyOf reductions expect a boolean as
// start value; compare the final value from the main vector loop
// to the start value.
@@ -9841,9 +9835,6 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
ResumeV = Builder.CreateICmpNE(ResumeV, StartV);
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
Value *StartV = getStartValueFromReductionResult(RdxResult);
- assert(RdxDesc.getRecurrenceStartValue() == StartV &&
- "start value from ComputeFindLastIVResult must match");
-
ToFrozen[StartV] = cast<PHINode>(ResumeV)->getIncomingValueForBlock(
EPI.MainLoopIterationCountCheck);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 972eca1fe8376..08ea925e763e8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2174,7 +2174,7 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
public VPUnrollPartAccessor<2> {
/// Descriptor for the reduction.
- const RecurrenceDescriptor &RdxDesc;
+ const RecurKind Kind;
/// The phi is part of an in-loop reduction.
bool IsInLoop;
@@ -2193,8 +2193,15 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
VPValue &Start, bool IsInLoop = false,
bool IsOrdered = false, unsigned VFScaleFactor = 1)
: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
- RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered),
- VFScaleFactor(VFScaleFactor) {
+ Kind(RdxDesc.getRecurrenceKind()), IsInLoop(IsInLoop),
+ IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
+ assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
+ }
+ VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start,
+ bool IsInLoop = false, bool IsOrdered = false,
+ unsigned VFScaleFactor = 1)
+ : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
+ IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
}
@@ -2202,8 +2209,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
VPReductionPHIRecipe *clone() override {
auto *R = new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()),
- RdxDesc, *getOperand(0), IsInLoop,
- IsOrdered, VFScaleFactor);
+ getRecurrenceKind(), *getOperand(0),
+ IsInLoop, IsOrdered, VFScaleFactor);
R->addOperand(getBackedgeValue());
return R;
}
@@ -2222,9 +2229,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
VPSlotTracker &SlotTracker) const override;
#endif
- const RecurrenceDescriptor &getRecurrenceDescriptor() const {
- return RdxDesc;
- }
+ RecurKind getRecurrenceKind() const { return Kind; }
/// Returns true, if the phi is part of an ordered reduction.
bool isOrdered() const { return IsOrdered; }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 65ece51fa9811..5319a69f1aba8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -697,8 +697,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
// and will be removed by breaking up the recipe further.
auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
// Get its reduction variable descriptor.
- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- RecurKind RK = RdxDesc.getRecurrenceKind();
+ [[maybe_unused]] RecurKind RK = PhiR->getRecurrenceKind();
assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
"Unexpected reduction kind");
assert(!PhiR->isInLoop() &&
@@ -725,9 +724,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
// and will be removed by breaking up the recipe further.
auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
// Get its reduction variable descriptor.
- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- RecurKind RK = RdxDesc.getRecurrenceKind();
+ RecurKind RK = PhiR->getRecurrenceKind();
assert(!RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
"should be handled by ComputeFindLastIVResult");
@@ -753,9 +751,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
else
- ReducedPartRdx =
- Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),
- RdxPart, ReducedPartRdx, "bin.rdx");
+ ReducedPartRdx = Builder.CreateBinOp(
+ (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(RK),
+ RdxPart, ReducedPartRdx, "bin.rdx");
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index ac6be09ef271d..f72d7a41d904c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1717,8 +1717,7 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
if (!PhiR)
continue;
- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- RecurKind RK = RdxDesc.getRecurrenceKind();
+ RecurKind RK = PhiR->getRecurrenceKind();
if (RK != RecurKind::Add && RK != RecurKind::Mul)
continue;