[llvm] [VPlan] Replace RdxDesc with RecurKind in VPReductionPHIRecipe (NFC). (PR #142322)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 29 05:07:46 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/142322
>From 263d23b052ee251d270195cc1d03c64338fc4fdd Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 1 Jun 2025 22:07:35 +0100
Subject: [PATCH 1/2] [VPlan] Truncate/Extend ComputeReductionResult at
construction (NFC).
Instead of looking up the narrower reduction type via getRecurrenceType
we can generate the needed extend directly at constructiond re-use the
truncated value from the loop.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 52 +++++++++++--------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 17 ------
.../LoopVectorize/X86/cost-model.ll | 3 +-
.../epilog-vectorization-reductions.ll | 6 +--
.../LoopVectorize/reduction-small-size.ll | 9 ++--
.../scalable-reduction-inloop.ll | 4 +-
6 files changed, 36 insertions(+), 55 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 95479373b4393..80d31136ebbd6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7255,7 +7255,10 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
// Get the VPInstruction computing the reduction result in the middle block.
// The first operand may not be from the middle block if it is not connected
// to the scalar preheader. In that case, there's nothing to fix.
- auto *EpiRedResult = dyn_cast<VPInstruction>(EpiResumePhiR->getOperand(0));
+ VPValue *Incoming = EpiResumePhiR->getOperand(0);
+ match(Incoming, VPlanPatternMatch::m_ZExtOrSExt(
+ VPlanPatternMatch::m_VPValue(Incoming)));
+ auto *EpiRedResult = dyn_cast<VPInstruction>(Incoming);
if (!EpiRedResult ||
(EpiRedResult->getOpcode() != VPInstruction::ComputeAnyOfResult &&
EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult &&
@@ -9206,28 +9209,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
PhiR->setOperand(1, NewExitingVPV);
}
- // If the vector reduction can be performed in a smaller type, we truncate
- // then extend the loop exit value to enable InstCombine to evaluate the
- // entire expression in the smaller type.
- if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType() &&
- !RecurrenceDescriptor::isAnyOfRecurrenceKind(
- RdxDesc.getRecurrenceKind())) {
- assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
- Type *RdxTy = RdxDesc.getRecurrenceType();
- auto *Trunc =
- new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy);
- auto *Extnd =
- RdxDesc.isSigned()
- ? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy)
- : new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy);
-
- Trunc->insertAfter(NewExitingVPV->getDefiningRecipe());
- Extnd->insertAfter(Trunc);
- if (PhiR->getOperand(1) == NewExitingVPV)
- PhiR->setOperand(1, Extnd->getVPSingleValue());
- NewExitingVPV = Extnd;
- }
-
// We want code in the middle block to appear to execute on the location of
// the scalar loop's latch terminator because: (a) it is all compiler
// generated, (b) these instructions are always executed after evaluating
@@ -9266,6 +9247,31 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
Builder.createNaryOp(VPInstruction::ComputeReductionResult,
{PhiR, NewExitingVPV}, Flags, ExitDL);
}
+ // If the vector reduction can be performed in a smaller type, we truncate
+ // then extend the loop exit value to enable InstCombine to evaluate the
+ // entire expression in the smaller type.
+ if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType() &&
+ !RecurrenceDescriptor::isAnyOfRecurrenceKind(
+ RdxDesc.getRecurrenceKind())) {
+ assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
+ Type *RdxTy = RdxDesc.getRecurrenceType();
+ auto *Trunc =
+ new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy);
+ Instruction::CastOps ExtendOpc =
+ RdxDesc.isSigned() ? Instruction::SExt : Instruction::ZExt;
+ auto *Extnd = new VPWidenCastRecipe(ExtendOpc, Trunc, PhiTy);
+ Trunc->insertAfter(NewExitingVPV->getDefiningRecipe());
+ Extnd->insertAfter(Trunc);
+ if (PhiR->getOperand(1) == NewExitingVPV)
+ PhiR->setOperand(1, Extnd->getVPSingleValue());
+
+ // Update ComputeReductionResult with the truncated exiting value and
+ // extend its result.
+ FinalReductionResult->setOperand(1, Trunc);
+ FinalReductionResult =
+ Builder.createScalarCast(ExtendOpc, FinalReductionResult, PhiTy, {});
+ }
+
// Update all users outside the vector region. Also replace redundant
// ExtractLastElement.
for (auto *U : to_vector(OrigExitingVPV->users())) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 472b5700bd358..1a38932ef99fe 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -771,7 +771,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
assert(!RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
"should be handled by ComputeFindIVResult");
- Type *ResultTy = State.TypeAnalysis.inferScalarType(this);
// The recipe's operands are the reduction phi, followed by one operand for
// each part of the reduction.
unsigned UF = getNumOperands() - 1;
@@ -783,15 +782,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
if (hasFastMathFlags())
Builder.setFastMathFlags(getFastMathFlags());
- // If the vector reduction can be performed in a smaller type, we truncate
- // then extend the loop exit value to enable InstCombine to evaluate the
- // entire expression in the smaller type.
- // TODO: Handle this in truncateToMinBW.
- if (State.VF.isVector() && ResultTy != RdxDesc.getRecurrenceType()) {
- Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
- for (unsigned Part = 0; Part < UF; ++Part)
- RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
- }
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = RdxParts[0];
if (PhiR->isOrdered()) {
@@ -816,13 +806,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);
-
- // If the reduction can be performed in a smaller type, we need to extend
- // the reduction to the wider type before we branch to the original loop.
- if (ResultTy != RdxDesc.getRecurrenceType())
- ReducedPartRdx = RdxDesc.isSigned()
- ? Builder.CreateSExt(ReducedPartRdx, ResultTy)
- : Builder.CreateZExt(ReducedPartRdx, ResultTy);
}
return ReducedPartRdx;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 2c6fe4f5c808e..147e949808b54 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -1167,8 +1167,7 @@ define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i32> [[TMP7]] to <16 x i1>
-; CHECK-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP10]])
+; CHECK-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP21:%.*]] = zext i1 [[TMP20]] to i32
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_PH]]
; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
index 0a2bb8d5682f2..c101d6a19aa2e 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
@@ -208,8 +208,7 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i16>
-; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP9]])
+; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP6]])
; CHECK-NEXT: [[TMP11:%.*]] = zext i16 [[TMP10]] to i32
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
@@ -234,8 +233,7 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) {
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT4]], 256
; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: [[TMP22:%.*]] = trunc <4 x i32> [[TMP20]] to <4 x i16>
-; CHECK-NEXT: [[TMP23:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP22]])
+; CHECK-NEXT: [[TMP23:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP19]])
; CHECK-NEXT: [[TMP24:%.*]] = zext i16 [[TMP23]] to i32
; CHECK-NEXT: br i1 true, label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
index 796c1d116aa19..13cc1b657d231 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
@@ -25,8 +25,7 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8>
-; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP6]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP3]])
; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -104,8 +103,7 @@ define i8 @PR34687_no_undef(i1 %c, i32 %x, i32 %n) {
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i8>
-; CHECK-NEXT: [[TMP9:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP8]])
+; CHECK-NEXT: [[TMP9:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP5]])
; CHECK-NEXT: [[TMP10:%.*]] = zext i8 [[TMP9]] to i32
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -183,8 +181,7 @@ define i32 @PR35734(i32 %x, i32 %y) {
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i1>
-; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> [[TMP8]])
+; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i32
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
index 079f6b73e8861..e901d9801a143 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
@@ -43,9 +43,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP37:%.*]] = trunc <vscale x 8 x i32> [[TMP34]] to <vscale x 8 x i8>
-; CHECK-NEXT: [[TMP38:%.*]] = trunc <vscale x 8 x i32> [[TMP36]] to <vscale x 8 x i8>
-; CHECK-NEXT: [[BIN_RDX:%.*]] = add <vscale x 8 x i8> [[TMP38]], [[TMP37]]
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add <vscale x 8 x i8> [[TMP35]], [[TMP33]]
; CHECK-NEXT: [[TMP39:%.*]] = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> [[BIN_RDX]])
; CHECK-NEXT: [[TMP40:%.*]] = zext i8 [[TMP39]] to i32
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 256, [[N_VEC]]
>From 5c7962735d1ae1416d3e9ad56b011ca79b6b0ae3 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 30 May 2025 10:02:17 +0100
Subject: [PATCH 2/2] [VPlan] Replace RdxDesc with RecurKind in
VPReductionPHIRecipe (NFC).
Replace VPReductionPHIRecipe with RecurKind in VPReductionPHIRecipe, as
all VPlan analyses and codegen only require the recurrence kind. This
enables creating new VPReductionPHIRecipe directly in LV, without
needing to construction a whole RecurrenceDescriptor object.
Depends on
https://github.com/llvm/llvm-project/pull/141860
https://github.com/llvm/llvm-project/pull/141932
https://github.com/llvm/llvm-project/pull/142290
https://github.com/llvm/llvm-project/pull/142291
---
.../Transforms/Vectorize/LoopVectorize.cpp | 27 +++++++------------
llvm/lib/Transforms/Vectorize/VPlan.h | 21 +++++++++------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 12 ++++-----
.../Transforms/Vectorize/VPlanTransforms.cpp | 3 +--
4 files changed, 28 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 80d31136ebbd6..aca0693d57a24 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7267,8 +7267,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
auto *EpiRedHeaderPhi =
cast<VPReductionPHIRecipe>(EpiRedResult->getOperand(0));
- const RecurrenceDescriptor &RdxDesc =
- EpiRedHeaderPhi->getRecurrenceDescriptor();
+ RecurKind Kind = EpiRedHeaderPhi->getRecurrenceKind();
Value *MainResumeValue;
if (auto *VPI = dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue())) {
assert((VPI->getOpcode() == VPInstruction::Broadcast ||
@@ -7277,8 +7276,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
} else
MainResumeValue = EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
- if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
- RdxDesc.getRecurrenceKind())) {
+ if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind)) {
[[maybe_unused]] Value *StartV =
EpiRedResult->getOperand(1)->getLiveInIRValue();
auto *Cmp = cast<ICmpInst>(MainResumeValue);
@@ -7288,8 +7286,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
"AnyOf expected to start by comparing main resume value to original "
"start value");
MainResumeValue = Cmp->getOperand(0);
- } else if (RecurrenceDescriptor::isFindIVRecurrenceKind(
- RdxDesc.getRecurrenceKind())) {
+ } else if (RecurrenceDescriptor::isFindIVRecurrenceKind(Kind)) {
Value *StartV = getStartValueFromReductionResult(EpiRedResult);
Value *SentinelV = EpiRedResult->getOperand(2)->getLiveInIRValue();
using namespace llvm::PatternMatch;
@@ -9040,8 +9037,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
continue;
- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- RecurKind Kind = RdxDesc.getRecurrenceKind();
+ RecurKind Kind = PhiR->getRecurrenceKind();
assert(
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
@@ -9147,6 +9143,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (CM.blockNeedsPredicationForAnyReason(CurrentLinkI->getParent()))
CondOp = RecipeBuilder.getBlockInMask(CurrentLink->getParent());
+ const RecurrenceDescriptor &RdxDesc = Legal->getReductionVars().lookup(
+ cast<PHINode>(PhiR->getUnderlyingInstr()));
// Non-FP RdxDescs will have all fast math flags set, so clear them.
FastMathFlags FMFs = isa<FPMathOperator>(CurrentLinkI)
? RdxDesc.getFastMathFlags()
@@ -9177,7 +9175,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (!PhiR)
continue;
- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+ const RecurrenceDescriptor &RdxDesc = Legal->getReductionVars().lookup(
+ cast<PHINode>(PhiR->getUnderlyingInstr()));
Type *PhiTy = PhiR->getUnderlyingValue()->getType();
// If tail is folded by masking, introduce selects between the phi
// and the users outside the vector region of each reduction, at the
@@ -9820,14 +9819,9 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
}));
ResumeV = cast<PHINode>(ReductionPhi->getUnderlyingInstr())
->getIncomingValueForBlock(L->getLoopPreheader());
- const RecurrenceDescriptor &RdxDesc =
- ReductionPhi->getRecurrenceDescriptor();
- RecurKind RK = RdxDesc.getRecurrenceKind();
+ RecurKind RK = ReductionPhi->getRecurrenceKind();
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
Value *StartV = RdxResult->getOperand(1)->getLiveInIRValue();
- assert(RdxDesc.getRecurrenceStartValue() == StartV &&
- "start value from ComputeAnyOfResult must match");
-
// VPReductionPHIRecipes for AnyOf reductions expect a boolean as
// start value; compare the final value from the main vector loop
// to the start value.
@@ -9836,9 +9830,6 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
ResumeV = Builder.CreateICmpNE(ResumeV, StartV);
} else if (RecurrenceDescriptor::isFindIVRecurrenceKind(RK)) {
Value *StartV = getStartValueFromReductionResult(RdxResult);
- assert(RdxDesc.getRecurrenceStartValue() == StartV &&
- "start value from ComputeFinIVResult must match");
-
ToFrozen[StartV] = cast<PHINode>(ResumeV)->getIncomingValueForBlock(
EPI.MainLoopIterationCountCheck);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 61b5ccd85bc6e..a6fa917d0a8f0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2187,7 +2187,7 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
public VPUnrollPartAccessor<2> {
/// Descriptor for the reduction.
- const RecurrenceDescriptor &RdxDesc;
+ const RecurKind Kind;
/// The phi is part of an in-loop reduction.
bool IsInLoop;
@@ -2206,8 +2206,15 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
VPValue &Start, bool IsInLoop = false,
bool IsOrdered = false, unsigned VFScaleFactor = 1)
: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
- RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered),
- VFScaleFactor(VFScaleFactor) {
+ Kind(RdxDesc.getRecurrenceKind()), IsInLoop(IsInLoop),
+ IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
+ assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
+ }
+ VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start,
+ bool IsInLoop = false, bool IsOrdered = false,
+ unsigned VFScaleFactor = 1)
+ : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
+ IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
}
@@ -2215,8 +2222,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
VPReductionPHIRecipe *clone() override {
auto *R = new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()),
- RdxDesc, *getOperand(0), IsInLoop,
- IsOrdered, VFScaleFactor);
+ getRecurrenceKind(), *getOperand(0),
+ IsInLoop, IsOrdered, VFScaleFactor);
R->addOperand(getBackedgeValue());
return R;
}
@@ -2235,9 +2242,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
VPSlotTracker &SlotTracker) const override;
#endif
- const RecurrenceDescriptor &getRecurrenceDescriptor() const {
- return RdxDesc;
- }
+ RecurKind getRecurrenceKind() const { return Kind; }
/// Returns true, if the phi is part of an ordered reduction.
bool isOrdered() const { return IsOrdered; }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1a38932ef99fe..0500c4d69d1cc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -730,8 +730,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
// and will be removed by breaking up the recipe further.
auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
// Get its reduction variable descriptor.
- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- RecurKind RK = RdxDesc.getRecurrenceKind();
+ [[maybe_unused]] RecurKind RK = PhiR->getRecurrenceKind();
assert(RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
"Unexpected reduction kind");
assert(!PhiR->isInLoop() &&
@@ -765,9 +764,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
// and will be removed by breaking up the recipe further.
auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
// Get its reduction variable descriptor.
- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- RecurKind RK = RdxDesc.getRecurrenceKind();
+ RecurKind RK = PhiR->getRecurrenceKind();
assert(!RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
"should be handled by ComputeFindIVResult");
@@ -793,9 +791,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
else
- ReducedPartRdx =
- Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),
- RdxPart, ReducedPartRdx, "bin.rdx");
+ ReducedPartRdx = Builder.CreateBinOp(
+ (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(RK),
+ RdxPart, ReducedPartRdx, "bin.rdx");
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index bcfb889469ea1..3f3ac6e957c3c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1735,8 +1735,7 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
if (!PhiR)
continue;
- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- RecurKind RK = RdxDesc.getRecurrenceKind();
+ RecurKind RK = PhiR->getRecurrenceKind();
if (RK != RecurKind::Add && RK != RecurKind::Mul)
continue;
More information about the llvm-commits
mailing list