[llvm-branch-commits] [llvm] [LV] Use VPReductionRecipe for partial reductions (PR #146073)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jun 27 06:12:53 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Sam Tebbs (SamTebbs33)
<details>
<summary>Changes</summary>
Partial reductions can easily be represented by the VPReductionRecipe class by setting their scale factor to something greater than 1. This PR merges the two together and gives VPReductionRecipe a VFScaleFactor so that it can choose to generate the partial reduction intrinsic at execute time.
Depends on https://github.com/llvm/llvm-project/pull/144281
---
Patch is 364.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146073.diff
18 Files Affected:
- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+2)
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+15-4)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+20-14)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+50-87)
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+8-7)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+50-84)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+45-13)
- (modified) llvm/lib/Transforms/Vectorize/VPlanValue.h (-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll (+86-66)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll (+85-165)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll (+58-58)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll (+285-525)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll (+384-361)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll (+13-13)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll (+9-12)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll (+26-26)
- (modified) llvm/unittests/Transforms/Vectorize/VPlanTest.cpp (+4-4)
``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 90d92e0fcf55c..73f4a6b2af853 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -223,6 +223,8 @@ class TargetTransformInfo {
/// Get the kind of extension that an instruction represents.
LLVM_ABI static PartialReductionExtendKind
getPartialReductionExtendKind(Instruction *I);
+ LLVM_ABI static PartialReductionExtendKind
+ getPartialReductionExtendKind(Instruction::CastOps CastOpc);
/// Construct a TTI object using a type implementing the \c Concept
/// API below.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 3ebd9d487ba04..b81403cc35272 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1000,11 +1000,22 @@ InstructionCost TargetTransformInfo::getShuffleCost(
}
TargetTransformInfo::PartialReductionExtendKind
-TargetTransformInfo::getPartialReductionExtendKind(Instruction *I) {
- if (isa<SExtInst>(I))
- return PR_SignExtend;
- if (isa<ZExtInst>(I))
+TargetTransformInfo::getPartialReductionExtendKind(
+ Instruction::CastOps CastOpc) {
+ switch (CastOpc) {
+ case Instruction::CastOps::ZExt:
return PR_ZeroExtend;
+ case Instruction::CastOps::SExt:
+ return PR_SignExtend;
+ default:
+ return PR_None;
+ }
+}
+
+TargetTransformInfo::PartialReductionExtendKind
+TargetTransformInfo::getPartialReductionExtendKind(Instruction *I) {
+ if (auto *Cast = dyn_cast<CastInst>(I))
+ return getPartialReductionExtendKind(Cast->getOpcode());
return PR_None;
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index bb29e4fc6d232..60d06fb9ce4fd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7050,7 +7050,8 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
}
// The VPlan-based cost model is more accurate for partial reduction and
// comparing against the legacy cost isn't desirable.
- if (isa<VPPartialReductionRecipe>(&R))
+ if (auto *VPR = dyn_cast<VPReductionRecipe>(&R); VPR &&
+ VPR->isPartialReduction())
return true;
/// If a VPlan transform folded a recipe to one producing a single-scalar,
@@ -8280,11 +8281,14 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
// If the PHI is used by a partial reduction, set the scale factor.
- unsigned ScaleFactor =
- getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
- PhiRecipe = new VPReductionPHIRecipe(
- Phi, RdxDesc, *StartV, CM.isInLoopReduction(Phi),
- CM.useOrderedReductions(RdxDesc), ScaleFactor);
+ bool UseInLoopReduction = CM.isInLoopReduction(Phi);
+ bool UseOrderedReductions = CM.useOrderedReductions(RdxDesc);
+ auto ScaleFactor =
+ (UseOrderedReductions || UseInLoopReduction)
+ ? 0
+ : getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
+ PhiRecipe = new VPReductionPHIRecipe(Phi, RdxDesc, *StartV,
+ UseOrderedReductions, ScaleFactor);
} else {
// TODO: Currently fixed-order recurrences are modeled as chains of
// first-order recurrences. If there are no users of the intermediate
@@ -8348,7 +8352,8 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
VPValue *Accumulator = Operands[1];
VPRecipeBase *BinOpRecipe = BinOp->getDefiningRecipe();
if (isa<VPReductionPHIRecipe>(BinOpRecipe) ||
- isa<VPPartialReductionRecipe>(BinOpRecipe))
+ (isa<VPReductionRecipe>(BinOpRecipe) &&
+ cast<VPReductionRecipe>(BinOpRecipe)->isPartialReduction()))
std::swap(BinOp, Accumulator);
unsigned ReductionOpcode = Reduction->getOpcode();
@@ -8369,12 +8374,10 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
"Expected an ADD or SUB operation for predicated partial "
"reductions (because the neutral element in the mask is zero)!");
Cond = getBlockInMask(Builder.getInsertBlock());
- VPValue *Zero =
- Plan.getOrAddLiveIn(ConstantInt::get(Reduction->getType(), 0));
- BinOp = Builder.createSelect(Cond, BinOp, Zero, Reduction->getDebugLoc());
}
- return new VPPartialReductionRecipe(ReductionOpcode, Accumulator, BinOp, Cond,
- ScaleFactor, Reduction);
+
+ return new VPReductionRecipe(RecurKind::Add, FastMathFlags(), Reduction,
+ Accumulator, BinOp, Cond, false, ScaleFactor);
}
void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
@@ -9154,9 +9157,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
FastMathFlags FMFs = isa<FPMathOperator>(CurrentLinkI)
? RdxDesc.getFastMathFlags()
: FastMathFlags();
+ bool UseOrderedReductions = CM.useOrderedReductions(RdxDesc);
+ unsigned VFScaleFactor = !UseOrderedReductions;
auto *RedRecipe = new VPReductionRecipe(
Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
- CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc());
+ UseOrderedReductions, VFScaleFactor, CurrentLinkI->getDebugLoc());
// Append the recipe to the end of the VPBasicBlock because we need to
// ensure that it comes after all of it's inputs, including CondOp.
// Delete CurrentLink as it will be invalid if its operand is replaced
@@ -9190,8 +9195,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// Don't output selects for partial reductions because they have an output
// with fewer lanes than the VF. So the operands of the select would have
// different numbers of lanes. Partial reductions mask the input instead.
+ auto *RR = dyn_cast<VPReductionRecipe>(OrigExitingVPV->getDefiningRecipe());
if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
- !isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) {
+ (!RR || !RR->isPartialReduction())) {
VPValue *Cond = RecipeBuilder.getBlockInMask(PhiR->getParent());
std::optional<FastMathFlags> FMFs =
PhiTy->isFloatingPointTy()
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 39e9c55abfa2c..ebeba3f198113 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -552,7 +552,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenIntOrFpInductionSC:
case VPRecipeBase::VPWidenPointerInductionSC:
case VPRecipeBase::VPReductionPHISC:
- case VPRecipeBase::VPPartialReductionSC:
return true;
case VPRecipeBase::VPBranchOnMaskSC:
case VPRecipeBase::VPInterleaveSC:
@@ -2182,34 +2181,37 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Descriptor for the reduction.
const RecurrenceDescriptor &RdxDesc;
- /// The phi is part of an in-loop reduction.
- bool IsInLoop;
-
/// The phi is part of an ordered reduction. Requires IsInLoop to be true.
bool IsOrdered;
- /// When expanding the reduction PHI, the plan's VF element count is divided
- /// by this factor to form the reduction phi's VF.
- unsigned VFScaleFactor = 1;
+ /// The scaling factor, relative to the VF, that this recipe's output is
+ /// divided by.
+ /// For outer-loop reductions this is equal to 1.
+ /// For in-loop reductions this is equal to 0, to specify that this is equal
+ /// to the VF (which may not be known yet). For partial-reductions this is
+ /// equal to another scalar value.
+ unsigned VFScaleFactor;
public:
/// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
/// RdxDesc.
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
- VPValue &Start, bool IsInLoop = false,
- bool IsOrdered = false, unsigned VFScaleFactor = 1)
+ VPValue &Start, bool IsOrdered = false,
+ unsigned VFScaleFactor = 1)
: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
- RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered),
- VFScaleFactor(VFScaleFactor) {
- assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
+ RdxDesc(RdxDesc), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
+ assert((!IsOrdered || isInLoop()) &&
+ "IsOrdered requires the reduction to be in-loop");
+ assert(((!isInLoop() && !IsOrdered) || isInLoop()) &&
+ "Invalid VFScaleFactor");
}
~VPReductionPHIRecipe() override = default;
VPReductionPHIRecipe *clone() override {
- auto *R = new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()),
- RdxDesc, *getOperand(0), IsInLoop,
- IsOrdered, VFScaleFactor);
+ auto *R =
+ new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
+ *getOperand(0), IsOrdered, VFScaleFactor);
R->addOperand(getBackedgeValue());
return R;
}
@@ -2235,8 +2237,10 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Returns true, if the phi is part of an ordered reduction.
bool isOrdered() const { return IsOrdered; }
- /// Returns true, if the phi is part of an in-loop reduction.
- bool isInLoop() const { return IsInLoop; }
+ /// Returns true if the phi is part of an in-loop reduction.
+ bool isInLoop() const { return VFScaleFactor == 0; }
+
+ bool isPartialReduction() const { return VFScaleFactor > 1; }
/// Returns true if the recipe only uses the first lane of operand \p Op.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
@@ -2409,23 +2413,32 @@ class VPInterleaveRecipe : public VPRecipeBase {
Instruction *getInsertPos() const { return IG->getInsertPos(); }
};
-/// A recipe to represent inloop reduction operations, performing a reduction on
-/// a vector operand into a scalar value, and adding the result to a chain.
-/// The Operands are {ChainOp, VecOp, [Condition]}.
+/// A recipe to represent inloop, ordered or partial reduction operations. It
+/// performs a reduction on a vector operand into a scalar (vector in the case
+/// of a partial reduction) value, and adds the result to a chain. The Operands
+/// are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPRecipeWithIRFlags {
/// The recurrence kind for the reduction in question.
RecurKind RdxKind;
bool IsOrdered;
/// Whether the reduction is conditional.
bool IsConditional = false;
+ /// The scaling factor, relative to the VF, that this recipe's output is
+ /// divided by.
+ /// For outer-loop reductions this is equal to 1.
+ /// For in-loop reductions this is equal to 0, to specify that this is equal
+ /// to the VF (which may not be known yet).
+ /// For partial-reductions this is equal to another scalar value.
+ unsigned VFScaleFactor;
protected:
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
FastMathFlags FMFs, Instruction *I,
ArrayRef<VPValue *> Operands, VPValue *CondOp,
- bool IsOrdered, DebugLoc DL)
+ bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL)
: VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
- IsOrdered(IsOrdered) {
+ IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
+ assert((!IsOrdered || VFScaleFactor == 0) && "Invalid scale factor");
if (CondOp) {
IsConditional = true;
addOperand(CondOp);
@@ -2436,24 +2449,24 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
public:
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
- bool IsOrdered, DebugLoc DL = {})
+ bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL = {})
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
- IsOrdered, DL) {}
+ IsOrdered, VFScaleFactor, DL) {}
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
- bool IsOrdered, DebugLoc DL = {})
+ bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL = {})
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
- IsOrdered, DL) {}
+ IsOrdered, VFScaleFactor, DL) {}
~VPReductionRecipe() override = default;
VPReductionRecipe *clone() override {
- return new VPReductionRecipe(RdxKind, getFastMathFlags(),
- getUnderlyingInstr(), getChainOp(), getVecOp(),
- getCondOp(), IsOrdered, getDebugLoc());
+ return new VPReductionRecipe(
+ RdxKind, getFastMathFlags(), getUnderlyingInstr(), getChainOp(),
+ getVecOp(), getCondOp(), IsOrdered, VFScaleFactor, getDebugLoc());
}
static inline bool classof(const VPRecipeBase *R) {
@@ -2485,6 +2498,8 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
bool isOrdered() const { return IsOrdered; };
/// Return true if the in-loop reduction is conditional.
bool isConditional() const { return IsConditional; };
+ /// Return true if the reduction is a partial reduction.
+ bool isPartialReduction() const { return VFScaleFactor > 1; }
/// The VPValue of the scalar Chain being accumulated.
VPValue *getChainOp() const { return getOperand(0); }
/// The VPValue of the vector value to be reduced.
@@ -2493,65 +2508,8 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
VPValue *getCondOp() const {
return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
}
-};
-
-/// A recipe for forming partial reductions. In the loop, an accumulator and
-/// vector operand are added together and passed to the next iteration as the
-/// next accumulator. After the loop body, the accumulator is reduced to a
-/// scalar value.
-class VPPartialReductionRecipe : public VPReductionRecipe {
- unsigned Opcode;
-
- /// The divisor by which the VF of this recipe's output should be divided
- /// during execution.
- unsigned VFScaleFactor;
-
-public:
- VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0,
- VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
- : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond,
- VFScaleFactor, ReductionInst) {}
- VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
- VPValue *Cond, unsigned ScaleFactor,
- Instruction *ReductionInst = nullptr)
- : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
- FastMathFlags(), ReductionInst,
- ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
- Opcode(Opcode), VFScaleFactor(ScaleFactor) {
- [[maybe_unused]] auto *AccumulatorRecipe =
- getChainOp()->getDefiningRecipe();
- assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
- isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
- "Unexpected operand order for partial reduction recipe");
- }
- ~VPPartialReductionRecipe() override = default;
-
- VPPartialReductionRecipe *clone() override {
- return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
- getCondOp(), VFScaleFactor,
- getUnderlyingInstr());
- }
-
- VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
-
- /// Generate the reduction in the loop.
- void execute(VPTransformState &State) override;
-
- /// Return the cost of this VPPartialReductionRecipe.
- InstructionCost computeCost(ElementCount VF,
- VPCostContext &Ctx) const override;
-
- /// Get the binary op's opcode.
- unsigned getOpcode() const { return Opcode; }
-
/// Get the factor that the VF of this recipe's output should be scaled by.
unsigned getVFScaleFactor() const { return VFScaleFactor; }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the recipe.
- void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
-#endif
};
/// A recipe to represent inloop reduction operations with vector-predication
@@ -2567,7 +2525,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
R.getFastMathFlags(),
cast_or_null<Instruction>(R.getUnderlyingValue()),
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
- R.isOrdered(), DL) {}
+ R.isOrdered(), 0, DL) {}
~VPReductionEVLRecipe() override = default;
@@ -2768,6 +2726,11 @@ class VPSingleDefBundleRecipe : public VPSingleDefRecipe {
VPWidenRecipe *Mul, VPReductionRecipe *Red)
: VPSingleDefBundleRecipe(BundleTypes::ExtMulAccumulateReduction,
{Ext0, Ext1, Mul, Red}) {}
+ VPSingleDefBundleRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
+ VPWidenRecipe *Mul, VPWidenRecipe *Sub,
+ VPReductionRecipe *Red)
+ : VPSingleDefBundleRecipe(BundleTypes::ExtMulAccumulateReduction,
+ {Ext0, Ext1, Mul, Sub, Red}) {}
~VPSingleDefBundleRecipe() override {
SmallPtrSet<VPRecipeBase *, 4> Seen;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index effcb1e201cc7..37060c87bea8f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -283,10 +283,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
[](const auto *R) { return R->getScalarType(); })
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
- VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe,
- VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
- return inferScalarType(R->getOperand(0));
- })
+ VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe>(
+ [this](const VPRecipeBase *R) {
+ return inferScalarType(R->getOperand(0));
+ })
// VPInstructionWithType must be handled before VPInstruction.
.Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
VPWidenCastRecipe>(
@@ -397,7 +397,7 @@ bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
static unsigned getVFScaleFactor(VPRecipeBase *R) {
if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
return RR->getVFScaleFactor();
- if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
+ if (auto *RR = dyn_cast<VPReductionRecipe>(R))
return RR->getVFScaleFactor();
assert(
(!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
@@ -567,8 +567,9 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
} else {
// The output from scaled phis and scaled reductions actually has
// fewer lanes than the VF.
- unsigned ScaleFactor = getVFScaleFactor(R);
- ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor);
+ ElementCount VF = VFs[J];
+ if (unsigned ScaleFactor = getVFScaleFactor(R))
+ VF = VF.divideCoefficientBy(ScaleFactor);
LLVM_DEBUG(if (VF != VFs[J]) {
dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << "...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/146073
More information about the llvm-branch-commits
mailing list