[llvm] 071d1fb - [LV] Use VPReductionRecipe for partial reductions (#147513)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 26 08:18:27 PST 2025
Author: Sam Tebbs
Date: 2025-11-26T16:18:22Z
New Revision: 071d1fb8beda4c0ec9ede0acab12a382f34758fe
URL: https://github.com/llvm/llvm-project/commit/071d1fb8beda4c0ec9ede0acab12a382f34758fe
DIFF: https://github.com/llvm/llvm-project/commit/071d1fb8beda4c0ec9ede0acab12a382f34758fe.diff
LOG: [LV] Use VPReductionRecipe for partial reductions (#147513)
Partial reductions can easily be represented by the VPReductionRecipe
class by setting their VF scale factor to a value greater than 1. This PR
merges the two recipes and gives VPReductionRecipe a VFScaleFactor so
that it can choose whether to generate the partial-reduction intrinsic
when the recipe is executed.
Stacked PRs:
1. https://github.com/llvm/llvm-project/pull/147026
2. https://github.com/llvm/llvm-project/pull/147255
3. https://github.com/llvm/llvm-project/pull/156976
4. https://github.com/llvm/llvm-project/pull/160154
5. https://github.com/llvm/llvm-project/pull/147302
6. https://github.com/llvm/llvm-project/pull/162503
7. -> https://github.com/llvm/llvm-project/pull/147513
Replaces https://github.com/llvm/llvm-project/pull/146073.
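A minimal sketch (not part of the patch) of how a partial reduction might now
be built with the merged recipe, assuming the ReductionStyle/RdxUnordered types
and the VPReductionRecipe constructor introduced below; Accumulator, BinOp and
ReductionI stand in for the values assembled in tryToCreatePartialReduction:

  // Sketch only: a VF scale factor > 1 marks the reduction as partial, so
  // VPReductionRecipe::execute() emits llvm.vector.partial.reduce.add
  // instead of a scalar in-loop reduction.
  ReductionStyle Style = RdxUnordered{/*VFScaleFactor=*/4};
  auto *Red = new VPReductionRecipe(
      RecurKind::Add, FastMathFlags(), /*I=*/ReductionI,
      /*ChainOp=*/Accumulator, /*VecOp=*/BinOp, /*CondOp=*/nullptr, Style,
      ReductionI->getDebugLoc());
  assert(Red->isPartialReduction() && Red->getVFScaleFactor() == 4);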
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
llvm/lib/Transforms/Vectorize/VPlanValue.h
llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 393586e504c17..7a37d18c9992c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7021,10 +7021,11 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
VPInstruction::FirstOrderRecurrenceSplice>())))
return true;
}
- // The VPlan-based cost model is more accurate for partial reduction and
+ // The VPlan-based cost model is more accurate for partial reductions and
// comparing against the legacy cost isn't desirable.
- if (isa<VPPartialReductionRecipe>(&R))
- return true;
+ if (auto *VPR = dyn_cast<VPReductionRecipe>(&R))
+ if (VPR->isPartialReduction())
+ return true;
// The VPlan-based cost model can analyze if recipes are scalar
// recursively, but the legacy cost model cannot.
@@ -8207,11 +8208,15 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
// If the PHI is used by a partial reduction, set the scale factor.
+ bool UseInLoopReduction = CM.isInLoopReduction(Phi);
+ bool UseOrderedReductions = CM.useOrderedReductions(RdxDesc);
unsigned ScaleFactor =
getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
+
PhiRecipe = new VPReductionPHIRecipe(
- Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi),
- CM.useOrderedReductions(RdxDesc), ScaleFactor);
+ Phi, RdxDesc.getRecurrenceKind(), *StartV,
+ getReductionStyle(UseInLoopReduction, UseOrderedReductions,
+ ScaleFactor));
} else {
// TODO: Currently fixed-order recurrences are modeled as chains of
// first-order recurrences. If there are no users of the intermediate
@@ -8280,16 +8285,18 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
VPValue *BinOp = Reduction->getOperand(0);
VPValue *Accumulator = Reduction->getOperand(1);
- if (isa<VPReductionPHIRecipe>(BinOp) || isa<VPPartialReductionRecipe>(BinOp))
+ VPRecipeBase *BinOpRecipe = BinOp->getDefiningRecipe();
+ if (isa<VPReductionPHIRecipe>(BinOpRecipe) ||
+ (isa<VPReductionRecipe>(BinOpRecipe) &&
+ cast<VPReductionRecipe>(BinOpRecipe)->isPartialReduction()))
std::swap(BinOp, Accumulator);
assert(ScaleFactor ==
vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()) &&
"all accumulators in chain must have same scale factor");
- unsigned ReductionOpcode = Reduction->getOpcode();
auto *ReductionI = Reduction->getUnderlyingInstr();
- if (ReductionOpcode == Instruction::Sub) {
+ if (Reduction->getOpcode() == Instruction::Sub) {
auto *const Zero = ConstantInt::get(ReductionI->getType(), 0);
SmallVector<VPValue *, 2> Ops;
Ops.push_back(Plan.getOrAddLiveIn(Zero));
@@ -8297,14 +8304,15 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
BinOp = new VPWidenRecipe(*ReductionI, Ops, VPIRFlags(*ReductionI),
VPIRMetadata(), ReductionI->getDebugLoc());
Builder.insert(BinOp->getDefiningRecipe());
- ReductionOpcode = Instruction::Add;
}
VPValue *Cond = nullptr;
if (CM.blockNeedsPredicationForAnyReason(ReductionI->getParent()))
Cond = getBlockInMask(Builder.getInsertBlock());
- return new VPPartialReductionRecipe(ReductionOpcode, Accumulator, BinOp, Cond,
- ScaleFactor, ReductionI);
+
+ return new VPReductionRecipe(
+ RecurKind::Add, FastMathFlags(), ReductionI, Accumulator, BinOp, Cond,
+ RdxUnordered{/*VFScaleFactor=*/ScaleFactor}, ReductionI->getDebugLoc());
}
void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
@@ -8795,9 +8803,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (CM.blockNeedsPredicationForAnyReason(CurrentLinkI->getParent()))
CondOp = RecipeBuilder.getBlockInMask(CurrentLink->getParent());
- auto *RedRecipe = new VPReductionRecipe(
- Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
- PhiR->isOrdered(), CurrentLinkI->getDebugLoc());
+ ReductionStyle Style = getReductionStyle(true, PhiR->isOrdered(), 1);
+ auto *RedRecipe =
+ new VPReductionRecipe(Kind, FMFs, CurrentLinkI, PreviousLink, VecOp,
+ CondOp, Style, CurrentLinkI->getDebugLoc());
// Append the recipe to the end of the VPBasicBlock because we need to
// ensure that it comes after all of it's inputs, including CondOp.
// Delete CurrentLink as it will be invalid if its operand is replaced
@@ -8832,8 +8841,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// Don't output selects for partial reductions because they have an output
// with fewer lanes than the VF. So the operands of the select would have
// different numbers of lanes. Partial reductions mask the input instead.
+ auto *RR = dyn_cast<VPReductionRecipe>(OrigExitingVPV->getDefiningRecipe());
if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
- !isa<VPPartialReductionRecipe>(OrigExitingVPV)) {
+ (!RR || !RR->isPartialReduction())) {
VPValue *Cond = RecipeBuilder.getBlockInMask(PhiR->getParent());
std::optional<FastMathFlags> FMFs =
PhiTy->isFloatingPointTy()
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 0c7d9c0193a03..d957d9110def9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -44,6 +44,7 @@
#include <functional>
#include <string>
#include <utility>
+#include <variant>
namespace llvm {
@@ -566,7 +567,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenIntOrFpInductionSC:
case VPRecipeBase::VPWidenPointerInductionSC:
case VPRecipeBase::VPReductionPHISC:
- case VPRecipeBase::VPPartialReductionSC:
return true;
case VPRecipeBase::VPBranchOnMaskSC:
case VPRecipeBase::VPInterleaveEVLSC:
@@ -2392,6 +2392,29 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
#endif
};
+/// Possible variants of a reduction.
+
+/// This reduction is ordered and in-loop.
+struct RdxOrdered {};
+/// This reduction is in-loop.
+struct RdxInLoop {};
+/// This reduction is unordered with the partial result scaled down by some
+/// factor.
+struct RdxUnordered {
+ unsigned VFScaleFactor;
+};
+using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
+
+inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
+ unsigned ScaleFactor) {
+ assert((!Ordered || InLoop) && "Ordered implies in-loop");
+ if (Ordered)
+ return RdxOrdered{};
+ if (InLoop)
+ return RdxInLoop{};
+ return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
+}
+
/// A recipe for handling reduction phis. The start value is the first operand
/// of the recipe and the incoming value from the backedge is the second
/// operand.
@@ -2400,32 +2423,21 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// The recurrence kind of the reduction.
const RecurKind Kind;
- /// The phi is part of an in-loop reduction.
- bool IsInLoop;
-
- /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
- bool IsOrdered;
-
- /// When expanding the reduction PHI, the plan's VF element count is divided
- /// by this factor to form the reduction phi's VF.
- unsigned VFScaleFactor = 1;
+ ReductionStyle Style;
public:
/// Create a new VPReductionPHIRecipe for the reduction \p Phi.
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start,
- bool IsInLoop = false, bool IsOrdered = false,
- unsigned VFScaleFactor = 1)
+ ReductionStyle Style)
: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
- IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
- assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
- }
+ Style(Style) {}
~VPReductionPHIRecipe() override = default;
VPReductionPHIRecipe *clone() override {
auto *R = new VPReductionPHIRecipe(
dyn_cast_or_null<PHINode>(getUnderlyingValue()), getRecurrenceKind(),
- *getOperand(0), IsInLoop, IsOrdered, VFScaleFactor);
+ *getOperand(0), Style);
R->addOperand(getBackedgeValue());
return R;
}
@@ -2435,8 +2447,12 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Generate the phi/select nodes.
void execute(VPTransformState &State) override;
- /// Get the factor that the VF of this recipe's output should be scaled by.
- unsigned getVFScaleFactor() const { return VFScaleFactor; }
+ /// Get the factor that the VF of this recipe's output should be scaled by, or
+ /// 1 if it isn't scaled.
+ unsigned getVFScaleFactor() const {
+ auto *Partial = std::get_if<RdxUnordered>(&Style);
+ return Partial ? Partial->VFScaleFactor : 1;
+ }
/// Returns the number of incoming values, also number of incoming blocks.
/// Note that at the moment, VPWidenPointerInductionRecipe only has a single
@@ -2447,10 +2463,16 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
RecurKind getRecurrenceKind() const { return Kind; }
/// Returns true, if the phi is part of an ordered reduction.
- bool isOrdered() const { return IsOrdered; }
+ bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
- /// Returns true, if the phi is part of an in-loop reduction.
- bool isInLoop() const { return IsInLoop; }
+ /// Returns true if the phi is part of an in-loop reduction.
+ bool isInLoop() const {
+ return std::holds_alternative<RdxInLoop>(Style) ||
+ std::holds_alternative<RdxOrdered>(Style);
+ }
+
+ /// Returns true if the reduction outputs a vector with a scaled down VF.
+ bool isPartialReduction() const { return getVFScaleFactor() > 1; }
/// Returns true if the recipe only uses the first lane of operand \p Op.
bool usesFirstLaneOnly(const VPValue *Op) const override {
@@ -2732,23 +2754,25 @@ class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase {
#endif
};
-/// A recipe to represent inloop reduction operations, performing a reduction on
-/// a vector operand into a scalar value, and adding the result to a chain.
-/// The Operands are {ChainOp, VecOp, [Condition]}.
+/// A recipe to represent inloop, ordered or partial reduction operations. It
+/// performs a reduction on a vector operand into a scalar (vector in the case
+/// of a partial reduction) value, and adds the result to a chain. The Operands
+/// are {ChainOp, VecOp, [Condition]}.
class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
+
/// The recurrence kind for the reduction in question.
RecurKind RdxKind;
- bool IsOrdered;
/// Whether the reduction is conditional.
bool IsConditional = false;
+ ReductionStyle Style;
protected:
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
FastMathFlags FMFs, Instruction *I,
ArrayRef<VPValue *> Operands, VPValue *CondOp,
- bool IsOrdered, DebugLoc DL)
+ ReductionStyle Style, DebugLoc DL)
: VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
- IsOrdered(IsOrdered) {
+ Style(Style) {
if (CondOp) {
IsConditional = true;
addOperand(CondOp);
@@ -2759,30 +2783,29 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
public:
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
- bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
+ ReductionStyle Style, DebugLoc DL = DebugLoc::getUnknown())
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
- ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
- IsOrdered, DL) {}
+ ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
+ DL) {}
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
- bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
+ ReductionStyle Style, DebugLoc DL = DebugLoc::getUnknown())
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
- ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
- IsOrdered, DL) {}
+ ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
+ DL) {}
~VPReductionRecipe() override = default;
VPReductionRecipe *clone() override {
return new VPReductionRecipe(RdxKind, getFastMathFlags(),
getUnderlyingInstr(), getChainOp(), getVecOp(),
- getCondOp(), IsOrdered, getDebugLoc());
+ getCondOp(), Style, getDebugLoc());
}
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
- R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
- R->getVPDefID() == VPRecipeBase::VPPartialReductionSC;
+ R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
}
static inline bool classof(const VPUser *U) {
@@ -2809,9 +2832,16 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
/// Return the recurrence kind for the in-loop reduction.
RecurKind getRecurrenceKind() const { return RdxKind; }
/// Return true if the in-loop reduction is ordered.
- bool isOrdered() const { return IsOrdered; };
+ bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
/// Return true if the in-loop reduction is conditional.
bool isConditional() const { return IsConditional; };
+ /// Returns true if the reduction outputs a vector with a scaled down VF.
+ bool isPartialReduction() const { return getVFScaleFactor() > 1; }
+ /// Returns true if the reduction is in-loop.
+ bool isInLoop() const {
+ return std::holds_alternative<RdxInLoop>(Style) ||
+ std::holds_alternative<RdxOrdered>(Style);
+ }
/// The VPValue of the scalar Chain being accumulated.
VPValue *getChainOp() const { return getOperand(0); }
/// The VPValue of the vector value to be reduced.
@@ -2820,69 +2850,12 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
VPValue *getCondOp() const {
return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
}
-
-protected:
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the recipe.
- void printRecipe(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
-#endif
-};
-
-/// A recipe for forming partial reductions. In the loop, an accumulator and
-/// vector operand are added together and passed to the next iteration as the
-/// next accumulator. After the loop body, the accumulator is reduced to a
-/// scalar value.
-class VPPartialReductionRecipe : public VPReductionRecipe {
- unsigned Opcode;
-
- /// The divisor by which the VF of this recipe's output should be divided
- /// during execution.
- unsigned VFScaleFactor;
-
-public:
- VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0,
- VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
- : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond,
- VFScaleFactor, ReductionInst) {}
- VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
- VPValue *Cond, unsigned ScaleFactor,
- Instruction *ReductionInst = nullptr)
- : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
- FastMathFlags(), ReductionInst,
- ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
- Opcode(Opcode), VFScaleFactor(ScaleFactor) {
- [[maybe_unused]] auto *AccumulatorRecipe =
- getChainOp()->getDefiningRecipe();
- // When cloning as part of a VPExpressionRecipe the chain op could have
- // replaced by a temporary VPValue, so it doesn't have a defining recipe.
- assert((!AccumulatorRecipe ||
- isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
- isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
- "Unexpected operand order for partial reduction recipe");
- }
- ~VPPartialReductionRecipe() override = default;
-
- VPPartialReductionRecipe *clone() override {
- return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
- getCondOp(), VFScaleFactor,
- getUnderlyingInstr());
- }
-
- VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
-
- /// Generate the reduction in the loop.
- void execute(VPTransformState &State) override;
-
- /// Return the cost of this VPPartialReductionRecipe.
- InstructionCost computeCost(ElementCount VF,
- VPCostContext &Ctx) const override;
-
- /// Get the binary op's opcode.
- unsigned getOpcode() const { return Opcode; }
-
- /// Get the factor that the VF of this recipe's output should be scaled by.
- unsigned getVFScaleFactor() const { return VFScaleFactor; }
+ /// Get the factor that the VF of this recipe's output should be scaled by, or
+ /// 1 if it isn't scaled.
+ unsigned getVFScaleFactor() const {
+ auto *Partial = std::get_if<RdxUnordered>(&Style);
+ return Partial ? Partial->VFScaleFactor : 1;
+ }
protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2905,7 +2878,7 @@ class LLVM_ABI_FOR_TEST VPReductionEVLRecipe : public VPReductionRecipe {
R.getFastMathFlags(),
cast_or_null<Instruction>(R.getUnderlyingValue()),
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
- R.isOrdered(), DL) {}
+ getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1), DL) {}
~VPReductionEVLRecipe() override = default;
@@ -3173,7 +3146,7 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
void decompose();
unsigned getVFScaleFactor() const {
- auto *PR = dyn_cast<VPPartialReductionRecipe>(ExpressionRecipes.back());
+ auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
return PR ? PR->getVFScaleFactor() : 1;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 80a2e4bc3f754..f84a7914ec850 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -288,10 +288,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
[](const auto *R) { return R->getScalarType(); })
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
- VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe,
- VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
- return inferScalarType(R->getOperand(0));
- })
+ VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe>(
+ [this](const VPRecipeBase *R) {
+ return inferScalarType(R->getOperand(0));
+ })
// VPInstructionWithType must be handled before VPInstruction.
.Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
VPWidenCastRecipe>(
@@ -561,11 +561,12 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
// fewer lanes than the VF.
unsigned ScaleFactor =
vputils::getVFScaleFactor(VPV->getDefiningRecipe());
- ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor);
- LLVM_DEBUG(if (VF != VFs[J]) {
- dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF
- << " for " << *R << "\n";
- });
+ ElementCount VF = VFs[J];
+ if (ScaleFactor > 1) {
+ VF = VFs[J].divideCoefficientBy(ScaleFactor);
+ LLVM_DEBUG(dbgs() << "LV(REG): Scaled down VF from " << VFs[J]
+ << " to " << VF << " for " << *R << "\n";);
+ }
Type *ScalarTy = TypeInfo.inferScalarType(VPV);
unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1c88b56ca89dc..e753804aa6374 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -180,7 +180,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
case VPBlendSC:
case VPReductionEVLSC:
- case VPPartialReductionSC:
case VPReductionSC:
case VPScalarIVStepsSC:
case VPVectorPointerSC:
@@ -314,134 +313,6 @@ bool VPRecipeBase::isScalarCast() const {
return VPI && Instruction::isCast(VPI->getOpcode());
}
-InstructionCost
-VPPartialReductionRecipe::computeCost(ElementCount VF,
- VPCostContext &Ctx) const {
- std::optional<unsigned> Opcode;
- VPValue *Op = getVecOp();
- uint64_t MulConst;
-
- InstructionCost CondCost = 0;
- if (isConditional()) {
- CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
- auto *VecTy = Ctx.Types.inferScalarType(Op);
- auto *CondTy = Ctx.Types.inferScalarType(getCondOp());
- CondCost = Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VecTy, CondTy,
- Pred, Ctx.CostKind);
- }
-
- // If the partial reduction is predicated, a select will be operand 1.
- // If it isn't predicated and the mul isn't operating on a constant, then it
- // should have been turned into a VPExpressionRecipe.
- // FIXME: Replace the entire function with this once all partial reduction
- // variants are bundled into VPExpressionRecipe.
- if (!match(Op, m_Mul(m_VPValue(), m_ConstantInt(MulConst)))) {
- auto *PhiType = Ctx.Types.inferScalarType(getChainOp());
- auto *InputType = Ctx.Types.inferScalarType(getVecOp());
- return CondCost + Ctx.TTI.getPartialReductionCost(
- getOpcode(), InputType, InputType, PhiType, VF,
- TTI::PR_None, TTI::PR_None, {}, Ctx.CostKind);
- }
-
- VPRecipeBase *OpR = Op->getDefiningRecipe();
- Type *InputTypeA = nullptr, *InputTypeB = nullptr;
- TTI::PartialReductionExtendKind ExtAType = TTI::PR_None,
- ExtBType = TTI::PR_None;
-
- auto GetExtendKind = [](VPRecipeBase *R) {
- if (!R)
- return TTI::PR_None;
- auto *WidenCastR = dyn_cast<VPWidenCastRecipe>(R);
- if (!WidenCastR)
- return TTI::PR_None;
- if (WidenCastR->getOpcode() == Instruction::CastOps::ZExt)
- return TTI::PR_ZeroExtend;
- if (WidenCastR->getOpcode() == Instruction::CastOps::SExt)
- return TTI::PR_SignExtend;
- return TTI::PR_None;
- };
-
- // Pick out opcode, type/ext information and use sub side effects from a widen
- // recipe.
- auto HandleWiden = [&](VPWidenRecipe *Widen) {
- if (match(Widen, m_Sub(m_ZeroInt(), m_VPValue(Op)))) {
- Widen = dyn_cast<VPWidenRecipe>(Op);
- }
- Opcode = Widen->getOpcode();
- VPRecipeBase *ExtAR = Widen->getOperand(0)->getDefiningRecipe();
- VPRecipeBase *ExtBR = Widen->getOperand(1)->getDefiningRecipe();
- InputTypeA = Ctx.Types.inferScalarType(ExtAR ? ExtAR->getOperand(0)
- : Widen->getOperand(0));
- InputTypeB = Ctx.Types.inferScalarType(ExtBR ? ExtBR->getOperand(0)
- : Widen->getOperand(1));
- ExtAType = GetExtendKind(ExtAR);
- ExtBType = GetExtendKind(ExtBR);
-
- using namespace VPlanPatternMatch;
- const APInt *C;
- if (!ExtBR && match(Widen->getOperand(1), m_APInt(C)) &&
- canConstantBeExtended(C, InputTypeA, ExtAType)) {
- InputTypeB = InputTypeA;
- ExtBType = ExtAType;
- }
- };
-
- if (isa<VPWidenCastRecipe>(OpR)) {
- InputTypeA = Ctx.Types.inferScalarType(OpR->getOperand(0));
- ExtAType = GetExtendKind(OpR);
- } else if (isa<VPReductionPHIRecipe>(OpR)) {
- if (auto RedPhiOp1R = dyn_cast_or_null<VPWidenCastRecipe>(getOperand(1))) {
- InputTypeA = Ctx.Types.inferScalarType(RedPhiOp1R->getOperand(0));
- ExtAType = GetExtendKind(RedPhiOp1R);
- } else if (auto Widen = dyn_cast_or_null<VPWidenRecipe>(getOperand(1)))
- HandleWiden(Widen);
- } else if (auto Widen = dyn_cast<VPWidenRecipe>(OpR)) {
- HandleWiden(Widen);
- } else if (auto Reduction = dyn_cast<VPPartialReductionRecipe>(OpR)) {
- return CondCost + Reduction->computeCost(VF, Ctx);
- }
- auto *PhiType = Ctx.Types.inferScalarType(getOperand(1));
- return CondCost + Ctx.TTI.getPartialReductionCost(
- getOpcode(), InputTypeA, InputTypeB, PhiType, VF,
- ExtAType, ExtBType, Opcode, Ctx.CostKind);
- ;
-}
-
-void VPPartialReductionRecipe::execute(VPTransformState &State) {
- auto &Builder = State.Builder;
-
- assert(getOpcode() == Instruction::Add &&
- "Unhandled partial reduction opcode");
-
- Value *BinOpVal = State.get(getVecOp());
- Value *PhiVal = State.get(getChainOp());
- assert(PhiVal && BinOpVal && "Phi and Mul must be set");
-
- Type *RetTy = PhiVal->getType();
-
- if (isConditional()) {
- Value *Cond = State.get(getCondOp());
- Value *Zero = ConstantInt::get(BinOpVal->getType(), 0);
- BinOpVal = Builder.CreateSelect(Cond, BinOpVal, Zero);
- }
-
- CallInst *V =
- Builder.CreateIntrinsic(RetTy, Intrinsic::vector_partial_reduce_add,
- {PhiVal, BinOpVal}, nullptr, "partial.reduce");
-
- State.set(this, V);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPPartialReductionRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "PARTIAL-REDUCE ";
- printAsOperand(O, SlotTracker);
- O << " = " << Instruction::getOpcodeName(getOpcode()) << " ";
- printOperands(O, SlotTracker);
-}
-#endif
-
void VPIRFlags::intersectFlags(const VPIRFlags &Other) {
assert(OpType == Other.OpType && "OpType must match");
switch (OpType) {
@@ -2695,7 +2566,6 @@ void VPBlendRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
void VPReductionRecipe::execute(VPTransformState &State) {
assert(!State.Lane && "Reduction being replicated.");
- Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
RecurKind Kind = getRecurrenceKind();
assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
"In-loop AnyOf reductions aren't currently supported");
@@ -2717,7 +2587,8 @@ void VPReductionRecipe::execute(VPTransformState &State) {
}
Value *NewRed;
Value *NextInChain;
- if (IsOrdered) {
+ if (isOrdered()) {
+ Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
if (State.VF.isVector())
NewRed =
createOrderedReduction(State.Builder, Kind, NewVecOp, PrevInChain);
@@ -2727,8 +2598,18 @@ void VPReductionRecipe::execute(VPTransformState &State) {
PrevInChain, NewVecOp);
PrevInChain = NewRed;
NextInChain = NewRed;
+ } else if (isPartialReduction()) {
+ assert(Kind == RecurKind::Add && "Unexpected partial reduction kind");
+ Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ false);
+ NewRed = State.Builder.CreateIntrinsic(
+ PrevInChain->getType(), Intrinsic::vector_partial_reduce_add,
+ {PrevInChain, NewVecOp}, nullptr, "partial.reduce");
+ PrevInChain = NewRed;
+ NextInChain = NewRed;
} else {
- PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
+ assert(isInLoop() &&
+ "The reduction must either be ordered, partial or in-loop");
+ Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
NewRed = createSimpleReduction(State.Builder, NewVecOp, Kind);
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
NextInChain = createMinMaxOp(State.Builder, Kind, NewRed, PrevInChain);
@@ -2737,7 +2618,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
(Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(Kind),
PrevInChain, NewRed);
}
- State.set(this, NextInChain, /*IsScalar*/ true);
+ State.set(this, NextInChain, /*IsScalar*/ !isPartialReduction());
}
void VPReductionEVLRecipe::execute(VPTransformState &State) {
@@ -2784,6 +2665,22 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
std::optional<FastMathFlags> OptionalFMF =
ElementTy->isFloatingPointTy() ? std::make_optional(FMFs) : std::nullopt;
+ if (isPartialReduction()) {
+ InstructionCost CondCost = 0;
+ if (isConditional()) {
+ CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
+ auto *CondTy = cast<VectorType>(
+ toVectorTy(Ctx.Types.inferScalarType(getCondOp()), VF));
+ CondCost = Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VectorTy,
+ CondTy, Pred, Ctx.CostKind);
+ }
+ return CondCost + Ctx.TTI.getPartialReductionCost(
+ Opcode, ElementTy, ElementTy, ElementTy, VF,
+ TargetTransformInfo::PR_None,
+ TargetTransformInfo::PR_None, std::nullopt,
+ Ctx.CostKind);
+ }
+
// TODO: Support any-of reductions.
assert(
(!RecurrenceDescriptor::isAnyOfRecurrenceKind(RdxKind) ||
@@ -2889,7 +2786,9 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
unsigned Opcode = RecurrenceDescriptor::getOpcode(
cast<VPReductionRecipe>(ExpressionRecipes[1])->getRecurrenceKind());
auto *ExtR = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
- return isa<VPPartialReductionRecipe>(ExpressionRecipes.back())
+
+ return cast<VPReductionRecipe>(ExpressionRecipes.back())
+ ->isPartialReduction()
? Ctx.TTI.getPartialReductionCost(
Opcode, Ctx.Types.inferScalarType(getOperand(0)), nullptr,
RedTy, VF,
@@ -2909,7 +2808,8 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
Opcode = Instruction::Sub;
[[fallthrough]];
case ExpressionTypes::ExtMulAccReduction: {
- if (isa<VPPartialReductionRecipe>(ExpressionRecipes.back())) {
+ auto *RedR = cast<VPReductionRecipe>(ExpressionRecipes.back());
+ if (RedR->isPartialReduction()) {
auto *Ext0R = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
auto *Ext1R = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);
auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]);
@@ -2948,8 +2848,8 @@ bool VPExpressionRecipe::mayHaveSideEffects() const {
bool VPExpressionRecipe::isSingleScalar() const {
// Cannot use vputils::isSingleScalar(), because all external operands
// of the expression will be live-ins while bundled.
- return isa<VPReductionRecipe>(ExpressionRecipes.back()) &&
- !isa<VPPartialReductionRecipe>(ExpressionRecipes.back());
+ auto *RR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
+ return RR && !RR->isPartialReduction();
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2961,12 +2861,11 @@ void VPExpressionRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
O << " = ";
auto *Red = cast<VPReductionRecipe>(ExpressionRecipes.back());
unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());
- bool IsPartialReduction = isa<VPPartialReductionRecipe>(Red);
switch (ExpressionType) {
case ExpressionTypes::ExtendedReduction: {
getOperand(1)->printAsOperand(O, SlotTracker);
- O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
+ O << " + " << (Red->isPartialReduction() ? "partial." : "") << "reduce.";
O << Instruction::getOpcodeName(Opcode) << " (";
getOperand(0)->printAsOperand(O, SlotTracker);
Red->printFlags(O);
@@ -2983,7 +2882,7 @@ void VPExpressionRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
}
case ExpressionTypes::ExtNegatedMulAccReduction: {
getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
- O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
+ O << " + " << (Red->isPartialReduction() ? "partial." : "") << "reduce.";
O << Instruction::getOpcodeName(
RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
<< " (sub (0, mul";
@@ -3008,7 +2907,7 @@ void VPExpressionRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
case ExpressionTypes::MulAccReduction:
case ExpressionTypes::ExtMulAccReduction: {
getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
- O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
+ O << " + " << (Red->isPartialReduction() ? "partial." : "") << "reduce.";
O << Instruction::getOpcodeName(
RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
<< " (";
@@ -3045,7 +2944,10 @@ void VPExpressionRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
void VPReductionRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << Indent << "REDUCE ";
+ if (isPartialReduction())
+ O << Indent << "PARTIAL-REDUCE ";
+ else
+ O << Indent << "REDUCE ";
printAsOperand(O, SlotTracker);
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
@@ -4433,7 +4335,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
// this value when we vectorize all of the instructions that use the PHI.
BasicBlock *VectorPH =
State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
- bool ScalarPHI = State.VF.isScalar() || IsInLoop;
+ bool ScalarPHI = State.VF.isScalar() || isInLoop();
Value *StartV = State.get(StartVPV, ScalarPHI);
Type *VecTy = StartV->getType();
@@ -4442,7 +4344,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
"recipe must be in the vector loop header");
auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
Phi->insertBefore(HeaderBB->getFirstInsertionPt());
- State.set(this, Phi, IsInLoop);
+ State.set(this, Phi, isInLoop());
Phi->addIncoming(StartV, VectorPH);
}
@@ -4455,8 +4357,8 @@ void VPReductionPHIRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
printAsOperand(O, SlotTracker);
O << " = phi ";
printOperands(O, SlotTracker);
- if (VFScaleFactor != 1)
- O << " (VF scaled by 1/" << VFScaleFactor << ")";
+ if (getVFScaleFactor() > 1)
+ O << " (VF scaled by 1/" << getVFScaleFactor() << ")";
}
#endif
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8bf1003e923b1..808fa08b2b8d1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3710,7 +3710,7 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx,
cast<VPWidenCastRecipe>(VecOp)->computeCost(VF, Ctx);
InstructionCost RedCost = Red->computeCost(VF, Ctx);
- if (isa<VPPartialReductionRecipe>(Red)) {
+ if (Red->isPartialReduction()) {
TargetTransformInfo::PartialReductionExtendKind ExtKind =
TargetTransformInfo::getPartialReductionExtendKind(ExtOpc);
// FIXME: Move partial reduction creation, costing and clamping
@@ -3751,8 +3751,6 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx,
static VPExpressionRecipe *
tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
VPCostContext &Ctx, VFRange &Range) {
- bool IsPartialReduction = isa<VPPartialReductionRecipe>(Red);
-
unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());
if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
return nullptr;
@@ -3770,7 +3768,7 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
InstructionCost MulAccCost;
- if (IsPartialReduction) {
+ if (Red->isPartialReduction()) {
Type *SrcTy2 =
Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
// FIXME: Move partial reduction creation, costing and clamping
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 839a304904e8b..c7a0fd7407a4e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -195,10 +195,9 @@ bool vputils::isSingleScalar(const VPValue *VPV) {
return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
(preservesUniformity(VPI->getOpcode()) &&
all_of(VPI->operands(), isSingleScalar));
- if (isa<VPPartialReductionRecipe>(VPV))
- return false;
- if (isa<VPReductionRecipe, VPVectorPointerRecipe, VPVectorEndPointerRecipe>(
- VPV))
+ if (auto *RR = dyn_cast<VPReductionRecipe>(VPV))
+ return !RR->isPartialReduction();
+ if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe>(VPV))
return true;
if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
return Expr->isSingleScalar();
@@ -270,7 +269,7 @@ unsigned vputils::getVFScaleFactor(VPRecipeBase *R) {
return 1;
if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
return RR->getVFScaleFactor();
- if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
+ if (auto *RR = dyn_cast<VPReductionRecipe>(R))
return RR->getVFScaleFactor();
if (auto *ER = dyn_cast<VPExpressionRecipe>(R))
return ER->getVFScaleFactor();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 63eacd3d75721..b9f5847ec731c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -349,7 +349,6 @@ class VPDef {
VPInterleaveSC,
VPReductionEVLSC,
VPReductionSC,
- VPPartialReductionSC,
VPReplicateSC,
VPScalarIVStepsSC,
VPVectorPointerSC,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
index 6df3f1b418eb6..a1d03c4a7fbc6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
@@ -80,8 +80,8 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) "target-features"="+neon,+do
; CHECK-NEXT: Successor(s): vector.body
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi vp<[[RDX_START]]>, ir<%add> (VF scaled by 1/4)
+; CHECK-NEXT: EMIT-SCALAR vp<[[EP_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]> (VF scaled by 1/4)
; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%index>
; CHECK-NEXT: WIDEN ir<%load.a> = load ir<%gep.a>
; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<%index>
@@ -89,13 +89,13 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) "target-features"="+neon,+do
; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
-; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%accum>, ir<%mul>
-; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<16>
-; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, ir<1024>
+; CHECK-NEXT: PARTIAL-REDUCE ir<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (ir<%mul>)
+; CHECK-NEXT: EMIT vp<[[EP_IV_NEXT:%.+]]> = add nuw vp<[[EP_IV]]>, ir<16>
+; CHECK-NEXT: EMIT branch-on-count vp<[[EP_IV_NEXT]]>, ir<1024>
; CHECK-NEXT: Successor(s): middle.block, vector.body
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RED_RESULT:%[0-9]+]]> = compute-reduction-result ir<%accum>, ir<%add>
+; CHECK-NEXT: EMIT vp<[[RED_RESULT:%[0-9]+]]> = compute-reduction-result ir<[[RDX]]>, ir<[[RDX_NEXT]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index 63776b78a2088..a6a1f672c5f70 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -1318,35 +1318,29 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
}
{
- auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
- PoisonValue::get(Int32));
VPValue *ChainOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
- VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
- CondOp, VecOp, false);
+ VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), ChainOp, VecOp,
+ CondOp, RdxUnordered{});
EXPECT_FALSE(Recipe.mayHaveSideEffects());
EXPECT_FALSE(Recipe.mayReadFromMemory());
EXPECT_FALSE(Recipe.mayWriteToMemory());
EXPECT_FALSE(Recipe.mayReadOrWriteMemory());
- delete Add;
}
{
- auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
- PoisonValue::get(Int32));
VPValue *ChainOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
- VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
- CondOp, VecOp, false);
+ VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), ChainOp, VecOp,
+ CondOp, RdxUnordered{});
VPValue *EVL = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 4));
VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp);
EXPECT_FALSE(EVLRecipe.mayHaveSideEffects());
EXPECT_FALSE(EVLRecipe.mayReadFromMemory());
EXPECT_FALSE(EVLRecipe.mayWriteToMemory());
EXPECT_FALSE(EVLRecipe.mayReadOrWriteMemory());
- delete Add;
}
{
@@ -1689,30 +1683,27 @@ TEST_F(VPRecipeTest, dumpRecipeUnnamedVPValuesNotInPlanOrBlock) {
TEST_F(VPRecipeTest, CastVPReductionRecipeToVPUser) {
IntegerType *Int32 = IntegerType::get(C, 32);
- auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
- PoisonValue::get(Int32));
VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
- VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
- CondOp, VecOp, false);
+ VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), ChainOp, VecOp,
+ CondOp, RdxUnordered{});
checkVPRecipeCastImpl<VPReductionRecipe, VPUser>(&Recipe);
- delete Add;
+ EXPECT_TRUE(isa<VPUser>(&Recipe));
+ VPRecipeBase *BaseR = &Recipe;
+ EXPECT_TRUE(isa<VPUser>(BaseR));
}
TEST_F(VPRecipeTest, CastVPReductionEVLRecipeToVPUser) {
IntegerType *Int32 = IntegerType::get(C, 32);
- auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
- PoisonValue::get(Int32));
VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
- VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
- CondOp, VecOp, false);
+ VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), ChainOp, VecOp,
+ CondOp, RdxUnordered{});
VPValue *EVL = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 0));
VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp);
checkVPRecipeCastImpl<VPReductionEVLRecipe, VPUser>(&EVLRecipe);
- delete Add;
}
} // namespace