[llvm] [VPlan] Add VPBundleRecipe, replacing extended reduction recipes. (PR #144281)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 24 06:30:02 PDT 2025
================
@@ -2446,30 +2440,193 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
Ctx.CostKind);
}
-InstructionCost
-VPExtendedReductionRecipe::computeCost(ElementCount VF,
- VPCostContext &Ctx) const {
- unsigned Opcode = RecurrenceDescriptor::getOpcode(getRecurrenceKind());
- Type *RedTy = Ctx.Types.inferScalarType(this);
- auto *SrcVecTy =
- cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp()), VF));
- assert(RedTy->isIntegerTy() &&
- "ExtendedReduction only support integer type currently.");
- return Ctx.TTI.getExtendedReductionCost(Opcode, isZExt(), RedTy, SrcVecTy,
- std::nullopt, Ctx.CostKind);
+void VPBundleRecipe::bundle(ArrayRef<VPValue *> Operands) {
+ assert(!BundledRecipes.empty() && "Nothing to bundle?");
+
+ // Bundle up the operand recipes.
+ SmallPtrSet<VPUser *, 4> BundledUsers;
+ for (auto *R : BundledRecipes)
+ BundledUsers.insert(R);
+
+ // Recipes in the bundle, except the last one, must only be used inside the
+ // bundle. If there are other external users, clone the recipes for the bundle.
+ for (unsigned Idx = 0; Idx != BundledRecipes.size() - 1; ++Idx) {
+ VPSingleDefRecipe *R = BundledRecipes[Idx];
+ if (all_of(R->users(), [&BundledUsers](VPUser *U) {
+ return BundledUsers.contains(U);
+ })) {
+ if (R->getParent())
+ R->removeFromParent();
+ continue;
+ }
+ // There are users external to the bundle. Clone the recipe for use in the
+ // bundle and update all its in-bundle users.
+ VPSingleDefRecipe *Copy = R->clone();
+ BundledRecipes[Idx] = Copy;
+ BundledUsers.insert(Copy);
+ R->replaceUsesWithIf(Copy, [&BundledUsers](VPUser &U, unsigned) {
+ return BundledUsers.contains(&U);
+ });
+ }
+ if (BundledRecipes.back()->getParent())
+ BundledRecipes.back()->removeFromParent();
+
+ // Internalize all external operands to the bundled operations. To do so,
+ // create new temporary VPValues for all operands not defined by a recipe in
+ // the bundle. The original operands are added as operands of the
+ // VPBundleRecipe.
+ for (auto *R : BundledRecipes) {
+ for (const auto &[Idx, Op] : enumerate(R->operands())) {
+ auto *Def = Op->getDefiningRecipe();
+ if (Def && BundledUsers.contains(Def))
+ continue;
+ if (Operands.empty())
+ addOperand(Op);
+ else
+ addOperand(Operands[TmpValues.size()]);
+ TmpValues.push_back(new VPValue());
+ R->setOperand(Idx, TmpValues.back());
+ }
+ }
}
-InstructionCost
-VPMulAccumulateReductionRecipe::computeCost(ElementCount VF,
+void VPBundleRecipe::unbundle() {
+ for (auto *R : BundledRecipes)
+ if (!R->getParent())
+ R->insertBefore(this);
+
+ for (const auto &[Idx, Op] : enumerate(operands()))
+ TmpValues[Idx]->replaceAllUsesWith(Op);
+
+ replaceAllUsesWith(getResultRecipe());
+
+ if (BundleType == BundleTypes::MulAccumulateReduction &&
+ BundledRecipes.size() == 5) {
+ // Note that we will drop the extend after mul which transforms
+ // reduce.add(ext(mul(ext, ext))) to reduce.add(mul(ext, ext)).
+ // TODO: This transform should be done separately from bundling/unbundling.
+ auto *Ext0 = cast<VPWidenCastRecipe>(BundledRecipes[0]);
+ auto *Ext1 = cast<VPWidenCastRecipe>(BundledRecipes[1]);
+ auto *Ext2 = cast<VPWidenCastRecipe>(BundledRecipes[3]);
+ auto *Op0 =
+ new VPWidenCastRecipe(Ext0->getOpcode(), Ext0->getOperand(0),
+ Ext2->getResultType(), *Ext0, getDebugLoc());
+ Op0->insertBefore(Ext0);
+
+ VPSingleDefRecipe *Op1 = Op0;
+ if (Ext0 != Ext1) {
+ Op1 = new VPWidenCastRecipe(Ext1->getOpcode(), Ext1->getOperand(0),
+ Ext2->getResultType(), *Ext1, getDebugLoc());
+ Op1->insertBefore(Ext1);
+ }
+ auto *Mul = cast<VPWidenRecipe>(BundledRecipes[2]);
+ auto *Red = cast<VPReductionRecipe>(BundledRecipes[4]);
+ Mul->setOperand(0, Op0);
+ Mul->setOperand(1, Op1);
+ Red->setOperand(1, Mul);
+ Ext0->eraseFromParent();
+ Ext2->eraseFromParent();
+ if (Ext0 != Ext1)
+ Ext1->eraseFromParent();
+ }
+ BundledRecipes.clear();
+}
+
+InstructionCost VPBundleRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
Type *RedTy = Ctx.Types.inferScalarType(this);
- auto *SrcVecTy =
- cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp0()), VF));
- return Ctx.TTI.getMulAccReductionCost(isZExt(), RedTy, SrcVecTy,
- Ctx.CostKind);
+ auto *SrcVecTy = cast<VectorType>(
+ toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF));
+ assert(RedTy->isIntegerTy() &&
+ "ExtendedReduction only support integer type currently.");
+ switch (BundleType) {
+ case BundleTypes::ExtendedReduction: {
+ unsigned Opcode = RecurrenceDescriptor::getOpcode(
+ cast<VPReductionRecipe>(BundledRecipes[1])->getRecurrenceKind());
+ return Ctx.TTI.getExtendedReductionCost(
----------------
sdesmalen-arm wrote:
Not something to fix up in this PR, but I have noticed that the `select` that is inserted when the reduction is predicated is not modelled. However, a `VPSelect` currently also returns a cost of `0`, which seems wrong.
https://github.com/llvm/llvm-project/pull/144281
More information about the llvm-commits
mailing list