[llvm] [VPlan] Add VPBundleRecipe, replacing extended reduction recipes. (PR #144281)

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 24 06:30:02 PDT 2025


================
@@ -2446,30 +2440,193 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
                                             Ctx.CostKind);
 }
 
-InstructionCost
-VPExtendedReductionRecipe::computeCost(ElementCount VF,
-                                       VPCostContext &Ctx) const {
-  unsigned Opcode = RecurrenceDescriptor::getOpcode(getRecurrenceKind());
-  Type *RedTy = Ctx.Types.inferScalarType(this);
-  auto *SrcVecTy =
-      cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp()), VF));
-  assert(RedTy->isIntegerTy() &&
-         "ExtendedReduction only support integer type currently.");
-  return Ctx.TTI.getExtendedReductionCost(Opcode, isZExt(), RedTy, SrcVecTy,
-                                          std::nullopt, Ctx.CostKind);
+void VPBundleRecipe::bundle(ArrayRef<VPValue *> Operands) {
+  assert(!BundledRecipes.empty() && "Nothing to bundle?");
+
+  // Bundle up the operand recipes.
+  SmallPtrSet<VPUser *, 4> BundledUsers;
+  for (auto *R : BundledRecipes)
+    BundledUsers.insert(R);
+
+  // Recipes in the bundle, except the last one, must only be used inside the
+  // bundle. If there other external users, clone the recipes for the bundle.
+  for (unsigned Idx = 0; Idx != BundledRecipes.size() - 1; ++Idx) {
+    VPSingleDefRecipe *R = BundledRecipes[Idx];
+    if (all_of(R->users(), [&BundledUsers](VPUser *U) {
+          return BundledUsers.contains(U);
+        })) {
+      if (R->getParent())
+        R->removeFromParent();
+      continue;
+    }
+    // The users external to the bundle. Clone the recipe for use in the
+    // bundle and update all its in-bundle users.
+    VPSingleDefRecipe *Copy = R->clone();
+    BundledRecipes[Idx] = Copy;
+    BundledUsers.insert(Copy);
+    R->replaceUsesWithIf(Copy, [&BundledUsers](VPUser &U, unsigned) {
+      return BundledUsers.contains(&U);
+    });
+  }
+  if (BundledRecipes.back()->getParent())
+    BundledRecipes.back()->removeFromParent();
+
+  // Internalize all external operands to the bundled operations. To do so,
+  // create new temporary VPValues for all operands not defined by recipe in
+  // the bundle. The original operands are added as operands of the
+  // VPBundleRecipe.
+  for (auto *R : BundledRecipes) {
+    for (const auto &[Idx, Op] : enumerate(R->operands())) {
+      auto *Def = Op->getDefiningRecipe();
+      if (Def && BundledUsers.contains(Def))
+        continue;
+      if (Operands.empty())
+        addOperand(Op);
+      else
+        addOperand(Operands[TmpValues.size()]);
+      TmpValues.push_back(new VPValue());
+      R->setOperand(Idx, TmpValues.back());
+    }
+  }
 }
 
-InstructionCost
-VPMulAccumulateReductionRecipe::computeCost(ElementCount VF,
+void VPBundleRecipe::unbundle() {
+  for (auto *R : BundledRecipes)
+    if (!R->getParent())
+      R->insertBefore(this);
+
+  for (const auto &[Idx, Op] : enumerate(operands()))
+    TmpValues[Idx]->replaceAllUsesWith(Op);
+
+  replaceAllUsesWith(getResultRecipe());
+
+  if (BundleType == BundleTypes::MulAccumulateReduction &&
+      BundledRecipes.size() == 5) {
+    // Note that we will drop the extend after mul which transforms
+    // reduce.add(ext(mul(ext, ext))) to reduce.add(mul(ext, ext)).
+    // TODO: This transform should be done separately from bundling/unbundling.
+    auto *Ext0 = cast<VPWidenCastRecipe>(BundledRecipes[0]);
+    auto *Ext1 = cast<VPWidenCastRecipe>(BundledRecipes[1]);
+    auto *Ext2 = cast<VPWidenCastRecipe>(BundledRecipes[3]);
+    auto *Op0 =
+        new VPWidenCastRecipe(Ext0->getOpcode(), Ext0->getOperand(0),
+                              Ext2->getResultType(), *Ext0, getDebugLoc());
+    Op0->insertBefore(Ext0);
+
+    VPSingleDefRecipe *Op1 = Op0;
+    if (Ext0 != Ext1) {
+      Op1 = new VPWidenCastRecipe(Ext1->getOpcode(), Ext1->getOperand(0),
+                                  Ext2->getResultType(), *Ext1, getDebugLoc());
+      Op1->insertBefore(Ext1);
+    }
+    auto *Mul = cast<VPWidenRecipe>(BundledRecipes[2]);
+    auto *Red = cast<VPReductionRecipe>(BundledRecipes[4]);
+    Mul->setOperand(0, Op0);
+    Mul->setOperand(1, Op1);
+    Red->setOperand(1, Mul);
+    Ext0->eraseFromParent();
+    Ext2->eraseFromParent();
+    if (Ext0 != Ext1)
+      Ext1->eraseFromParent();
+  }
+  BundledRecipes.clear();
+}
+
+InstructionCost VPBundleRecipe::computeCost(ElementCount VF,
                                             VPCostContext &Ctx) const {
   Type *RedTy = Ctx.Types.inferScalarType(this);
-  auto *SrcVecTy =
-      cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp0()), VF));
-  return Ctx.TTI.getMulAccReductionCost(isZExt(), RedTy, SrcVecTy,
-                                        Ctx.CostKind);
+  auto *SrcVecTy = cast<VectorType>(
+      toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF));
+  assert(RedTy->isIntegerTy() &&
+         "ExtendedReduction only support integer type currently.");
+  switch (BundleType) {
+  case BundleTypes::ExtendedReduction: {
+    unsigned Opcode = RecurrenceDescriptor::getOpcode(
+        cast<VPReductionRecipe>(BundledRecipes[1])->getRecurrenceKind());
+    return Ctx.TTI.getExtendedReductionCost(
----------------
sdesmalen-arm wrote:

Not something to fix up in this PR, but I have noticed that the `select` that is inserted when the reduction is predicated, is not modelled. However, a `VPSelect` at the moment also returns a cost of `0` which seems wrong.

https://github.com/llvm/llvm-project/pull/144281


More information about the llvm-commits mailing list