[llvm] [VPlan] Compute cost of replicating calls in VPlan. (NFCI) (PR #154291)

Tue Aug 19 02:31:43 PDT 2025

================
@@ -2990,7 +2986,37 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
     SmallVector<Type *, 4> Tys;
     for (VPValue *ArgOp : drop_end(operands()))
       Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
-    return Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
+
+    InstructionCost ScalarCallCost =
+        Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
+    if (isSingleScalar())
+      return ScalarCallCost;
+
+    // Compute the cost of scalarizing the result and operands if needed.
+    InstructionCost ScalarizationCost = 0;
+    if (VF.isVector()) {
+      if (!ResultTy->isVoidTy()) {
+        for (Type *VectorTy : getContainedTypes(toVectorizedTy(ResultTy, VF))) {
+          ScalarizationCost += Ctx.TTI.getScalarizationOverhead(
+              cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
+              /*Insert=*/true,
+              /*Extract=*/false, Ctx.CostKind);
+        }
+      }
+      // Compute the cost of scalarizing the operands that require extraction.
+      SmallVector<Type *> Tys;
+      SmallPtrSet<const VPValue *, 4> UniqueOperands;
+      for (auto *Op : drop_end(operands())) {
+        if (isa<VPReplicateRecipe>(Op) || !UniqueOperands.insert(Op).second)
+          continue;
+        Tys.push_back(toVectorizedTy(Ctx.Types.inferScalarType(Op), VF));
+      }
+      ScalarizationCost +=
+          Ctx.TTI.getOperandsScalarizationOverhead(Tys, Ctx.CostKind);
+    }
+
+    return ScalarCallCost * (isSingleScalar() ? 1 : VF.getFixedValue()) +
+           ScalarizationCost;
----------------
SamTebbs33 wrote:

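I could be misreading the indentation here, but if this recipe were single-scalar, wouldn't it already have returned early at the `if (isSingleScalar())` check on line 2992? If so, the `isSingleScalar() ? 1 : VF.getFixedValue()` ternary in this return is redundant and could simply be `VF.getFixedValue()`.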

https://github.com/llvm/llvm-project/pull/154291