[llvm] [VPlan] Compute cost of replicating calls in VPlan. (NFCI) (PR #154291)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 26 01:26:52 PDT 2025


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/154291

>From 93dfdaf1a3badf1de801823623f90f40f43ced34 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 18 Aug 2025 16:46:55 +0100
Subject: [PATCH 1/2] [VPlan] Compute cost of replicating calls in VPlan.
 (NFCI)

Compute the scalarization overhead for replicating (non-single-scalar) calls
directly in VPlan, matching the legacy cost model. Scalable VFs still return
an invalid cost.

Depends on https://github.com/llvm/llvm-project/pull/154126.
---
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 44 +++++++++++++++----
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f8fde0500b77a..c3cfd15d9a6ea 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3002,13 +3002,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
     // instruction cost.
     return 0;
   case Instruction::Call: {
-    if (!isSingleScalar()) {
-      // TODO: Handle remaining call costs here as well.
-      if (VF.isScalable())
-        return InstructionCost::getInvalid();
-      break;
-    }
-
     auto *CalledFn =
         cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
     if (CalledFn->isIntrinsic())
@@ -3017,8 +3010,43 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
     SmallVector<Type *, 4> Tys;
     for (VPValue *ArgOp : drop_end(operands()))
       Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
+
     Type *ResultTy = Ctx.Types.inferScalarType(this);
-    return Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
+    InstructionCost ScalarCallCost =
+        Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
+    if (isSingleScalar())
+      return ScalarCallCost;
+
+    if (VF.isScalable())
+      return InstructionCost::getInvalid();
+
+    // Compute the cost of scalarizing the result and operands if needed.
+    InstructionCost ScalarizationCost = 0;
+    if (VF.isVector()) {
+      if (!ResultTy->isVoidTy()) {
+        for (Type *VectorTy : getContainedTypes(toVectorizedTy(ResultTy, VF))) {
+          ScalarizationCost += Ctx.TTI.getScalarizationOverhead(
+              cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
+              /*Insert=*/true,
+              /*Extract=*/false, Ctx.CostKind);
+        }
+      }
+      // Skip operands that do not require extraction/scalarization and do not
+      // incur any overhead.
+      SmallVector<Type *> Tys;
+      SmallPtrSet<const VPValue *, 4> UniqueOperands;
+      for (auto *Op : drop_end(operands())) {
+        if (Op->isLiveIn() || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
+            !UniqueOperands.insert(Op).second)
+          continue;
+        Tys.push_back(toVectorizedTy(Ctx.Types.inferScalarType(Op), VF));
+      }
+      ScalarizationCost +=
+          Ctx.TTI.getOperandsScalarizationOverhead(Tys, Ctx.CostKind);
+    }
+
+    return ScalarCallCost * (isSingleScalar() ? 1 : VF.getFixedValue()) +
+           ScalarizationCost;
   }
   case Instruction::Add:
   case Instruction::Sub:

>From 5fdb4e5d523e2675527880c10368486478276ef7 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 26 Aug 2025 09:26:16 +0100
Subject: [PATCH 2/2] !fixup Reuse Tys via clear() instead of declaring a new SmallVector.

---
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 097f8c2b4d3bf..5bbf8ccca4c32 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3040,7 +3040,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
     SmallVector<Type *, 4> Tys;
     for (VPValue *ArgOp : drop_end(operands()))
       Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
-
     Type *ResultTy = Ctx.Types.inferScalarType(this);
     InstructionCost ScalarCallCost =
         Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
@@ -3063,8 +3062,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
       }
       // Skip operands that do not require extraction/scalarization and do not
       // incur any overhead.
-      SmallVector<Type *> Tys;
       SmallPtrSet<const VPValue *, 4> UniqueOperands;
+      Tys.clear();
       for (auto *Op : drop_end(operands())) {
         if (Op->isLiveIn() || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
             !UniqueOperands.insert(Op).second)



More information about the llvm-commits mailing list