[llvm] [VPlan] Compute cost of replicating calls in VPlan. (NFCI) (PR #154291)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 26 01:26:52 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/154291
>From 93dfdaf1a3badf1de801823623f90f40f43ced34 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 18 Aug 2025 16:46:55 +0100
Subject: [PATCH 1/2] [VPlan] Compute cost of replicating calls in VPlan. (NFCI)
Implement computing the scalarization overhead for replicating calls in
VPlan, matching the legacy cost model.
Depends on https://github.com/llvm/llvm-project/pull/154126.
---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 44 +++++++++++++++----
1 file changed, 36 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f8fde0500b77a..c3cfd15d9a6ea 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3002,13 +3002,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
// instruction cost.
return 0;
case Instruction::Call: {
- if (!isSingleScalar()) {
- // TODO: Handle remaining call costs here as well.
- if (VF.isScalable())
- return InstructionCost::getInvalid();
- break;
- }
-
auto *CalledFn =
cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
if (CalledFn->isIntrinsic())
@@ -3017,8 +3010,43 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
SmallVector<Type *, 4> Tys;
for (VPValue *ArgOp : drop_end(operands()))
Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
+
Type *ResultTy = Ctx.Types.inferScalarType(this);
- return Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
+ InstructionCost ScalarCallCost =
+ Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
+ if (isSingleScalar())
+ return ScalarCallCost;
+
+ if (VF.isScalable())
+ return InstructionCost::getInvalid();
+
+ // Compute the cost of scalarizing the result and operands if needed.
+ InstructionCost ScalarizationCost = 0;
+ if (VF.isVector()) {
+ if (!ResultTy->isVoidTy()) {
+ for (Type *VectorTy : getContainedTypes(toVectorizedTy(ResultTy, VF))) {
+ ScalarizationCost += Ctx.TTI.getScalarizationOverhead(
+ cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
+ /*Insert=*/true,
+ /*Extract=*/false, Ctx.CostKind);
+ }
+ }
+ // Skip operands that do not require extraction/scalarization and do not
+ // incur any overhead.
+ SmallVector<Type *> Tys;
+ SmallPtrSet<const VPValue *, 4> UniqueOperands;
+ for (auto *Op : drop_end(operands())) {
+ if (Op->isLiveIn() || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
+ !UniqueOperands.insert(Op).second)
+ continue;
+ Tys.push_back(toVectorizedTy(Ctx.Types.inferScalarType(Op), VF));
+ }
+ ScalarizationCost +=
+ Ctx.TTI.getOperandsScalarizationOverhead(Tys, Ctx.CostKind);
+ }
+
+ return ScalarCallCost * (isSingleScalar() ? 1 : VF.getFixedValue()) +
+ ScalarizationCost;
}
case Instruction::Add:
case Instruction::Sub:
>From 5fdb4e5d523e2675527880c10368486478276ef7 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 26 Aug 2025 09:26:16 +0100
Subject: [PATCH 2/2] !fixup Reuse Tys via clear() instead of declaring a new SmallVector.
---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 097f8c2b4d3bf..5bbf8ccca4c32 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3040,7 +3040,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
SmallVector<Type *, 4> Tys;
for (VPValue *ArgOp : drop_end(operands()))
Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
-
Type *ResultTy = Ctx.Types.inferScalarType(this);
InstructionCost ScalarCallCost =
Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
@@ -3063,8 +3062,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
}
// Skip operands that do not require extraction/scalarization and do not
// incur any overhead.
- SmallVector<Type *> Tys;
SmallPtrSet<const VPValue *, 4> UniqueOperands;
+ Tys.clear();
for (auto *Op : drop_end(operands())) {
if (Op->isLiveIn() || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
!UniqueOperands.insert(Op).second)
More information about the llvm-commits mailing list