[llvm] df09879 - [VPlan] Compute cost of intrinsics directly for VPReplicateRecipe (NFCI). (#154617)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 27 13:40:51 PDT 2025
Author: Florian Hahn
Date: 2025-08-27T21:40:47+01:00
New Revision: df098796ec188b80ee3816aadc97590c11fed307
URL: https://github.com/llvm/llvm-project/commit/df098796ec188b80ee3816aadc97590c11fed307
DIFF: https://github.com/llvm/llvm-project/commit/df098796ec188b80ee3816aadc97590c11fed307.diff
LOG: [VPlan] Compute cost of intrinsics directly for VPReplicateRecipe (NFCI). (#154617)
Handle intrinsic calls in VPReplicateRecipe::computeCost. There are some
pseudo-intrinsics for which the computed cost is known to be zero,
so we handle those up front.
Depends on https://github.com/llvm/llvm-project/pull/154291.
PR: https://github.com/llvm/llvm-project/pull/154617
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2754a389ea12d..bd9a93ed57b8a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3063,17 +3063,42 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
case Instruction::Call: {
auto *CalledFn =
cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
- if (CalledFn->isIntrinsic())
- break;
+ SmallVector<const VPValue *> ArgOps(drop_end(operands()));
SmallVector<Type *, 4> Tys;
- for (VPValue *ArgOp : drop_end(operands()))
+ for (const VPValue *ArgOp : ArgOps)
Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
+
+ if (CalledFn->isIntrinsic())
+ // Various pseudo-intrinsics with costs of 0 are scalarized instead of
+ // vectorized via VPWidenIntrinsicRecipe. Return 0 for them early.
+ switch (CalledFn->getIntrinsicID()) {
+ case Intrinsic::assume:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::sideeffect:
+ case Intrinsic::pseudoprobe:
+ case Intrinsic::experimental_noalias_scope_decl: {
+ assert(getCostForIntrinsics(CalledFn->getIntrinsicID(), ArgOps, *this,
+ ElementCount::getFixed(1), Ctx) == 0 &&
+ "scalarizing intrinsic should be free");
+ return InstructionCost(0);
+ }
+ default:
+ break;
+ }
+
Type *ResultTy = Ctx.Types.inferScalarType(this);
InstructionCost ScalarCallCost =
Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
- if (isSingleScalar())
+ if (isSingleScalar()) {
+ if (CalledFn->isIntrinsic())
+ ScalarCallCost = std::min(
+ ScalarCallCost,
+ getCostForIntrinsics(CalledFn->getIntrinsicID(), ArgOps, *this,
+ ElementCount::getFixed(1), Ctx));
return ScalarCallCost;
+ }
if (VF.isScalable())
return InstructionCost::getInvalid();
@@ -3094,7 +3119,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
// incur any overhead.
SmallPtrSet<const VPValue *, 4> UniqueOperands;
Tys.clear();
- for (auto *Op : drop_end(operands())) {
+ for (auto *Op : ArgOps) {
if (Op->isLiveIn() || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
!UniqueOperands.insert(Op).second)
continue;
@@ -3104,8 +3129,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
Ctx.TTI.getOperandsScalarizationOverhead(Tys, Ctx.CostKind);
}
- return ScalarCallCost * (isSingleScalar() ? 1 : VF.getFixedValue()) +
- ScalarizationCost;
+ return ScalarCallCost * VF.getFixedValue() + ScalarizationCost;
}
case Instruction::Add:
case Instruction::Sub:
More information about the llvm-commits
mailing list