[llvm] [VPlan] Compute cost of replicating calls in VPlan. (NFCI) (PR #154291)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 19 01:47:45 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Implement computing the scalarization overhead for replicating calls in
VPlan, matching the legacy cost model.
Depends on https://github.com/llvm/llvm-project/pull/154126 (included in this PR).
---
Full diff: https://github.com/llvm/llvm-project/pull/154291.diff
6 Files Affected:
- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+3-5)
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+1-2)
- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+28-18)
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+2-3)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+11-4)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+33-7)
``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 9186419715cc4..5b3e42908c58f 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -961,12 +961,10 @@ class TargetTransformInfo {
TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
ArrayRef<Value *> VL = {}) const;
- /// Estimate the overhead of scalarizing an instructions unique
- /// non-constant operands. The (potentially vector) types to use for each of
- /// argument are passes via Tys.
+ /// Estimate the overhead of scalarizing operands with the given types. The
+ /// (potentially vector) types to use for each of argument are passes via Tys.
LLVM_ABI InstructionCost getOperandsScalarizationOverhead(
- ArrayRef<const Value *> Args, ArrayRef<Type *> Tys,
- TTI::TargetCostKind CostKind) const;
+ ArrayRef<Type *> Tys, TTI::TargetCostKind CostKind) const;
/// If target has efficient vector element load/store instructions, it can
/// return true here so that insertion/extraction costs are not added to
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 200cbafbaa6e2..183f1692746ce 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -459,8 +459,7 @@ class TargetTransformInfoImplBase {
}
virtual InstructionCost
- getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys,
+ getOperandsScalarizationOverhead(ArrayRef<Type *> Tys,
TTI::TargetCostKind CostKind) const {
return 0;
}
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index aa9d1f0a1ccea..4a02ae4f8fcfb 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -347,6 +348,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return Cost;
}
+ /// Filter out constant and duplicated entries in \p Ops and return a vector
+ /// containing the corresponding types.
+ static SmallVector<Type *, 4>
+ filterConstantAndDuplicatedOperands(ArrayRef<const Value *> Ops,
+ ArrayRef<Type *> Tys) {
+ SmallPtrSet<const Value *, 4> UniqueOperands;
+ SmallVector<Type *, 4> FilteredTys;
+ for (const auto &[Op, Ty] : zip_equal(Ops, Tys)) {
+ if (isa<Constant>(Op) || !UniqueOperands.insert(Op).second)
+ continue;
+ FilteredTys.push_back(Ty);
+ }
+ return FilteredTys;
+ }
+
protected:
explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
: BaseT(DL) {}
@@ -935,29 +951,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
CostKind);
}
- /// Estimate the overhead of scalarizing an instructions unique
- /// non-constant operands. The (potentially vector) types to use for each of
+ /// Estimate the overhead of scalarizing an instructions
+ /// operands. The (potentially vector) types to use for each of
/// argument are passes via Tys.
InstructionCost getOperandsScalarizationOverhead(
- ArrayRef<const Value *> Args, ArrayRef<Type *> Tys,
- TTI::TargetCostKind CostKind) const override {
- assert(Args.size() == Tys.size() && "Expected matching Args and Tys");
-
+ ArrayRef<Type *> Tys, TTI::TargetCostKind CostKind) const override {
InstructionCost Cost = 0;
- SmallPtrSet<const Value*, 4> UniqueOperands;
- for (int I = 0, E = Args.size(); I != E; I++) {
+ for (Type *Ty : Tys) {
// Disregard things like metadata arguments.
- const Value *A = Args[I];
- Type *Ty = Tys[I];
if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() &&
!Ty->isPtrOrPtrVectorTy())
continue;
- if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
- if (auto *VecTy = dyn_cast<VectorType>(Ty))
- Cost += getScalarizationOverhead(VecTy, /*Insert*/ false,
- /*Extract*/ true, CostKind);
- }
+ if (auto *VecTy = dyn_cast<VectorType>(Ty))
+ Cost += getScalarizationOverhead(VecTy, /*Insert*/ false,
+ /*Extract*/ true, CostKind);
}
return Cost;
@@ -974,7 +982,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
InstructionCost Cost = getScalarizationOverhead(
RetTy, /*Insert*/ true, /*Extract*/ false, CostKind);
if (!Args.empty())
- Cost += getOperandsScalarizationOverhead(Args, Tys, CostKind);
+ Cost += getOperandsScalarizationOverhead(
+ filterConstantAndDuplicatedOperands(Args, Tys), CostKind);
else
// When no information on arguments is provided, we add the cost
// associated with one argument as a heuristic.
@@ -2156,8 +2165,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
/*Insert=*/true, /*Extract=*/false, CostKind);
}
}
- ScalarizationCost +=
- getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind);
+ ScalarizationCost += getOperandsScalarizationOverhead(
+ filterConstantAndDuplicatedOperands(Args, ICA.getArgTypes()),
+ CostKind);
}
IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 3141060a710ce..296209a3f917c 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -637,9 +637,8 @@ InstructionCost TargetTransformInfo::getScalarizationOverhead(
}
InstructionCost TargetTransformInfo::getOperandsScalarizationOverhead(
- ArrayRef<const Value *> Args, ArrayRef<Type *> Tys,
- TTI::TargetCostKind CostKind) const {
- return TTIImpl->getOperandsScalarizationOverhead(Args, Tys, CostKind);
+ ArrayRef<Type *> Tys, TTI::TargetCostKind CostKind) const {
+ return TTIImpl->getOperandsScalarizationOverhead(Tys, CostKind);
}
bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8894b1692d562..5c811a1a45b52 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1697,8 +1697,16 @@ class LoopVectorizationCostModel {
/// Returns a range containing only operands needing to be extracted.
SmallVector<Value *, 4> filterExtractingOperands(Instruction::op_range Ops,
ElementCount VF) const {
- return SmallVector<Value *, 4>(make_filter_range(
- Ops, [this, VF](Value *V) { return this->needsExtract(V, VF); }));
+
+ SmallPtrSet<const Value *, 4> UniqueOperands;
+ SmallVector<Value *, 4> Res;
+ for (Value *Op : Ops) {
+ if (isa<Constant>(Op) || !UniqueOperands.insert(Op).second ||
+ !needsExtract(Op, VF))
+ continue;
+ Res.push_back(Op);
+ }
+ return Res;
}
public:
@@ -5610,8 +5618,7 @@ LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
SmallVector<Type *> Tys;
for (auto *V : filterExtractingOperands(Ops, VF))
Tys.push_back(maybeVectorizeType(V->getType(), VF));
- return Cost + TTI.getOperandsScalarizationOverhead(
- filterExtractingOperands(Ops, VF), Tys, CostKind);
+ return Cost + TTI.getOperandsScalarizationOverhead(Tys, CostKind);
}
void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index acd6a97344116..e3b5be879f61f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2975,12 +2975,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
// instruction cost.
return 0;
case Instruction::Call: {
- if (!isSingleScalar()) {
- // TODO: Handle remaining call costs here as well.
- if (VF.isScalable())
- return InstructionCost::getInvalid();
- break;
- }
+ if (!isSingleScalar() && VF.isScalable())
+ return InstructionCost::getInvalid();
auto *CalledFn =
cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
@@ -2990,7 +2986,37 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
SmallVector<Type *, 4> Tys;
for (VPValue *ArgOp : drop_end(operands()))
Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
- return Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
+
+ InstructionCost ScalarCallCost =
+ Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
+ if (isSingleScalar())
+ return ScalarCallCost;
+
+ // Compute the cost of scalarizing the result and operands if needed.
+ InstructionCost ScalarizationCost = 0;
+ if (VF.isVector()) {
+ if (!ResultTy->isVoidTy()) {
+ for (Type *VectorTy : getContainedTypes(toVectorizedTy(ResultTy, VF))) {
+ ScalarizationCost += Ctx.TTI.getScalarizationOverhead(
+ cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
+ /*Insert=*/true,
+ /*Extract=*/false, Ctx.CostKind);
+ }
+ }
+ // Compute the cost of scalarizing the operands that require extraction.
+ SmallVector<Type *> Tys;
+ SmallPtrSet<const VPValue *, 4> UniqueOperands;
+ for (auto *Op : drop_end(operands())) {
+ if (isa<VPReplicateRecipe>(Op) || !UniqueOperands.insert(Op).second)
+ continue;
+ Tys.push_back(toVectorizedTy(Ctx.Types.inferScalarType(Op), VF));
+ }
+ ScalarizationCost +=
+ Ctx.TTI.getOperandsScalarizationOverhead(Tys, Ctx.CostKind);
+ }
+
+ return ScalarCallCost * (isSingleScalar() ? 1 : VF.getFixedValue()) +
+ ScalarizationCost;
}
case Instruction::Add:
case Instruction::Sub:
``````````
</details>
https://github.com/llvm/llvm-project/pull/154291
More information about the llvm-commits mailing list