[llvm] [VPlan] Factor out logic to compute common costs to helper (NFCI). (PR #153361)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 20 03:08:16 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/153361
>From 59d2dbf5057ca74317a5209bf83d32a5c9365572 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 12 Aug 2025 21:30:11 +0100
Subject: [PATCH 1/3] [VPlan] Factor out logic to compute common costs to
helper (NFCI).
A number of recipes compute costs for the same opcodes for scalars or
vectors, depending on the recipe.
Move the common logic out to a helper in VPRecipeWithIRFlags, which is
then used by VPReplicateRecipe, VPWidenRecipe and VPInstruction.
This makes it easier to cover all relevant opcodes, without
duplication.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 7 +
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 149 ++++++++++--------
2 files changed, 88 insertions(+), 68 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 11a7d8b339ae9..869b3ea769737 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -898,6 +898,13 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
}
void execute(VPTransformState &State) override = 0;
+
+ /// Compute the cost for this recipe using \p Opcode. If \p IsVector is true,
+ /// compute the vector cost, otherwise compute the scalar cost for a single
+ /// instance.
+ std::optional<InstructionCost> getCommonCost(unsigned Opcode, ElementCount VF,
+ VPCostContext &Ctx,
+ bool IsVector) const;
};
/// Helper to access the operand that contains the unroll part for this recipe
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 23c10d2b25263..4192bccb0a1fd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -940,28 +940,88 @@ Value *VPInstruction::generate(VPTransformState &State) {
}
}
+std::optional<InstructionCost>
+VPRecipeWithIRFlags::getCommonCost(unsigned Opcode, ElementCount VF,
+ VPCostContext &Ctx, bool IsVector) const {
+ Type *ScalarTy = Ctx.Types.inferScalarType(this);
+ Type *ResultTy = IsVector ? toVectorTy(ScalarTy, VF) : ScalarTy;
+ switch (Opcode) {
+ case Instruction::FNeg:
+ return Ctx.TTI.getArithmeticInstrCost(
+ Opcode, ResultTy, Ctx.CostKind,
+ {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
+ {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ VPValue *RHS = getOperand(1);
+ // Certain instructions can be cheaper to vectorize if they have a constant
+ // second vector operand. One example of this are shifts on x86.
+ TargetTransformInfo::OperandValueInfo RHSInfo = Ctx.getOperandInfo(RHS);
+
+ if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
+ getOperand(1)->isDefinedOutsideLoopRegions())
+ RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
+ Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
+
+ SmallVector<const Value *, 4> Operands;
+ if (CtxI)
+ Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
+ return Ctx.TTI.getArithmeticInstrCost(
+ Opcode, ResultTy, Ctx.CostKind,
+ {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
+ RHSInfo, Operands, CtxI, &Ctx.TLI);
+ }
+ case Instruction::Freeze:
+ // This opcode is unknown. Assume that it is the same as 'mul'.
+ return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, ResultTy,
+ Ctx.CostKind);
+ case Instruction::ExtractValue:
+ return Ctx.TTI.getInsertExtractValueCost(Instruction::ExtractValue,
+ Ctx.CostKind);
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ Type *ScalarOpTy = Ctx.Types.inferScalarType(getOperand(0));
+ Type *OpTy = IsVector ? toVectorTy(ScalarOpTy, VF) : ScalarOpTy;
+ Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
+ return Ctx.TTI.getCmpSelInstrCost(
+ Opcode, OpTy, CmpInst::makeCmpResultType(OpTy), getPredicate(),
+ Ctx.CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+ {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
+ }
+ }
+ return std::nullopt;
+}
+
InstructionCost VPInstruction::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
if (Instruction::isBinaryOp(getOpcode())) {
- Type *ResTy = Ctx.Types.inferScalarType(this);
- if (!vputils::onlyFirstLaneUsed(this))
- ResTy = toVectorTy(ResTy, VF);
-
- if (!getUnderlyingValue()) {
- switch (getOpcode()) {
- case Instruction::FMul:
- return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind);
- default:
- // TODO: Compute cost for VPInstructions without underlying values once
- // the legacy cost model has been retired.
- return 0;
- }
+ if (!getUnderlyingValue() && getOpcode() != Instruction::FMul) {
+ // TODO: Compute cost for VPInstructions without underlying values once
+ // the legacy cost model has been retired.
+ return 0;
}
assert(!doesGeneratePerAllLanes() &&
"Should only generate a vector value or single scalar, not scalars "
"for all lanes.");
- return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind);
+ return *getCommonCost(getOpcode(), VF, Ctx,
+ /*IsVector=*/!vputils::onlyFirstLaneUsed(this));
}
switch (getOpcode()) {
@@ -2025,20 +2085,13 @@ void VPWidenRecipe::execute(VPTransformState &State) {
InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
switch (Opcode) {
- case Instruction::FNeg: {
- Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
- return Ctx.TTI.getArithmeticInstrCost(
- Opcode, VectorTy, Ctx.CostKind,
- {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
- {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
- }
-
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::SRem:
case Instruction::URem:
// More complex computation, let the legacy cost-model handle this for now.
return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
+ case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -2052,45 +2105,12 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
- case Instruction::Xor: {
- VPValue *RHS = getOperand(1);
- // Certain instructions can be cheaper to vectorize if they have a constant
- // second vector operand. One example of this are shifts on x86.
- TargetTransformInfo::OperandValueInfo RHSInfo = Ctx.getOperandInfo(RHS);
-
- if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
- getOperand(1)->isDefinedOutsideLoopRegions())
- RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
- Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
- Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
-
- SmallVector<const Value *, 4> Operands;
- if (CtxI)
- Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
- return Ctx.TTI.getArithmeticInstrCost(
- Opcode, VectorTy, Ctx.CostKind,
- {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
- RHSInfo, Operands, CtxI, &Ctx.TLI);
- }
- case Instruction::Freeze: {
- // This opcode is unknown. Assume that it is the same as 'mul'.
- Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
- return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy,
- Ctx.CostKind);
- }
- case Instruction::ExtractValue: {
- return Ctx.TTI.getInsertExtractValueCost(Instruction::ExtractValue,
- Ctx.CostKind);
- }
+ case Instruction::Xor:
+ case Instruction::Freeze:
+ case Instruction::ExtractValue:
case Instruction::ICmp:
- case Instruction::FCmp: {
- Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
- Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
- return Ctx.TTI.getCmpSelInstrCost(
- Opcode, VectorTy, CmpInst::makeCmpResultType(VectorTy), getPredicate(),
- Ctx.CostKind, {TTI::OK_AnyValue, TTI::OP_None},
- {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
- }
+ case Instruction::FCmp:
+ return *getCommonCost(getOpcode(), VF, Ctx, /*IsVector=*/true);
default:
llvm_unreachable("Unsupported opcode for instruction");
}
@@ -2964,8 +2984,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
// transform, avoid computing their cost multiple times for now.
Ctx.SkipCostComputation.insert(UI);
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- Type *ResultTy = Ctx.Types.inferScalarType(this);
switch (UI->getOpcode()) {
case Instruction::GetElementPtr:
// We mark this instruction as zero-cost because the cost of GEPs in
@@ -2987,12 +3005,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- auto Op2Info = Ctx.getOperandInfo(getOperand(1));
- SmallVector<const Value *, 4> Operands(UI->operand_values());
- return Ctx.TTI.getArithmeticInstrCost(
- UI->getOpcode(), ResultTy, CostKind,
- {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
- Op2Info, Operands, UI, &Ctx.TLI) *
+ return *getCommonCost(getOpcode(), VF, Ctx, /*IsVector=*/false) *
(isSingleScalar() ? 1 : VF.getFixedValue());
}
}
>From 0d082efd51af701464326b70b6acfb372b9e0a8a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 18 Aug 2025 11:46:35 +0100
Subject: [PATCH 2/3] !fixup address comments
---
llvm/lib/Transforms/Vectorize/VPlan.h | 7 +--
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 43 ++++++++++---------
2 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 821d29261a1c3..db60aa1bfceac 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -899,12 +899,9 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
void execute(VPTransformState &State) override = 0;
- /// Compute the cost for this recipe using \p Opcode. If \p IsVector is true,
- /// compute the vector cost, otherwise compute the scalar cost for a single
- /// instance.
+ /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
std::optional<InstructionCost> getCommonCost(unsigned Opcode, ElementCount VF,
- VPCostContext &Ctx,
- bool IsVector) const;
+ VPCostContext &Ctx) const;
};
/// Helper to access the operand that contains the unroll part for this recipe
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index de949f830fa75..143ed8909188f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -942,15 +942,12 @@ Value *VPInstruction::generate(VPTransformState &State) {
std::optional<InstructionCost>
VPRecipeWithIRFlags::getCommonCost(unsigned Opcode, ElementCount VF,
- VPCostContext &Ctx, bool IsVector) const {
+ VPCostContext &Ctx) const {
Type *ScalarTy = Ctx.Types.inferScalarType(this);
- Type *ResultTy = IsVector ? toVectorTy(ScalarTy, VF) : ScalarTy;
+ Type *ResultTy = VF.isVector() ? toVectorTy(ScalarTy, VF) : ScalarTy;
switch (Opcode) {
case Instruction::FNeg:
- return Ctx.TTI.getArithmeticInstrCost(
- Opcode, ResultTy, Ctx.CostKind,
- {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
- {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
+ return Ctx.TTI.getArithmeticInstrCost(Opcode, ResultTy, Ctx.CostKind);
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::SRem:
@@ -969,16 +966,21 @@ VPRecipeWithIRFlags::getCommonCost(unsigned Opcode, ElementCount VF,
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- VPValue *RHS = getOperand(1);
- // Certain instructions can be cheaper to vectorize if they have a constant
- // second vector operand. One example of this are shifts on x86.
- TargetTransformInfo::OperandValueInfo RHSInfo = Ctx.getOperandInfo(RHS);
-
- if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
- getOperand(1)->isDefinedOutsideLoopRegions())
- RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
- Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
+ TargetTransformInfo::OperandValueInfo RHSInfo = {
+ TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None};
+
+ if (VF.isVector()) {
+ // Certain instructions can be cheaper to vectorize if they have a
+ // constant second vector operand. One example of this are shifts on x86.
+ VPValue *RHS = getOperand(1);
+ RHSInfo = Ctx.getOperandInfo(RHS);
+
+ if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
+ getOperand(1)->isDefinedOutsideLoopRegions())
+ RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
+ }
+ Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
SmallVector<const Value *, 4> Operands;
if (CtxI)
Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
@@ -997,7 +999,7 @@ VPRecipeWithIRFlags::getCommonCost(unsigned Opcode, ElementCount VF,
case Instruction::ICmp:
case Instruction::FCmp: {
Type *ScalarOpTy = Ctx.Types.inferScalarType(getOperand(0));
- Type *OpTy = IsVector ? toVectorTy(ScalarOpTy, VF) : ScalarOpTy;
+ Type *OpTy = VF.isVector() ? toVectorTy(ScalarOpTy, VF) : ScalarOpTy;
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
return Ctx.TTI.getCmpSelInstrCost(
Opcode, OpTy, CmpInst::makeCmpResultType(OpTy), getPredicate(),
@@ -1020,8 +1022,9 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
assert(!doesGeneratePerAllLanes() &&
"Should only generate a vector value or single scalar, not scalars "
"for all lanes.");
- return *getCommonCost(getOpcode(), VF, Ctx,
- /*IsVector=*/!vputils::onlyFirstLaneUsed(this));
+ return *getCommonCost(
+ getOpcode(),
+ vputils::onlyFirstLaneUsed(this) ? ElementCount::getFixed(1) : VF, Ctx);
}
switch (getOpcode()) {
@@ -2110,7 +2113,7 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
case Instruction::ExtractValue:
case Instruction::ICmp:
case Instruction::FCmp:
- return *getCommonCost(getOpcode(), VF, Ctx, /*IsVector=*/true);
+ return *getCommonCost(getOpcode(), VF, Ctx);
default:
llvm_unreachable("Unsupported opcode for instruction");
}
@@ -3005,7 +3008,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- return *getCommonCost(getOpcode(), VF, Ctx, /*IsVector=*/false) *
+ return *getCommonCost(getOpcode(), ElementCount::getFixed(1), Ctx) *
(isSingleScalar() ? 1 : VF.getFixedValue());
}
}
>From 0f4ac2148d7b39bf76fe8b3b5826109dc8514c25 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 20 Aug 2025 11:00:09 +0100
Subject: [PATCH 3/3] !fixup use more specific name
---
llvm/lib/Transforms/Vectorize/VPlan.h | 5 +++--
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 13 +++++++------
2 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6d2ec0545bfb6..367a6ebf48dc2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -900,8 +900,9 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
void execute(VPTransformState &State) override = 0;
/// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
- std::optional<InstructionCost> getCommonCost(unsigned Opcode, ElementCount VF,
- VPCostContext &Ctx) const;
+ std::optional<InstructionCost>
+ getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF,
+ VPCostContext &Ctx) const;
};
/// Helper to access the operand that contains the unroll part for this recipe
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 053539fb3aa13..f8fde0500b77a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -942,9 +942,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
}
}
-std::optional<InstructionCost>
-VPRecipeWithIRFlags::getCommonCost(unsigned Opcode, ElementCount VF,
- VPCostContext &Ctx) const {
+std::optional<InstructionCost> VPRecipeWithIRFlags::getCostForRecipeWithOpcode(
+ unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const {
Type *ScalarTy = Ctx.Types.inferScalarType(this);
Type *ResultTy = VF.isVector() ? toVectorTy(ScalarTy, VF) : ScalarTy;
switch (Opcode) {
@@ -1024,7 +1023,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
assert(!doesGeneratePerAllLanes() &&
"Should only generate a vector value or single scalar, not scalars "
"for all lanes.");
- return *getCommonCost(
+ return *getCostForRecipeWithOpcode(
getOpcode(),
vputils::onlyFirstLaneUsed(this) ? ElementCount::getFixed(1) : VF, Ctx);
}
@@ -2121,7 +2120,7 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
case Instruction::ExtractValue:
case Instruction::ICmp:
case Instruction::FCmp:
- return *getCommonCost(getOpcode(), VF, Ctx);
+ return *getCostForRecipeWithOpcode(getOpcode(), VF, Ctx);
default:
llvm_unreachable("Unsupported opcode for instruction");
}
@@ -3018,6 +3017,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
SmallVector<Type *, 4> Tys;
for (VPValue *ArgOp : drop_end(operands()))
Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
+ Type *ResultTy = Ctx.Types.inferScalarType(this);
return Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
}
case Instruction::Add:
@@ -3034,7 +3034,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- return *getCommonCost(getOpcode(), ElementCount::getFixed(1), Ctx) *
+ return *getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1),
+ Ctx) *
(isSingleScalar() ? 1 : VF.getFixedValue());
}
}
More information about the llvm-commits
mailing list