[llvm] [VPlan] Compute cost for most opcodes in VPWidenRecipe (NFCI). (PR #98764)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 24 09:40:42 PDT 2024
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/98764
From 5527585703df2eef72e51e57fcc1acf60393ff68 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 12 Jul 2024 14:49:45 +0100
Subject: [PATCH 1/2] [VPlan] Compute cost for most opcodes in VPWidenRecipe
(NFCI).
---
.../Transforms/Vectorize/LoopVectorize.cpp | 3 +-
llvm/lib/Transforms/Vectorize/VPlan.h | 15 +++-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 74 +++++++++++++++++++
3 files changed, 87 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7d37d67cde29c..c4f4fb4d6952a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7279,7 +7279,8 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
ElementCount VF) const {
InstructionCost Cost = 0;
LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext();
- VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, CM);
+ VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(),
+ LLVMCtx, CM);
// Cost modeling for inductions is inaccurate in the legacy cost model
// compared to the recipes that are generated. To match here initially during
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e3b78700e1018..d5b5a28edf20f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -736,14 +736,16 @@ class VPLiveOut : public VPUser {
/// Struct to hold various analysis needed for cost computations.
struct VPCostContext {
const TargetTransformInfo &TTI;
+ const TargetLibraryInfo &TLI;
VPTypeAnalysis Types;
LLVMContext &LLVMCtx;
LoopVectorizationCostModel &CM;
SmallPtrSet<Instruction *, 8> SkipCostComputation;
- VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy,
- LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM)
- : TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}
+ VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
+ Type *CanIVTy, LLVMContext &LLVMCtx,
+ LoopVectorizationCostModel &CM)
+ : TTI(TTI), TLI(TLI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}
/// Return the cost for \p UI with \p VF using the legacy cost model as
/// fallback until computing the cost of all recipes migrates to VPlan.
@@ -862,7 +864,8 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
protected:
/// Compute the cost of this recipe using the legacy cost model and the
/// underlying instructions.
- InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
+ virtual InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const;
};
// Helper macro to define common classof implementations for recipes.
@@ -1423,6 +1426,10 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
/// Produce widened copies of all Ingredients.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPWidenRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
unsigned getOpcode() const { return Opcode; }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 53d91ee27b73f..23bb8737210fb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1137,6 +1137,80 @@ void VPWidenRecipe::execute(VPTransformState &State) {
#endif
}
+InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
+ VPCostContext &Ctx) const {
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ switch (Opcode) {
+ case Instruction::FNeg: {
+ Type *VectorTy =
+ ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
+ return Ctx.TTI.getArithmeticInstrCost(
+ Opcode, VectorTy, CostKind,
+ {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
+ {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
+ }
+
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ // More complex computation, let the legacy cost-model handle this for now.
+ return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ VPValue *Op2 = getOperand(1);
+ // Certain instructions can be cheaper to vectorize if they have a constant
+ // second vector operand. One example of this are shifts on x86.
+ TargetTransformInfo::OperandValueInfo Op2Info = {
+ TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None};
+ if (Op2->isLiveIn())
+ Op2Info = Ctx.TTI.getOperandInfo(Op2->getLiveInIRValue());
+
+ if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
+ getOperand(1)->isDefinedOutsideVectorRegions())
+ Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
+ Type *VectorTy =
+ ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
+ Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
+
+ SmallVector<const Value *, 4> Operands;
+ if (CtxI)
+ Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
+ return Ctx.TTI.getArithmeticInstrCost(
+ Opcode, VectorTy, CostKind,
+ {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
+ Op2Info, Operands, CtxI, &Ctx.TLI);
+ }
+ case Instruction::Freeze: {
+ // This opcode is unknown. Assume that it is the same as 'mul'.
+ Type *VectorTy =
+ ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
+ return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
+ Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
+ return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
+ CostKind, CtxI);
+ }
+ default:
+ llvm_unreachable("Unsupported opcode for instruction");
+ }
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
From 0b5092acc6de8f15eea19b9b96c90d7f20ab38f8 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 24 Jul 2024 17:39:46 +0100
Subject: [PATCH 2/2] !fixup adjust comments, Op2->RHS.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 7 ++++---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 14 +++++++-------
2 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index d93906ae167c9..f91a2f0cbab55 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -798,7 +798,7 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
/// Return the cost of this recipe, taking into account if the cost
/// computation should be skipped and the ForceTargetInstructionCost flag.
/// Also takes care of printing the cost for debugging.
- virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
+ InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
/// Insert an unlinked recipe into a basic block immediately before
/// the specified recipe.
@@ -862,8 +862,9 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
DebugLoc getDebugLoc() const { return DL; }
protected:
- /// Compute the cost of this recipe using the legacy cost model and the
- /// underlying instructions.
+ /// Compute the cost of this recipe either using a recipe's specialized
+ /// implementation or using the legacy cost model and the underlying
+ /// instructions.
virtual InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const;
};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2061fc5e0b072..142749276be89 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1173,17 +1173,17 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- VPValue *Op2 = getOperand(1);
+ VPValue *RHS = getOperand(1);
// Certain instructions can be cheaper to vectorize if they have a constant
// second vector operand. One example of this are shifts on x86.
- TargetTransformInfo::OperandValueInfo Op2Info = {
+ TargetTransformInfo::OperandValueInfo RHSInfo = {
TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None};
- if (Op2->isLiveIn())
- Op2Info = Ctx.TTI.getOperandInfo(Op2->getLiveInIRValue());
+ if (RHS->isLiveIn())
+ RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue());
- if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
+ if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
getOperand(1)->isDefinedOutsideVectorRegions())
- Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
+ RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;
Type *VectorTy =
ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
@@ -1194,7 +1194,7 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
return Ctx.TTI.getArithmeticInstrCost(
Opcode, VectorTy, CostKind,
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
- Op2Info, Operands, CtxI, &Ctx.TLI);
+ RHSInfo, Operands, CtxI, &Ctx.TLI);
}
case Instruction::Freeze: {
// This opcode is unknown. Assume that it is the same as 'mul'.
More information about the llvm-commits mailing list