[llvm] [VPlan] Return Max from computeCost without underlying instrs. (PR #109708)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 8 11:49:18 PDT 2024
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/109708
>From 8117ba1e94dfa4c995d496031b6c9fe989bf9b04 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 23 Sep 2024 20:19:35 +0100
Subject: [PATCH 1/3] [VPlan] Return Max from computeCost without underlying
instrs.
Update computeCost to return InstructionCost::getMax() for recipes
without underlying instructions. Max is used as a sentinel value to
handle cases where there is no underlying instruction. At the moment we
need to catch those cases when -force-target-instruction-cost is passed
to avoid applying the forced cost to auxiliary recipes (like scalar
steps). This is needed to match the legacy behavior.
Unfortunately we cannot use InstructionCost::getInvalid, as this is used
to indicate that an instruction cannot be legalized (e.g. for scalable
vectors).
Alternatively computeCost could return an optional cost.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 4 +++-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 20 +++++++++----------
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 0632495bc511cd..6f8a44e1cd1802 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -844,7 +844,9 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
protected:
/// Compute the cost of this recipe either using a recipe's specialized
/// implementation or using the legacy cost model and the underlying
- /// instructions.
+ /// instructions. Returns InstructionCost::max() if the cost of this recipe
+ /// should be ignored. Forced target instruction cost is not applied for such
+ /// recipes.
virtual InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const;
};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 3f7ab416e877bc..ca4622b74275e5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -276,14 +276,6 @@ static Instruction *getInstructionForCost(const VPRecipeBase *R) {
return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
if (auto *IG = dyn_cast<VPInterleaveRecipe>(R))
return IG->getInsertPos();
- // Currently the legacy cost model only calculates the instruction cost with
- // underlying instruction. Removing the WidenMem here will prevent
- // force-target-instruction-cost overwriting the cost of recipe with
- // underlying instruction which is inconsistent with the legacy model.
- // TODO: Remove WidenMem from this function when we don't need to compare to
- // the legacy model.
- if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(R))
- return &WidenMem->getIngredient();
return nullptr;
}
@@ -293,9 +285,13 @@ InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
return 0;
InstructionCost RecipeCost = computeCost(VF, Ctx);
- if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
- RecipeCost.isValid())
+ if (ForceTargetInstructionCost.getNumOccurrences() > 0 &&
+ (RecipeCost.isValid() && RecipeCost != InstructionCost::getMax()))
RecipeCost = InstructionCost(ForceTargetInstructionCost);
+ // Max cost is used as a sentinel value to detect recipes without underlying
+ // instructions for which no forced target instruction cost should be applied.
+ if (RecipeCost == InstructionCost::getMax())
+ RecipeCost = 0;
LLVM_DEBUG({
dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
@@ -315,7 +311,9 @@ InstructionCost VPRecipeBase::computeCost(ElementCount VF,
// transform, avoid computing their cost multiple times for now.
Ctx.SkipCostComputation.insert(UI);
}
- return UI ? Ctx.getLegacyCost(UI, VF) : 0;
+ // Max cost is used as a sentinel value to detect recipes without underlying
+ // instructions for which no forced target instruction cost should be applied.
+ return UI ? Ctx.getLegacyCost(UI, VF) : InstructionCost::getMax();
}
FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
>From 0bf27606a01112300f237f4ea11287b1ebff6f87 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 6 Oct 2024 13:49:03 +0100
Subject: [PATCH 2/3] !fixup address latest comments, thanks!
---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index c7e94336b59a35..56b9f76e1def41 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -275,7 +275,10 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB,
}
/// Return the underlying instruction to be used for computing \p R's cost via
-/// the legacy cost model. Return nullptr if there's no suitable instruction.
+/// the legacy cost model. Return nullptr if there's no suitable instruction or
+/// computeCost is already implemented for the recipe and there is no need for
+/// the underlying instruction, i.e. it does not need to be skipped for cost
+/// computations.
static Instruction *getInstructionForCost(const VPRecipeBase *R) {
if (auto *S = dyn_cast<VPSingleDefRecipe>(R))
return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
@@ -295,7 +298,7 @@ InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
RecipeCost = InstructionCost(ForceTargetInstructionCost);
// Max cost is used as a sentinel value to detect recipes without underlying
// instructions for which no forced target instruction cost should be applied.
- if (RecipeCost == InstructionCost::getMax())
+ else if (RecipeCost == InstructionCost::getMax())
RecipeCost = 0;
LLVM_DEBUG({
@@ -308,16 +311,18 @@ InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
InstructionCost VPRecipeBase::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
// Compute the cost for the recipe falling back to the legacy cost model using
- // the underlying instruction. If there is no underlying instruction, returns
- // 0.
+ // the underlying instruction. If there is no underlying instruction or the
+ // cost is computed by the recipe's computeCost, returns
+ // InstructionCost::getMax. It is used as a sentinel value to detect recipes
+ // without underlying instructions for which no forced target instruction cost
+ // should be applied.
+
Instruction *UI = getInstructionForCost(this);
if (UI && isa<VPReplicateRecipe>(this)) {
// VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
// transform, avoid computing their cost multiple times for now.
Ctx.SkipCostComputation.insert(UI);
}
- // Max cost is used as a sentinel value to detect recipes without underlying
- // instructions for which no forced target instruction cost should be applied.
return UI ? Ctx.getLegacyCost(UI, VF) : InstructionCost::getMax();
}
>From 7fcddd61f2c17870bfe6c8864e94cc0d86d23a33 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 8 Oct 2024 19:25:43 +0100
Subject: [PATCH 3/3] !fixup rework
---
llvm/lib/Transforms/Vectorize/VPlan.h | 16 +++-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 76 +++++++++++--------
2 files changed, 56 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index df7124be610adb..cbd326f704e2a7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -828,9 +828,7 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
protected:
/// Compute the cost of this recipe either using a recipe's specialized
/// implementation or using the legacy cost model and the underlying
- /// instructions. Returns InstructionCost::max() if the cost of this recipe
- /// should be ignored. Forced target instruction cost is not applied for such
- /// recipes.
+ /// instructions.
virtual InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const;
};
@@ -917,6 +915,9 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
llvm_unreachable("Unhandled VPDefID");
}
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
static inline bool classof(const VPUser *U) {
auto *R = dyn_cast<VPRecipeBase>(U);
return R && classof(R);
@@ -1412,6 +1413,9 @@ class VPIRInstruction : public VPRecipeBase {
void execute(VPTransformState &State) override;
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
Instruction &getInstruction() { return I; }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2328,6 +2332,9 @@ class VPInterleaveRecipe : public VPRecipeBase {
/// Generate the wide load or store, and shuffles.
void execute(VPTransformState &State) override;
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -2561,6 +2568,9 @@ class VPBranchOnMaskRecipe : public VPRecipeBase {
/// conditional branch.
void execute(VPTransformState &State) override;
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index eb76c146808471..6151f0d2a0fe0c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -274,32 +274,29 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB,
insertBefore(BB, I);
}
-/// Return the underlying instruction to be used for computing \p R's cost via
-/// the legacy cost model. Return nullptr if there's no suitable instruction or
-/// computeCost is already implemented for the recipe and there is no need for
-/// the underlying instruction, i.e. it does not need to be skipped for cost
-/// computations.
-static Instruction *getInstructionForCost(const VPRecipeBase *R) {
- if (auto *S = dyn_cast<VPSingleDefRecipe>(R))
- return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
- if (auto *IG = dyn_cast<VPInterleaveRecipe>(R))
- return IG->getInsertPos();
- return nullptr;
-}
-
InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
- auto *UI = getInstructionForCost(this);
- if (UI && Ctx.skipCostComputation(UI, VF.isVector()))
- return 0;
-
- InstructionCost RecipeCost = computeCost(VF, Ctx);
- if (ForceTargetInstructionCost.getNumOccurrences() > 0 &&
- (RecipeCost.isValid() && RecipeCost != InstructionCost::getMax()))
- RecipeCost = InstructionCost(ForceTargetInstructionCost);
- // Max cost is used as a sentinel value to detect recipes without underlying
- // instructions for which no forced target instruction cost should be applied.
- else if (RecipeCost == InstructionCost::getMax())
+ // Get the underlying instruction for the recipe, if there is one. It is used
+ // to
+ // * decide if cost computation should be skipped for this recipe,
+ // * apply forced target instruction cost.
+ Instruction *UI = [this]() -> Instruction * {
+ if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
+ return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
+ if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
+ return IG->getInsertPos();
+ if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
+ return &WidenMem->getIngredient();
+ return nullptr;
+ }();
+ InstructionCost RecipeCost;
+ if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
RecipeCost = 0;
+ } else {
+ RecipeCost = computeCost(VF, Ctx);
+ if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
+ RecipeCost.isValid())
+ RecipeCost = InstructionCost(ForceTargetInstructionCost);
+ }
LLVM_DEBUG({
dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
@@ -310,20 +307,18 @@ InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
InstructionCost VPRecipeBase::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
- // Compute the cost for the recipe falling back to the legacy cost model using
- // the underlying instruction. If there is no underlying instruction or the
- // cost is computed by the recipe's computeCost, returns
- // InstructionCost::getMax. It is used as a sentinel value to detect recipes
- // without underlying instructions for which no forced target instruction cost
- // should be applied.
-
- Instruction *UI = getInstructionForCost(this);
+ llvm_unreachable("subclasses should implement computeCost");
+}
+
+InstructionCost VPSingleDefRecipe::computeCost(ElementCount VF,
+ VPCostContext &Ctx) const {
+ Instruction *UI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
if (UI && isa<VPReplicateRecipe>(this)) {
// VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
// transform, avoid computing their cost multiple times for now.
Ctx.SkipCostComputation.insert(UI);
}
- return UI ? Ctx.getLegacyCost(UI, VF) : InstructionCost::getMax();
+ return UI ? Ctx.getLegacyCost(UI, VF) : 0;
}
FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
@@ -867,6 +862,11 @@ void VPIRInstruction::execute(VPTransformState &State) {
State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
}
+InstructionCost VPIRInstruction::computeCost(ElementCount VF,
+ VPCostContext &Ctx) const {
+ return 0;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -2159,6 +2159,11 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
ReplaceInstWithInst(CurrentTerminator, CondBr);
}
+InstructionCost VPBranchOnMaskRecipe::computeCost(ElementCount VF,
+ VPCostContext &Ctx) const {
+ return 0;
+}
+
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
assert(State.Lane && "Predicated instruction PHI works per instance.");
Instruction *ScalarPredInst =
@@ -2841,6 +2846,11 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
+InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,
+ VPCostContext &Ctx) const {
+ return Ctx.getLegacyCost(IG->getInsertPos(), VF);
+}
+
void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
Value *Start = getStartValue()->getLiveInIRValue();
PHINode *Phi = PHINode::Create(Start->getType(), 2, "index");
More information about the llvm-commits
mailing list