[llvm] [VPlan] Return Max from computeCost without underlying instrs. (PR #109708)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 8 11:49:18 PDT 2024


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/109708

>From 8117ba1e94dfa4c995d496031b6c9fe989bf9b04 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 23 Sep 2024 20:19:35 +0100
Subject: [PATCH 1/3] [VPlan] Return Max from computeCost without underlying
 instrs.

Update computeCost to return InstructionCost::getMax() for recipes
without underlying instructions. Max is used as a sentinel value to
handle cases where there is no underlying instruction. At the moment we
need to catch those cases when -force-target-instruction-cost is passed
to avoid applying the forced cost to auxiliary recipes (like scalar
steps). This is needed to match the legacy behavior.

Unfortunately, we cannot use InstructionCost::getInvalid(), as this is used
to indicate that an instruction cannot be legalized (e.g. for scalable
vectors).

Alternatively, computeCost could return an optional cost.
---
 llvm/lib/Transforms/Vectorize/VPlan.h         |  4 +++-
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 20 +++++++++----------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 0632495bc511cd..6f8a44e1cd1802 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -844,7 +844,9 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
 protected:
   /// Compute the cost of this recipe either using a recipe's specialized
   /// implementation or using the legacy cost model and the underlying
-  /// instructions.
+  /// instructions. Returns InstructionCost::max() if the cost of this recipe
+  /// should be ignored. Forced target instruction cost is not applied for such
+  /// recipes.
   virtual InstructionCost computeCost(ElementCount VF,
                                       VPCostContext &Ctx) const;
 };
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 3f7ab416e877bc..ca4622b74275e5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -276,14 +276,6 @@ static Instruction *getInstructionForCost(const VPRecipeBase *R) {
     return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
   if (auto *IG = dyn_cast<VPInterleaveRecipe>(R))
     return IG->getInsertPos();
-  // Currently the legacy cost model only calculates the instruction cost with
-  // underlying instruction. Removing the WidenMem here will prevent
-  // force-target-instruction-cost overwriting the cost of recipe with
-  // underlying instruction which is inconsistent with the legacy model.
-  // TODO: Remove WidenMem from this function when we don't need to compare to
-  // the legacy model.
-  if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(R))
-    return &WidenMem->getIngredient();
   return nullptr;
 }
 
@@ -293,9 +285,13 @@ InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
     return 0;
 
   InstructionCost RecipeCost = computeCost(VF, Ctx);
-  if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
-      RecipeCost.isValid())
+  if (ForceTargetInstructionCost.getNumOccurrences() > 0 &&
+      (RecipeCost.isValid() && RecipeCost != InstructionCost::getMax()))
     RecipeCost = InstructionCost(ForceTargetInstructionCost);
+  // Max cost is used as a sentinel value to detect recipes without underlying
+  // instructions for which no forced target instruction cost should be applied.
+  if (RecipeCost == InstructionCost::getMax())
+    RecipeCost = 0;
 
   LLVM_DEBUG({
     dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
@@ -315,7 +311,9 @@ InstructionCost VPRecipeBase::computeCost(ElementCount VF,
     // transform, avoid computing their cost multiple times for now.
     Ctx.SkipCostComputation.insert(UI);
   }
-  return UI ? Ctx.getLegacyCost(UI, VF) : 0;
+  // Max cost is used as a sentinel value to detect recipes without underlying
+  // instructions for which no forced target instruction cost should be applied.
+  return UI ? Ctx.getLegacyCost(UI, VF) : InstructionCost::getMax();
 }
 
 FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {

>From 0bf27606a01112300f237f4ea11287b1ebff6f87 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 6 Oct 2024 13:49:03 +0100
Subject: [PATCH 2/3] !fixup address latest comments, thanks!

---
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index c7e94336b59a35..56b9f76e1def41 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -275,7 +275,10 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB,
 }
 
 /// Return the underlying instruction to be used for computing \p R's cost via
-/// the legacy cost model. Return nullptr if there's no suitable instruction.
+/// the legacy cost model. Return nullptr if there's no suitable instruction or
+/// computeCost is already implemented for the recipe and there is no need for
+/// the underlying instruction, i.e. it does not need to be skipped for cost
+/// computations.
 static Instruction *getInstructionForCost(const VPRecipeBase *R) {
   if (auto *S = dyn_cast<VPSingleDefRecipe>(R))
     return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
@@ -295,7 +298,7 @@ InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
     RecipeCost = InstructionCost(ForceTargetInstructionCost);
   // Max cost is used as a sentinel value to detect recipes without underlying
   // instructions for which no forced target instruction cost should be applied.
-  if (RecipeCost == InstructionCost::getMax())
+  else if (RecipeCost == InstructionCost::getMax())
     RecipeCost = 0;
 
   LLVM_DEBUG({
@@ -308,16 +311,18 @@ InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
 InstructionCost VPRecipeBase::computeCost(ElementCount VF,
                                           VPCostContext &Ctx) const {
   // Compute the cost for the recipe falling back to the legacy cost model using
-  // the underlying instruction. If there is no underlying instruction, returns
-  // 0.
+  // the underlying instruction. If there is no underlying instruction or the
+  // cost is computed by the recipe's computeCost, returns
+  // InstructionCost::getMax. It is used as  a sentinel value to detect recipes
+  // without underlying instructions for which no forced target instruction cost
+  // should be applied.
+
   Instruction *UI = getInstructionForCost(this);
   if (UI && isa<VPReplicateRecipe>(this)) {
     // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
     // transform, avoid computing their cost multiple times for now.
     Ctx.SkipCostComputation.insert(UI);
   }
-  // Max cost is used as a sentinel value to detect recipes without underlying
-  // instructions for which no forced target instruction cost should be applied.
   return UI ? Ctx.getLegacyCost(UI, VF) : InstructionCost::getMax();
 }
 

>From 7fcddd61f2c17870bfe6c8864e94cc0d86d23a33 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 8 Oct 2024 19:25:43 +0100
Subject: [PATCH 3/3] !fixup rework

---
 llvm/lib/Transforms/Vectorize/VPlan.h         | 16 +++-
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 76 +++++++++++--------
 2 files changed, 56 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index df7124be610adb..cbd326f704e2a7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -828,9 +828,7 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
 protected:
   /// Compute the cost of this recipe either using a recipe's specialized
   /// implementation or using the legacy cost model and the underlying
-  /// instructions. Returns InstructionCost::max() if the cost of this recipe
-  /// should be ignored. Forced target instruction cost is not applied for such
-  /// recipes.
+  /// instructions.
   virtual InstructionCost computeCost(ElementCount VF,
                                       VPCostContext &Ctx) const;
 };
@@ -917,6 +915,9 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
     llvm_unreachable("Unhandled VPDefID");
   }
 
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override;
+
   static inline bool classof(const VPUser *U) {
     auto *R = dyn_cast<VPRecipeBase>(U);
     return R && classof(R);
@@ -1412,6 +1413,9 @@ class VPIRInstruction : public VPRecipeBase {
 
   void execute(VPTransformState &State) override;
 
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override;
+
   Instruction &getInstruction() { return I; }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2328,6 +2332,9 @@ class VPInterleaveRecipe : public VPRecipeBase {
   /// Generate the wide load or store, and shuffles.
   void execute(VPTransformState &State) override;
 
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override;
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
   void print(raw_ostream &O, const Twine &Indent,
@@ -2561,6 +2568,9 @@ class VPBranchOnMaskRecipe : public VPRecipeBase {
   /// conditional branch.
   void execute(VPTransformState &State) override;
 
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override;
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
   void print(raw_ostream &O, const Twine &Indent,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index eb76c146808471..6151f0d2a0fe0c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -274,32 +274,29 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB,
   insertBefore(BB, I);
 }
 
-/// Return the underlying instruction to be used for computing \p R's cost via
-/// the legacy cost model. Return nullptr if there's no suitable instruction or
-/// computeCost is already implemented for the recipe and there is no need for
-/// the underlying instruction, i.e. it does not need to be skipped for cost
-/// computations.
-static Instruction *getInstructionForCost(const VPRecipeBase *R) {
-  if (auto *S = dyn_cast<VPSingleDefRecipe>(R))
-    return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
-  if (auto *IG = dyn_cast<VPInterleaveRecipe>(R))
-    return IG->getInsertPos();
-  return nullptr;
-}
-
 InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
-  auto *UI = getInstructionForCost(this);
-  if (UI && Ctx.skipCostComputation(UI, VF.isVector()))
-    return 0;
-
-  InstructionCost RecipeCost = computeCost(VF, Ctx);
-  if (ForceTargetInstructionCost.getNumOccurrences() > 0 &&
-      (RecipeCost.isValid() && RecipeCost != InstructionCost::getMax()))
-    RecipeCost = InstructionCost(ForceTargetInstructionCost);
-  // Max cost is used as a sentinel value to detect recipes without underlying
-  // instructions for which no forced target instruction cost should be applied.
-  else if (RecipeCost == InstructionCost::getMax())
+  // Get the underlying instruction for the recipe, if there is one. It is used
+  // to
+  //   * decide if cost computation should be skipped for this recipe,
+  //   * apply the forced target instruction cost.
+  Instruction *UI = [this]() -> Instruction * {
+    if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
+      return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
+    if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
+      return IG->getInsertPos();
+    if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
+      return &WidenMem->getIngredient();
+    return nullptr;
+  }();
+  InstructionCost RecipeCost;
+  if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
     RecipeCost = 0;
+  } else {
+    RecipeCost = computeCost(VF, Ctx);
+    if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
+        RecipeCost.isValid())
+      RecipeCost = InstructionCost(ForceTargetInstructionCost);
+  }
 
   LLVM_DEBUG({
     dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
@@ -310,20 +307,18 @@ InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
 
 InstructionCost VPRecipeBase::computeCost(ElementCount VF,
                                           VPCostContext &Ctx) const {
-  // Compute the cost for the recipe falling back to the legacy cost model using
-  // the underlying instruction. If there is no underlying instruction or the
-  // cost is computed by the recipe's computeCost, returns
-  // InstructionCost::getMax. It is used as  a sentinel value to detect recipes
-  // without underlying instructions for which no forced target instruction cost
-  // should be applied.
-
-  Instruction *UI = getInstructionForCost(this);
+  llvm_unreachable("subclasses should implement computeCost");
+}
+
+InstructionCost VPSingleDefRecipe::computeCost(ElementCount VF,
+                                               VPCostContext &Ctx) const {
+  Instruction *UI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
   if (UI && isa<VPReplicateRecipe>(this)) {
     // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
     // transform, avoid computing their cost multiple times for now.
     Ctx.SkipCostComputation.insert(UI);
   }
-  return UI ? Ctx.getLegacyCost(UI, VF) : InstructionCost::getMax();
+  return UI ? Ctx.getLegacyCost(UI, VF) : 0;
 }
 
 FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
@@ -867,6 +862,11 @@ void VPIRInstruction::execute(VPTransformState &State) {
   State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
 }
 
+InstructionCost VPIRInstruction::computeCost(ElementCount VF,
+                                             VPCostContext &Ctx) const {
+  return 0;
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
@@ -2159,6 +2159,11 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
   ReplaceInstWithInst(CurrentTerminator, CondBr);
 }
 
+InstructionCost VPBranchOnMaskRecipe::computeCost(ElementCount VF,
+                                                  VPCostContext &Ctx) const {
+  return 0;
+}
+
 void VPPredInstPHIRecipe::execute(VPTransformState &State) {
   assert(State.Lane && "Predicated instruction PHI works per instance.");
   Instruction *ScalarPredInst =
@@ -2841,6 +2846,11 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
+InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,
+                                                VPCostContext &Ctx) const {
+  return Ctx.getLegacyCost(IG->getInsertPos(), VF);
+}
+
 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
   Value *Start = getStartValue()->getLiveInIRValue();
   PHINode *Phi = PHINode::Create(Start->getType(), 2, "index");



More information about the llvm-commits mailing list