[llvm] [VPlan][NFC] Add cost for `VPWidenMemoryRecipe`. (PR #105614)

Sun Sep 1 17:08:30 PDT 2024

https://github.com/ElvisWang123 updated https://github.com/llvm/llvm-project/pull/105614

>From d72e70caf3aee9abfad626ca84b1f5b0406a7d6d Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Wed, 21 Aug 2024 20:39:31 -0700
Subject: [PATCH 1/4] [VPlan] Add cost for `VPWidenMemoryRecipe`

In this patch, we add the `computeCost()` function for `VPWidenMemoryRecipe`.
---
 llvm/lib/Transforms/Vectorize/VPlan.h         |  7 +++++
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 31 +++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 36a1aa08654d5b..b3edf7c4b51685 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2535,6 +2535,13 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
     llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
   }
 
+  /// Get element Type
+  Type *getElementType() const { return getLoadStoreType(&Ingredient); }
+
+  /// Return the cost of this VPWidenMemoryRecipe.
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override;
+
   Instruction &getIngredient() const { return Ingredient; }
 };
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 53b28a692059f6..17125bb88b958f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2079,6 +2079,37 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
+InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
+                                                 VPCostContext &Ctx) const {
+  Instruction *I = getInstructionForCost(this);
+  Type *Ty = ToVectorTy(getElementType(), VF);
+  const Align Alignment = getLoadStoreAlignment(const_cast<Instruction *>(I));
+  const Value *Ptr = getLoadStorePointerOperand(I);
+  unsigned AS = getLoadStoreAddressSpace(const_cast<Instruction *>(I));
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+
+  if (Consecutive) {
+    InstructionCost Cost = 0;
+    if (IsMasked) {
+      Cost += Ctx.TTI.getMaskedMemoryOpCost(I->getOpcode(), Ty, Alignment, AS,
+                                            CostKind);
+    } else {
+      TTI::OperandValueInfo OpInfo = Ctx.TTI.getOperandInfo(I->getOperand(0));
+      Cost += Ctx.TTI.getMemoryOpCost(I->getOpcode(), Ty, Alignment, AS,
+                                      CostKind, OpInfo, I);
+    }
+    if (Reverse)
+      Cost += Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
+                                     cast<VectorType>(Ty), std::nullopt,
+                                     CostKind, 0);
+
+    return Cost;
+  }
+  return Ctx.TTI.getAddressComputationCost(Ty) +
+         Ctx.TTI.getGatherScatterOpCost(I->getOpcode(), Ty, Ptr, IsMasked,
+                                        Alignment, CostKind, I);
+}
+
 void VPWidenLoadRecipe::execute(VPTransformState &State) {
   auto *LI = cast<LoadInst>(&Ingredient);
 

>From 53972d570b474e84f7e60414e039ce0c6350d580 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Mon, 26 Aug 2024 06:10:41 -0700
Subject: [PATCH 2/4] Address comments

---
 llvm/lib/Transforms/Vectorize/VPlan.h         |  3 -
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 55 +++++++++++--------
 2 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index b3edf7c4b51685..743552ea1306f0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2535,9 +2535,6 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
     llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
   }
 
-  /// Get element Type
-  Type *getElementType() const { return getLoadStoreType(&Ingredient); }
-
   /// Return the cost of this VPWidenMemoryRecipe.
   InstructionCost computeCost(ElementCount VF,
                               VPCostContext &Ctx) const override;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 17125bb88b958f..2d9db9db1765d8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2081,33 +2081,40 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
 
 InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
                                                  VPCostContext &Ctx) const {
-  Instruction *I = getInstructionForCost(this);
-  Type *Ty = ToVectorTy(getElementType(), VF);
-  const Align Alignment = getLoadStoreAlignment(const_cast<Instruction *>(I));
-  const Value *Ptr = getLoadStorePointerOperand(I);
-  unsigned AS = getLoadStoreAddressSpace(const_cast<Instruction *>(I));
+  Type *Ty = ToVectorTy(getLoadStoreType(&Ingredient), VF);
+  const Align Alignment =
+      getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
+  unsigned AS =
+      getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
   TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
 
-  if (Consecutive) {
-    InstructionCost Cost = 0;
-    if (IsMasked) {
-      Cost += Ctx.TTI.getMaskedMemoryOpCost(I->getOpcode(), Ty, Alignment, AS,
-                                            CostKind);
-    } else {
-      TTI::OperandValueInfo OpInfo = Ctx.TTI.getOperandInfo(I->getOperand(0));
-      Cost += Ctx.TTI.getMemoryOpCost(I->getOpcode(), Ty, Alignment, AS,
-                                      CostKind, OpInfo, I);
-    }
-    if (Reverse)
-      Cost += Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
-                                     cast<VectorType>(Ty), std::nullopt,
-                                     CostKind, 0);
-
-    return Cost;
+  if (!Consecutive) {
+    // TODO: Using the original IR may not be accurate.
+    // Currently, ARM will use the underlying IR to calculate gather/scatter
+    // instruction cost.
+    const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
+    return Ctx.TTI.getAddressComputationCost(Ty) +
+           Ctx.TTI.getGatherScatterOpCost(Ingredient.getOpcode(), Ty, Ptr,
+                                          IsMasked, Alignment, CostKind,
+                                          &Ingredient);
+  }
+
+  InstructionCost Cost = 0;
+  if (IsMasked) {
+    Cost += Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
+                                          AS, CostKind);
+  } else {
+    TTI::OperandValueInfo OpInfo =
+        Ctx.TTI.getOperandInfo(Ingredient.getOperand(0));
+    Cost += Ctx.TTI.getMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, AS,
+                                    CostKind, OpInfo, &Ingredient);
   }
-  return Ctx.TTI.getAddressComputationCost(Ty) +
-         Ctx.TTI.getGatherScatterOpCost(I->getOpcode(), Ty, Ptr, IsMasked,
-                                        Alignment, CostKind, I);
+  if (Reverse)
+    Cost +=
+        Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
+                               cast<VectorType>(Ty), std::nullopt, CostKind, 0);
+
+  return Cost;
 }
 
 void VPWidenLoadRecipe::execute(VPTransformState &State) {

>From d1635d5fe72709fd2f6e496e6273bcd97b562f87 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 27 Aug 2024 01:56:18 -0700
Subject: [PATCH 3/4] Address comments

---
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2d9db9db1765d8..1a296acd278640 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2093,6 +2093,8 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
     // Currently, ARM will use the underlying IR to calculate gather/scatter
     // instruction cost.
     const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
+    assert(!Reverse &&
+           "Inconsecutive memory access should not have the order.");
     return Ctx.TTI.getAddressComputationCost(Ty) +
            Ctx.TTI.getGatherScatterOpCost(Ingredient.getOpcode(), Ty, Ptr,
                                           IsMasked, Alignment, CostKind,
@@ -2109,12 +2111,12 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
     Cost += Ctx.TTI.getMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, AS,
                                     CostKind, OpInfo, &Ingredient);
   }
-  if (Reverse)
-    Cost +=
-        Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
-                               cast<VectorType>(Ty), std::nullopt, CostKind, 0);
+  if (!Reverse)
+    return Cost;
 
-  return Cost;
+  return Cost += Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
+                                        cast<VectorType>(Ty), std::nullopt,
+                                        CostKind, 0);
 }
 
 void VPWidenLoadRecipe::execute(VPTransformState &State) {

>From 8e7c55ce99148fbbf8b9dadd1e8a5555f47847f8 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 27 Aug 2024 22:42:54 -0700
Subject: [PATCH 4/4] Add TODO for getInstrForCost()

---
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1a296acd278640..412b2913dd0518 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -271,6 +271,12 @@ static Instruction *getInstructionForCost(const VPRecipeBase *R) {
     return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
   if (auto *IG = dyn_cast<VPInterleaveRecipe>(R))
     return IG->getInsertPos();
+  // Currently the legacy cost model only calculates the instruction cost with
+  // underlying instruction. Removing the WidenMem here will prevent
+  // force-target-instruction-cost overwriting the cost of recipe with
+  // underlying instruction which is inconsistent with the legacy model.
+  // TODO: Remove WidenMem from this function when we don't need to compare to
+  // the latecy model.
   if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(R))
     return &WidenMem->getIngredient();
   return nullptr;