[llvm] c787986 - [SLP]Improve costs of vectorized loads/stores by analyzing GEPs.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 13 07:23:53 PDT 2022


Author: Alexey Bataev
Date: 2022-10-13T07:20:41-07:00
New Revision: c787986cddce230e8ca28a773166b5773449bcb9

URL: https://github.com/llvm/llvm-project/commit/c787986cddce230e8ca28a773166b5773449bcb9
DIFF: https://github.com/llvm/llvm-project/commit/c787986cddce230e8ca28a773166b5773449bcb9.diff

LOG: [SLP]Improve costs of vectorized loads/stores by analyzing GEPs.

When generating masked gathers nodes, SLP vectorizer accounts the cost
of the GEPs for loads as part of the scalar-vector transformation cost
estimation. But it does not do it for vectorized loads/stores, while it
may completely remove some of the GEPs completely. Because of this in
some cases masked gather operation can be much more profitable rather
than regular vectorization (masked-gather cost + vector GEP - scalar
loads + GEPs comparing to vectorized loads - scalar loads).
Added the analysis of the removed scalarGEPs for vectorized load/store nodes for better cost estimation.

Differential Revision: https://reviews.llvm.org/D135282

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
    llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 865578f7c4f09..0011980c2f7fe 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -934,6 +934,18 @@ class BoUpSLP {
   /// Construct a vectorizable tree that starts at \p Roots.
   void buildTree(ArrayRef<Value *> Roots);
 
+  /// Checks if the very first tree node is going to be vectorized.
+  bool isVectorizedFirstNode() const {
+    return !VectorizableTree.empty() &&
+           VectorizableTree.front()->State == TreeEntry::Vectorize;
+  }
+
+  /// Returns the main instruction for the very first node.
+  Instruction *getFirstNodeMainOp() const {
+    assert(!VectorizableTree.empty() && "No tree to get the first node from");
+    return VectorizableTree.front()->getMainOp();
+  }
+
   /// Builds external uses of the vectorized scalars, i.e. the list of
   /// vectorized scalars to be extracted, their lanes and their scalar users. \p
   /// ExternallyUsedValues contains additional list of external uses to handle
@@ -6630,8 +6642,20 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
       InstructionCost ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
       InstructionCost VecLdCost;
       if (E->State == TreeEntry::Vectorize) {
-        VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, Alignment, 0,
-                                         CostKind, {TTI::OK_AnyValue, TTI::OP_None}, VL0);
+        VecLdCost =
+            TTI->getMemoryOpCost(Instruction::Load, VecTy, Alignment, 0,
+                                 CostKind, TTI::OperandValueInfo(), VL0);
+        for (Value *V : VL) {
+          auto *VI = cast<LoadInst>(V);
+          // Add the costs of scalar GEP pointers, to be removed from the code.
+          if (VI == VL0)
+            continue;
+          auto *Ptr = dyn_cast<GetElementPtrInst>(VI->getPointerOperand());
+          if (!Ptr || !Ptr->hasOneUse() || Ptr->hasAllConstantIndices())
+            continue;
+          ScalarLdCost += TTI->getArithmeticInstrCost(Instruction::Add,
+                                                      Ptr->getType(), CostKind);
+        }
       } else {
         assert(E->State == TreeEntry::ScatterVectorize && "Unknown EntryState");
         Align CommonAlignment = Alignment;
@@ -6653,11 +6677,19 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
       Align Alignment = SI->getAlign();
       InstructionCost ScalarStCost = 0;
       for (auto *V : VL) {
-        auto *VI = cast<Instruction>(V);
+        auto *VI = cast<StoreInst>(V);
         TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(VI->getOperand(0));
         ScalarStCost +=
           TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0,
                                CostKind, OpInfo, VI);
+        // Add the costs of scalar GEP pointers, to be removed from the code.
+        if (VI == SI)
+          continue;
+        auto *Ptr = dyn_cast<GetElementPtrInst>(VI->getPointerOperand());
+        if (!Ptr || !Ptr->hasOneUse() || Ptr->hasAllConstantIndices())
+          continue;
+        ScalarStCost += TTI->getArithmeticInstrCost(Instruction::Add,
+                                                    Ptr->getType(), CostKind);
       }
       TTI::OperandValueInfo OpInfo = getOperandInfo(VL, 0);
       InstructionCost VecStCost =
@@ -11499,6 +11531,21 @@ class HorizontalReduction {
         InstructionCost TreeCost = V.getTreeCost(VL);
         InstructionCost ReductionCost =
             getReductionCost(TTI, VL, ReduxWidth, RdxFMF);
+        if (V.isVectorizedFirstNode() && isa<LoadInst>(VL.front())) {
+          Instruction *MainOp = V.getFirstNodeMainOp();
+          for (Value *V : VL) {
+            auto *VI = dyn_cast<LoadInst>(V);
+            // Add the costs of scalar GEP pointers, to be removed from the
+            // code.
+            if (!VI || VI == MainOp)
+              continue;
+            auto *Ptr = dyn_cast<GetElementPtrInst>(VI->getPointerOperand());
+            if (!Ptr || !Ptr->hasOneUse() || Ptr->hasAllConstantIndices())
+              continue;
+            TreeCost -= TTI->getArithmeticInstrCost(
+                Instruction::Add, Ptr->getType(), TTI::TCK_RecipThroughput);
+          }
+        }
         InstructionCost Cost = TreeCost + ReductionCost;
         LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for reduction\n");
         if (!Cost.isValid()) {

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
index 285d2d447ca53..a0f776886ad4b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
@@ -91,7 +91,7 @@ for.body:                                         ; preds = %for.body, %entry
   ; YAML-NEXT: Function:        foo
   ; YAML-NEXT: Args:
   ; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
-  ; YAML-NEXT:   - Cost:            '-5'
+  ; YAML-NEXT:   - Cost:            '-14'
   ; YAML-NEXT:   - String:          ' and with tree size '
   ; YAML-NEXT:   - TreeSize:        '4'
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
index d47fb1cc4e74a..1a10bd481690f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
@@ -68,7 +68,7 @@ for.body:                                         ; preds = %for.body, %entry
   ; YAML-NEXT: Function:        foo
   ; YAML-NEXT: Args:
   ; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
-  ; YAML-NEXT:   - Cost:            '-1'
+  ; YAML-NEXT:   - Cost:            '-4'
   ; YAML-NEXT:   - String:          ' and with tree size '
   ; YAML-NEXT:   - TreeSize:        '4'
 


        


More information about the llvm-commits mailing list