[llvm] c787986 - [SLP]Improve costs of vectorized loads/stores by analyzing GEPs.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 13 07:23:53 PDT 2022
Author: Alexey Bataev
Date: 2022-10-13T07:20:41-07:00
New Revision: c787986cddce230e8ca28a773166b5773449bcb9
URL: https://github.com/llvm/llvm-project/commit/c787986cddce230e8ca28a773166b5773449bcb9
DIFF: https://github.com/llvm/llvm-project/commit/c787986cddce230e8ca28a773166b5773449bcb9.diff
LOG: [SLP]Improve costs of vectorized loads/stores by analyzing GEPs.
When generating masked gather nodes, the SLP vectorizer accounts for the cost
of the GEPs for loads as part of the scalar-vector transformation cost
estimation. But it does not do so for vectorized loads/stores, even though
vectorization may remove some of the GEPs completely. Because of this, in
some cases a masked gather operation can be much more profitable than
regular vectorization (masked-gather cost + vector GEP - (scalar
loads + GEPs), compared to vectorized loads - scalar loads).
Added the analysis of the removed scalar GEPs for vectorized load/store nodes for better cost estimation.
Differential Revision: https://reviews.llvm.org/D135282
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 865578f7c4f09..0011980c2f7fe 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -934,6 +934,18 @@ class BoUpSLP {
/// Construct a vectorizable tree that starts at \p Roots.
void buildTree(ArrayRef<Value *> Roots);
+ /// Checks if the very first tree node is going to be vectorized.
+ bool isVectorizedFirstNode() const {
+ return !VectorizableTree.empty() &&
+ VectorizableTree.front()->State == TreeEntry::Vectorize;
+ }
+
+ /// Returns the main instruction for the very first node.
+ Instruction *getFirstNodeMainOp() const {
+ assert(!VectorizableTree.empty() && "No tree to get the first node from");
+ return VectorizableTree.front()->getMainOp();
+ }
+
/// Builds external uses of the vectorized scalars, i.e. the list of
/// vectorized scalars to be extracted, their lanes and their scalar users. \p
/// ExternallyUsedValues contains additional list of external uses to handle
@@ -6630,8 +6642,20 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
InstructionCost ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecLdCost;
if (E->State == TreeEntry::Vectorize) {
- VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, Alignment, 0,
- CostKind, {TTI::OK_AnyValue, TTI::OP_None}, VL0);
+ VecLdCost =
+ TTI->getMemoryOpCost(Instruction::Load, VecTy, Alignment, 0,
+ CostKind, TTI::OperandValueInfo(), VL0);
+ for (Value *V : VL) {
+ auto *VI = cast<LoadInst>(V);
+ // Add the costs of scalar GEP pointers, to be removed from the code.
+ if (VI == VL0)
+ continue;
+ auto *Ptr = dyn_cast<GetElementPtrInst>(VI->getPointerOperand());
+ if (!Ptr || !Ptr->hasOneUse() || Ptr->hasAllConstantIndices())
+ continue;
+ ScalarLdCost += TTI->getArithmeticInstrCost(Instruction::Add,
+ Ptr->getType(), CostKind);
+ }
} else {
assert(E->State == TreeEntry::ScatterVectorize && "Unknown EntryState");
Align CommonAlignment = Alignment;
@@ -6653,11 +6677,19 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
Align Alignment = SI->getAlign();
InstructionCost ScalarStCost = 0;
for (auto *V : VL) {
- auto *VI = cast<Instruction>(V);
+ auto *VI = cast<StoreInst>(V);
TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(VI->getOperand(0));
ScalarStCost +=
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0,
CostKind, OpInfo, VI);
+ // Add the costs of scalar GEP pointers, to be removed from the code.
+ if (VI == SI)
+ continue;
+ auto *Ptr = dyn_cast<GetElementPtrInst>(VI->getPointerOperand());
+ if (!Ptr || !Ptr->hasOneUse() || Ptr->hasAllConstantIndices())
+ continue;
+ ScalarStCost += TTI->getArithmeticInstrCost(Instruction::Add,
+ Ptr->getType(), CostKind);
}
TTI::OperandValueInfo OpInfo = getOperandInfo(VL, 0);
InstructionCost VecStCost =
@@ -11499,6 +11531,21 @@ class HorizontalReduction {
InstructionCost TreeCost = V.getTreeCost(VL);
InstructionCost ReductionCost =
getReductionCost(TTI, VL, ReduxWidth, RdxFMF);
+ if (V.isVectorizedFirstNode() && isa<LoadInst>(VL.front())) {
+ Instruction *MainOp = V.getFirstNodeMainOp();
+ for (Value *V : VL) {
+ auto *VI = dyn_cast<LoadInst>(V);
+ // Add the costs of scalar GEP pointers, to be removed from the
+ // code.
+ if (!VI || VI == MainOp)
+ continue;
+ auto *Ptr = dyn_cast<GetElementPtrInst>(VI->getPointerOperand());
+ if (!Ptr || !Ptr->hasOneUse() || Ptr->hasAllConstantIndices())
+ continue;
+ TreeCost -= TTI->getArithmeticInstrCost(
+ Instruction::Add, Ptr->getType(), TTI::TCK_RecipThroughput);
+ }
+ }
InstructionCost Cost = TreeCost + ReductionCost;
LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for reduction\n");
if (!Cost.isValid()) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
index 285d2d447ca53..a0f776886ad4b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
@@ -91,7 +91,7 @@ for.body: ; preds = %for.body, %entry
; YAML-NEXT: Function: foo
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
- ; YAML-NEXT: - Cost: '-5'
+ ; YAML-NEXT: - Cost: '-14'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '4'
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
index d47fb1cc4e74a..1a10bd481690f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
@@ -68,7 +68,7 @@ for.body: ; preds = %for.body, %entry
; YAML-NEXT: Function: foo
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
- ; YAML-NEXT: - Cost: '-1'
+ ; YAML-NEXT: - Cost: '-4'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '4'
More information about the llvm-commits
mailing list