[llvm] 36e8db7 - [SLP][NFC]Extract main part of GetGEPCostDiff to a function, NFC.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 6 08:09:16 PST 2024
Author: Alexey Bataev
Date: 2024-02-06T08:05:42-08:00
New Revision: 36e8db7d8c9183c66363e76517772b074b4f53be
URL: https://github.com/llvm/llvm-project/commit/36e8db7d8c9183c66363e76517772b074b4f53be
DIFF: https://github.com/llvm/llvm-project/commit/36e8db7d8c9183c66363e76517772b074b4f53be.diff
LOG: [SLP][NFC]Extract main part of GetGEPCostDiff to a function, NFC.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b8d04322de298..c35d39f0a9217 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6954,6 +6954,82 @@ getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind,
return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
}
+/// Calculate the scalar and the vector costs from vectorizing set of GEPs.
+static std::pair<InstructionCost, InstructionCost>
+getGEPCosts(const TargetTransformInfo &TTI, ArrayRef<Value *> Ptrs,
+ Value *BasePtr, unsigned Opcode, TTI::TargetCostKind CostKind,
+ Type *ScalarTy, VectorType *VecTy) {
+ InstructionCost ScalarCost = 0;
+ InstructionCost VecCost = 0;
+ // Here we differentiate two cases: (1) when Ptrs represent a regular
+ // vectorization tree node (as they are pointer arguments of scattered
+ // loads) or (2) when Ptrs are the arguments of loads or stores being
+ // vectorized as plane wide unit-stride load/store since all the
+ // loads/stores are known to be from/to adjacent locations.
+ if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
+ // Case 2: estimate costs for pointer related costs when vectorizing to
+ // a wide load/store.
+ // Scalar cost is estimated as a set of pointers with known relationship
+ // between them.
+ // For vector code we will use BasePtr as argument for the wide load/store
+ // but we also need to account all the instructions which are going to
+ // stay in vectorized code due to uses outside of these scalar
+ // loads/stores.
+ ScalarCost = TTI.getPointersChainCost(
+ Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), ScalarTy,
+ CostKind);
+
+ SmallVector<const Value *> PtrsRetainedInVecCode;
+ for (Value *V : Ptrs) {
+ if (V == BasePtr) {
+ PtrsRetainedInVecCode.push_back(V);
+ continue;
+ }
+ auto *Ptr = dyn_cast<GetElementPtrInst>(V);
+ // For simplicity assume Ptr to stay in vectorized code if it's not a
+ // GEP instruction. We don't care since it's cost considered free.
+ // TODO: We should check for any uses outside of vectorizable tree
+ // rather than just single use.
+ if (!Ptr || !Ptr->hasOneUse())
+ PtrsRetainedInVecCode.push_back(V);
+ }
+
+ if (PtrsRetainedInVecCode.size() == Ptrs.size()) {
+ // If all pointers stay in vectorized code then we don't have
+ // any savings on that.
+ return std::make_pair(TTI::TCC_Free, TTI::TCC_Free);
+ }
+ VecCost = TTI.getPointersChainCost(PtrsRetainedInVecCode, BasePtr,
+ TTI::PointersChainInfo::getKnownStride(),
+ VecTy, CostKind);
+ } else {
+ // Case 1: Ptrs are the arguments of loads that we are going to transform
+ // into masked gather load intrinsic.
+ // All the scalar GEPs will be removed as a result of vectorization.
+ // For any external uses of some lanes extract element instructions will
+ // be generated (which cost is estimated separately).
+ TTI::PointersChainInfo PtrsInfo =
+ all_of(Ptrs,
+ [](const Value *V) {
+ auto *Ptr = dyn_cast<GetElementPtrInst>(V);
+ return Ptr && !Ptr->hasAllConstantIndices();
+ })
+ ? TTI::PointersChainInfo::getUnknownStride()
+ : TTI::PointersChainInfo::getKnownStride();
+
+ ScalarCost =
+ TTI.getPointersChainCost(Ptrs, BasePtr, PtrsInfo, ScalarTy, CostKind);
+ if (auto *BaseGEP = dyn_cast<GEPOperator>(BasePtr)) {
+ SmallVector<const Value *> Indices(BaseGEP->indices());
+ VecCost = TTI.getGEPCost(BaseGEP->getSourceElementType(),
+ BaseGEP->getPointerOperand(), Indices, VecTy,
+ CostKind);
+ }
+ }
+
+ return std::make_pair(ScalarCost, VecCost);
+}
+
/// Merges shuffle masks and emits final shuffle instruction, if required. It
/// supports shuffling of 2 input vectors. It implements lazy shuffles emission,
/// when the actual shuffle instruction is generated only if this is actually
@@ -7917,78 +7993,12 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
// Calculate cost difference from vectorizing set of GEPs.
// Negative value means vectorizing is profitable.
auto GetGEPCostDiff = [=](ArrayRef<Value *> Ptrs, Value *BasePtr) {
- InstructionCost ScalarCost = 0;
- InstructionCost VecCost = 0;
- // Here we differentiate two cases: (1) when Ptrs represent a regular
- // vectorization tree node (as they are pointer arguments of scattered
- // loads) or (2) when Ptrs are the arguments of loads or stores being
- // vectorized as plane wide unit-stride load/store since all the
- // loads/stores are known to be from/to adjacent locations.
assert(E->State == TreeEntry::Vectorize &&
"Entry state expected to be Vectorize here.");
- if (isa<LoadInst, StoreInst>(VL0)) {
- // Case 2: estimate costs for pointer related costs when vectorizing to
- // a wide load/store.
- // Scalar cost is estimated as a set of pointers with known relationship
- // between them.
- // For vector code we will use BasePtr as argument for the wide load/store
- // but we also need to account all the instructions which are going to
- // stay in vectorized code due to uses outside of these scalar
- // loads/stores.
- ScalarCost = TTI->getPointersChainCost(
- Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), ScalarTy,
- CostKind);
-
- SmallVector<const Value *> PtrsRetainedInVecCode;
- for (Value *V : Ptrs) {
- if (V == BasePtr) {
- PtrsRetainedInVecCode.push_back(V);
- continue;
- }
- auto *Ptr = dyn_cast<GetElementPtrInst>(V);
- // For simplicity assume Ptr to stay in vectorized code if it's not a
- // GEP instruction. We don't care since it's cost considered free.
- // TODO: We should check for any uses outside of vectorizable tree
- // rather than just single use.
- if (!Ptr || !Ptr->hasOneUse())
- PtrsRetainedInVecCode.push_back(V);
- }
-
- if (PtrsRetainedInVecCode.size() == Ptrs.size()) {
- // If all pointers stay in vectorized code then we don't have
- // any savings on that.
- LLVM_DEBUG(dumpTreeCosts(E, 0, ScalarCost, ScalarCost,
- "Calculated GEPs cost for Tree"));
- return InstructionCost{TTI::TCC_Free};
- }
- VecCost = TTI->getPointersChainCost(
- PtrsRetainedInVecCode, BasePtr,
- TTI::PointersChainInfo::getKnownStride(), VecTy, CostKind);
- } else {
- // Case 1: Ptrs are the arguments of loads that we are going to transform
- // into masked gather load intrinsic.
- // All the scalar GEPs will be removed as a result of vectorization.
- // For any external uses of some lanes extract element instructions will
- // be generated (which cost is estimated separately).
- TTI::PointersChainInfo PtrsInfo =
- all_of(Ptrs,
- [](const Value *V) {
- auto *Ptr = dyn_cast<GetElementPtrInst>(V);
- return Ptr && !Ptr->hasAllConstantIndices();
- })
- ? TTI::PointersChainInfo::getUnknownStride()
- : TTI::PointersChainInfo::getKnownStride();
-
- ScalarCost = TTI->getPointersChainCost(Ptrs, BasePtr, PtrsInfo, ScalarTy,
- CostKind);
- if (auto *BaseGEP = dyn_cast<GEPOperator>(BasePtr)) {
- SmallVector<const Value *> Indices(BaseGEP->indices());
- VecCost = TTI->getGEPCost(BaseGEP->getSourceElementType(),
- BaseGEP->getPointerOperand(), Indices, VecTy,
- CostKind);
- }
- }
-
+ InstructionCost ScalarCost = 0;
+ InstructionCost VecCost = 0;
+ std::tie(ScalarCost, VecCost) = getGEPCosts(
+ *TTI, Ptrs, BasePtr, E->getOpcode(), CostKind, ScalarTy, VecTy);
LLVM_DEBUG(dumpTreeCosts(E, 0, VecCost, ScalarCost,
"Calculated GEPs cost for Tree"));
More information about the llvm-commits
mailing list