[llvm] f9b438b - [SLP] Outline GEP chain cost modeling into new TTI interface - NFCI.
Valery N Dmitriev via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 14 14:01:46 PDT 2023
Author: Valery N Dmitriev
Date: 2023-03-14T14:01:34-07:00
New Revision: f9b438b519716febaddb72c774b02a01225bd712
URL: https://github.com/llvm/llvm-project/commit/f9b438b519716febaddb72c774b02a01225bd712
DIFF: https://github.com/llvm/llvm-project/commit/f9b438b519716febaddb72c774b02a01225bd712.diff
LOG: [SLP] Outline GEP chain cost modeling into new TTI interface - NFCI.
Cost modeling for GEPs should actually be target dependent, but it is currently
done inside SLP in a target-independent way.
Sinking it into TTI enables a target-dependent implementation.
This patch adds a new TTI interface and an implementation of the basic
functionality, trying to retain the existing cost modeling.
Differential Revision: https://reviews.llvm.org/D144770
Added:
Modified:
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 9742429203552..ba8977678d36f 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -279,6 +279,51 @@ class TargetTransformInfo {
ArrayRef<const Value *> Operands,
TargetCostKind CostKind = TCK_SizeAndLatency) const;
+ /// Describe known properties for a set of pointers.
+ struct PointersChainInfo {
+ /// All the GEPs in a set have same base address.
+ unsigned IsSameBaseAddress : 1;
+ /// These properties are only valid if IsSameBaseAddress is set.
+ /// True if distance between any two neighbouring pointers is same value.
+ unsigned IsUniformStride : 1;
+ /// True if distance between any two neighbouring pointers is a known value.
+ unsigned IsKnownStride : 1;
+ unsigned Reserved : 29;
+
+ bool isSameBase() const { return IsSameBaseAddress; }
+ bool isUniformStride() const {
+ return IsSameBaseAddress && IsUniformStride;
+ }
+ bool isKnownStride() const { return IsSameBaseAddress && IsKnownStride; }
+
+ static PointersChainInfo getKnownUniformStrided() {
+ return {/*IsSameBaseAddress=*/1, /*IsUniformStride=*/1,
+ /*IsKnownStride=*/1, 0};
+ }
+ static PointersChainInfo getUniformStrided() {
+ return {/*IsSameBaseAddress=*/1, /*IsUniformStride=*/1,
+ /*IsKnownStride=*/0, 0};
+ }
+ static PointersChainInfo getKnownNonUniformStrided() {
+ return {/*IsSameBaseAddress=*/1, /*IsUniformStride=*/0,
+ /*IsKnownStride=*/1, 0};
+ }
+ static PointersChainInfo getNonUniformStrided() {
+ return {/*IsSameBaseAddress=*/1, /*IsUniformStride=*/0,
+ /*IsKnownStride=*/0, 0};
+ }
+ };
+ static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
+
+ /// Estimate the cost of a chain of pointers (typically pointer operands of a
+ /// chain of loads or stores within same block) operations set when lowered.
+ InstructionCost
+ getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
+ const PointersChainInfo &Info,
+ TargetCostKind CostKind = TTI::TCK_RecipThroughput
+
+ ) const;
+
/// \returns A value by which our inlining threshold should be multiplied.
/// This is primarily used to bump up the inlining threshold wholesale on
/// targets where calls are unusually expensive.
@@ -1612,6 +1657,10 @@ class TargetTransformInfo::Concept {
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) = 0;
+ virtual InstructionCost
+ getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
+ const TTI::PointersChainInfo &Info,
+ TTI::TargetCostKind CostKind) = 0;
virtual unsigned getInliningThresholdMultiplier() = 0;
virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
virtual int getInlinerVectorBonusPercent() = 0;
@@ -1970,6 +2019,12 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
TargetTransformInfo::TargetCostKind CostKind) override {
return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind);
}
+ InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
+ const Value *Base,
+ const PointersChainInfo &Info,
+ TargetCostKind CostKind) override {
+ return Impl.getPointersChainCost(Ptrs, Base, Info, CostKind);
+ }
unsigned getInliningThresholdMultiplier() override {
return Impl.getInliningThresholdMultiplier();
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index e18c5b1587152..18b31396f2744 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1038,6 +1038,42 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
return TTI::TCC_Basic;
}
+ InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
+ const Value *Base,
+ const TTI::PointersChainInfo &Info,
+ TTI::TargetCostKind CostKind) {
+ InstructionCost Cost = TTI::TCC_Free;
+ // In the basic model we take into account GEP instructions only
+ // (although here can come alloca instruction, a value, constants and/or
+ // constant expressions, PHIs, bitcasts ... whatever allowed to be used as a
+ // pointer). Typically, if Base is not a GEP instruction and all the
+ // pointers are relative to the same base address, all the rest are
+ // either GEP instructions, PHIs, bitcasts or constants. When we have same
+ // base, we just calculate cost of each non-Base GEP as an ADD operation if
+ // any of their indices is a non-const.
+ // If there are no known dependencies between the pointers, cost is
+ // calculated as a sum of costs of GEP instructions.
+ for (const Value *V : Ptrs) {
+ const auto *GEP = dyn_cast<GetElementPtrInst>(V);
+ if (!GEP)
+ continue;
+ if (Info.isSameBase() && V != Base) {
+ if (GEP->hasAllConstantIndices())
+ continue;
+ Cost += static_cast<T *>(this)->getArithmeticInstrCost(
+ Instruction::Add, GEP->getType(), CostKind,
+ {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
+ std::nullopt);
+ } else {
+ SmallVector<const Value *> Indices(GEP->indices());
+ Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
+ GEP->getPointerOperand(),
+ Indices, CostKind);
+ }
+ }
+ return Cost;
+ }
+
InstructionCost getInstructionCost(const User *U,
ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) {
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 5ef329f600f3e..f444b39a4fe77 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -220,6 +220,14 @@ TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind);
}
+InstructionCost TargetTransformInfo::getPointersChainCost(
+ ArrayRef<const Value *> Ptrs, const Value *Base,
+ const TTI::PointersChainInfo &Info, TTI::TargetCostKind CostKind) const {
+ assert((Base || !Info.isSameBase()) &&
+ "If pointers have same base address it has to be provided.");
+ return TTIImpl->getPointersChainCost(Ptrs, Base, Info, CostKind);
+}
+
unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(
const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) const {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6b08c6a49f34d..7bc9a4ff548d3 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7170,37 +7170,85 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// Calculate cost difference from vectorizing set of GEPs.
// Negative value means vectorizing is profitable.
auto GetGEPCostDiff = [=](ArrayRef<Value *> Ptrs, Value *BasePtr) {
- InstructionCost CostSavings = 0;
- for (Value *V : Ptrs) {
- if (V == BasePtr)
- continue;
- auto *Ptr = dyn_cast<GetElementPtrInst>(V);
- // GEPs may contain just addresses without instructions, considered free.
- // GEPs with all constant indices also considered to have zero cost.
- if (!Ptr || Ptr->hasAllConstantIndices())
- continue;
-
- // Here we differentiate two cases: when GEPs represent a regular
- // vectorization tree node (and hence vectorized) and when the set is
- // arguments of a set of loads or stores being vectorized. In the former
- // case all the scalar GEPs will be removed as a result of vectorization.
+ InstructionCost ScalarCost = 0;
+ InstructionCost VecCost = 0;
+ // Here we differentiate two cases: (1) when Ptrs represent a regular
+ // vectorization tree node (as they are pointer arguments of scattered
+ // loads) or (2) when Ptrs are the arguments of loads or stores being
+ // vectorized as plane wide unit-stride load/store since all the
+ // loads/stores are known to be from/to adjacent locations.
+ assert(E->State == TreeEntry::Vectorize &&
+ "Entry state expected to be Vectorize here.");
+ if (isa<LoadInst, StoreInst>(VL0)) {
+ // Case 2: estimate costs for pointer related costs when vectorizing to
+ // a wide load/store.
+ // Scalar cost is estimated as a set of pointers with known relationship
+ // between them.
+ // For vector code we will use BasePtr as argument for the wide load/store
+ // but we also need to account all the instructions which are going to
+ // stay in vectorized code due to uses outside of these scalar
+ // loads/stores.
+ ScalarCost = TTI->getPointersChainCost(
+ Ptrs, BasePtr, TTI::PointersChainInfo::getKnownUniformStrided(),
+ CostKind);
+
+ SmallVector<const Value *> PtrsRetainedInVecCode;
+ for (Value *V : Ptrs) {
+ if (V == BasePtr) {
+ PtrsRetainedInVecCode.push_back(V);
+ continue;
+ }
+ auto *Ptr = dyn_cast<GetElementPtrInst>(V);
+ // For simplicity assume Ptr to stay in vectorized code if it's not a
+ // GEP instruction. We don't care since its cost is considered free.
+ // TODO: We should check for any uses outside of vectorizable tree
+ // rather than just single use.
+ if (!Ptr || !Ptr->hasOneUse())
+ PtrsRetainedInVecCode.push_back(V);
+ }
+
+ if (PtrsRetainedInVecCode.size() == Ptrs.size()) {
+ // If all pointers stay in vectorized code then we don't have
+ // any savings on that.
+ LLVM_DEBUG(dumpTreeCosts(E, 0, ScalarCost, ScalarCost,
+ "Calculated GEPs cost for Tree"));
+ return InstructionCost{TTI::TCC_Free};
+ }
+ VecCost = TTI->getPointersChainCost(
+ PtrsRetainedInVecCode, BasePtr,
+ TTI::PointersChainInfo::getKnownNonUniformStrided(), CostKind);
+ } else {
+ // Case 1: Ptrs are the arguments of loads that we are going to transform
+ // into masked gather load intrinsic.
+ // All the scalar GEPs will be removed as a result of vectorization.
// For any external uses of some lanes extract element instructions will
- // be generated (which cost is estimated separately). For the latter case
- // since the set of GEPs itself is not vectorized those used more than
- // once will remain staying in vectorized code as well. So we should not
- // count them as savings.
- if (!Ptr->hasOneUse() && isa<LoadInst, StoreInst>(VL0))
- continue;
+ // be generated (which cost is estimated separately).
+ TTI::PointersChainInfo PtrsInfo =
+ all_of(Ptrs,
+ [](const Value *V) {
+ auto *Ptr = dyn_cast<GetElementPtrInst>(V);
+ return Ptr && !Ptr->hasAllConstantIndices();
+ })
+ ? TTI::PointersChainInfo::getNonUniformStrided()
+ : TTI::PointersChainInfo::getKnownNonUniformStrided();
+
+ ScalarCost = TTI->getPointersChainCost(Ptrs, BasePtr, PtrsInfo, CostKind);
- // TODO: it is target dependent, so need to implement and then use a TTI
- // interface.
- CostSavings += TTI->getArithmeticInstrCost(Instruction::Add,
- Ptr->getType(), CostKind);
+ // Remark: it is not quite correct to use scalar GEP cost for a vector GEP,
+ // but it's not clear how to do that without having vector GEP arguments
+ // ready.
+ // Perhaps using just TTI::TCC_Free/TTI::TCC_Basic would be better option.
+ if (const auto *Base = dyn_cast<GetElementPtrInst>(BasePtr)) {
+ SmallVector<const Value *> Indices(Base->indices());
+ VecCost = TTI->getGEPCost(Base->getSourceElementType(),
+ Base->getPointerOperand(), Indices, CostKind);
+ }
}
- LLVM_DEBUG(dbgs() << "SLP: Calculated GEPs cost savings or Tree:\n";
- E->dump());
- LLVM_DEBUG(dbgs() << "SLP: GEP cost saving = " << CostSavings << "\n");
- return InstructionCost() - CostSavings;
+
+ LLVM_DEBUG(dumpTreeCosts(E, 0, VecCost, ScalarCost,
+ "Calculated GEPs cost for Tree"));
+
+ return VecCost - ScalarCost;
};
switch (ShuffleOrOp) {
More information about the llvm-commits
mailing list