[llvm] d00158c - [SLP][NFC]Introduce ShuffleCostEstimator and adjustExtracts member function.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 11 12:48:35 PDT 2023
Author: Alexey Bataev
Date: 2023-04-11T12:47:07-07:00
New Revision: d00158cd284597165f1dd4245fa3e9ae2fe82eb5
URL: https://github.com/llvm/llvm-project/commit/d00158cd284597165f1dd4245fa3e9ae2fe82eb5
DIFF: https://github.com/llvm/llvm-project/commit/d00158cd284597165f1dd4245fa3e9ae2fe82eb5.diff
LOG: [SLP][NFC]Introduce ShuffleCostEstimator and adjustExtracts member function.
Added ShuffleCostEstimator class and the first adjustExtracts member,
which is just a copy of previous AdjustExtractCost lambda.
Differential Revision: https://reviews.llvm.org/D147787
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c0c080abf6256..2a20b332ea2e1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1080,6 +1080,7 @@ namespace slpvectorizer {
class BoUpSLP {
struct TreeEntry;
struct ScheduleData;
+ class ShuffleCostEstimator;
class ShuffleInstructionBuilder;
public:
@@ -6792,43 +6793,38 @@ class BaseShuffleAnalysis {
};
} // namespace
-InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
- ArrayRef<Value *> VectorizedVals) {
- ArrayRef<Value *> VL = E->Scalars;
-
- Type *ScalarTy = VL[0]->getType();
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
- ScalarTy = SI->getValueOperand()->getType();
- else if (CmpInst *CI = dyn_cast<CmpInst>(VL[0]))
- ScalarTy = CI->getOperand(0)->getType();
- else if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
- ScalarTy = IE->getOperand(1)->getType();
- auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-
- // If we have computed a smaller type for the expression, update VecTy so
- // that the costs will be accurate.
- if (MinBWs.count(VL[0]))
- VecTy = FixedVectorType::get(
- IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
- unsigned EntryVF = E->getVectorFactor();
- auto *FinalVecTy = FixedVectorType::get(VecTy->getElementType(), EntryVF);
+/// Merges shuffle masks and emits final shuffle instruction, if required. It
+/// supports shuffling of 2 input vectors. It implements lazy shuffles emission,
+/// when the actual shuffle instruction is generated only if this is actually
+/// required. Otherwise, the shuffle instruction emission is delayed till the
+/// end of the process, to reduce the number of emitted instructions and further
+/// analysis/transformations.
+class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
+ bool IsFinalized = false;
+ const TargetTransformInfo &TTI;
+ InstructionCost Cost = 0;
+ ArrayRef<Value *> VectorizedVals;
+ BoUpSLP &R;
+ constexpr static TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
- // FIXME: it tries to fix a problem with MSVC buildbots.
- TargetTransformInfo *TTI = this->TTI;
- auto AdjustExtractsCost = [=](InstructionCost &Cost,
- ArrayRef<int> Mask) -> Value * {
+public:
+ ShuffleCostEstimator(TargetTransformInfo &TTI,
+ ArrayRef<Value *> VectorizedVals, BoUpSLP &R)
+ : TTI(TTI), VectorizedVals(VectorizedVals), R(R) {}
+ Value *adjustExtracts(const TreeEntry *E, ArrayRef<int> Mask) {
if (Mask.empty())
return nullptr;
Value *VecBase = nullptr;
+ ArrayRef<Value *> VL = E->Scalars;
+ auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
// If the resulting type is scalarized, do not adjust the cost.
- unsigned VecNumParts = TTI->getNumberOfParts(VecTy);
+ unsigned VecNumParts = TTI.getNumberOfParts(VecTy);
if (VecNumParts == VecTy->getNumElements())
return nullptr;
DenseMap<Value *, int> ExtractVectorsTys;
SmallPtrSet<Value *, 4> CheckedExtracts;
for (auto [I, V] : enumerate(VL)) {
+ // Ignore non-extractelement scalars.
if (isa<UndefValue>(V) || (!Mask.empty() && Mask[I] == UndefMaskElem))
continue;
// If all users of instruction are going to be vectorized and this
@@ -6837,9 +6833,9 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// vectorized tree.
// Also, avoid adjusting the cost for extractelements with multiple uses
// in different graph entries.
- const TreeEntry *VE = getTreeEntry(V);
+ const TreeEntry *VE = R.getTreeEntry(V);
if (!CheckedExtracts.insert(V).second ||
- !areAllUsersVectorized(cast<Instruction>(V), VectorizedVals) ||
+ !R.areAllUsersVectorized(cast<Instruction>(V), VectorizedVals) ||
(VE && VE != E))
continue;
auto *EE = cast<ExtractElementInst>(V);
@@ -6848,7 +6844,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
if (!EEIdx)
continue;
unsigned Idx = *EEIdx;
- if (VecNumParts != TTI->getNumberOfParts(EE->getVectorOperandType())) {
+ if (VecNumParts != TTI.getNumberOfParts(EE->getVectorOperandType())) {
auto It =
ExtractVectorsTys.try_emplace(EE->getVectorOperand(), Idx).first;
It->getSecond() = std::min<int>(It->second, Idx);
@@ -6861,18 +6857,17 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
})) {
// Use getExtractWithExtendCost() to calculate the cost of
// extractelement/ext pair.
- Cost -=
- TTI->getExtractWithExtendCost(Ext->getOpcode(), Ext->getType(),
- EE->getVectorOperandType(), Idx);
+ Cost -= TTI.getExtractWithExtendCost(Ext->getOpcode(), Ext->getType(),
+ EE->getVectorOperandType(), Idx);
// Add back the cost of s|zext which is subtracted separately.
- Cost += TTI->getCastInstrCost(
+ Cost += TTI.getCastInstrCost(
Ext->getOpcode(), Ext->getType(), EE->getType(),
TTI::getCastContextHint(Ext), CostKind, Ext);
continue;
}
}
- Cost -= TTI->getVectorInstrCost(*EE, EE->getVectorOperandType(), CostKind,
- Idx);
+ Cost -= TTI.getVectorInstrCost(*EE, EE->getVectorOperandType(), CostKind,
+ Idx);
}
// Add a cost for subvector extracts/inserts if required.
for (const auto &Data : ExtractVectorsTys) {
@@ -6880,37 +6875,70 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
unsigned NumElts = VecTy->getNumElements();
if (Data.second % NumElts == 0)
continue;
- if (TTI->getNumberOfParts(EEVTy) > VecNumParts) {
+ if (TTI.getNumberOfParts(EEVTy) > VecNumParts) {
unsigned Idx = (Data.second / NumElts) * NumElts;
unsigned EENumElts = EEVTy->getNumElements();
if (Idx % NumElts == 0)
continue;
if (Idx + NumElts <= EENumElts) {
- Cost +=
- TTI->getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
- EEVTy, std::nullopt, CostKind, Idx, VecTy);
+ Cost += TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
+ EEVTy, std::nullopt, CostKind, Idx, VecTy);
} else {
// Need to round up the subvector type vectorization factor to avoid a
// crash in cost model functions. Make SubVT so that Idx + VF of SubVT
// <= EENumElts.
auto *SubVT =
FixedVectorType::get(VecTy->getElementType(), EENumElts - Idx);
- Cost +=
- TTI->getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
- EEVTy, std::nullopt, CostKind, Idx, SubVT);
+ Cost += TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
+ EEVTy, std::nullopt, CostKind, Idx, SubVT);
}
} else {
- Cost += TTI->getShuffleCost(TargetTransformInfo::SK_InsertSubvector,
- VecTy, std::nullopt, CostKind, 0, EEVTy);
+ Cost += TTI.getShuffleCost(TargetTransformInfo::SK_InsertSubvector,
+ VecTy, std::nullopt, CostKind, 0, EEVTy);
}
}
return VecBase;
- };
+ }
+ /// Finalize emission of the shuffles.
+ InstructionCost finalize() {
+ IsFinalized = true;
+ return Cost;
+ }
+
+ ~ShuffleCostEstimator() {
+ assert(IsFinalized && "Shuffle construction must be finalized.");
+ }
+};
+
+InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
+ ArrayRef<Value *> VectorizedVals) {
+ ArrayRef<Value *> VL = E->Scalars;
+
+ Type *ScalarTy = VL[0]->getType();
+ if (auto *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+ else if (auto *CI = dyn_cast<CmpInst>(VL[0]))
+ ScalarTy = CI->getOperand(0)->getType();
+ else if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
+ ScalarTy = IE->getOperand(1)->getType();
+ auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+
+ // If we have computed a smaller type for the expression, update VecTy so
+ // that the costs will be accurate.
+ if (MinBWs.count(VL[0]))
+ VecTy = FixedVectorType::get(
+ IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
+ unsigned EntryVF = E->getVectorFactor();
+ auto *FinalVecTy = FixedVectorType::get(VecTy->getElementType(), EntryVF);
+
+ bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
if (E->State == TreeEntry::NeedToGather) {
if (allConstant(VL))
return 0;
if (isa<InsertElementInst>(VL[0]))
return InstructionCost::getInvalid();
+ ShuffleCostEstimator Estimator(*TTI, VectorizedVals, *this);
unsigned VF = E->getVectorFactor();
SmallVector<int> ReuseShuffleIndicies(E->ReuseShuffleIndices.begin(),
E->ReuseShuffleIndices.end());
@@ -6933,15 +6961,15 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
if (UserIgnoreList)
IgnoredVals.assign(UserIgnoreList->begin(), UserIgnoreList->end());
- InstructionCost Cost = 0;
bool Resized = false;
- if (Value *VecBase = AdjustExtractsCost(Cost, ExtractMask))
+ if (Value *VecBase = Estimator.adjustExtracts(E, ExtractMask))
if (auto *VecBaseTy = dyn_cast<FixedVectorType>(VecBase->getType()))
if (VF == VecBaseTy->getNumElements() && GatheredScalars.size() != VF) {
Resized = true;
GatheredScalars.append(VF - GatheredScalars.size(),
PoisonValue::get(ScalarTy));
}
+ InstructionCost ExtractCost = Estimator.finalize();
// Do not try to look for reshuffled loads for gathered loads (they will be
// handled later), for vectorized scalars, and cases, which are definitely
@@ -7003,11 +7031,10 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// single input vector or of 2 input vectors.
InstructionCost Cost =
computeExtractCost(VL, VecTy, *ExtractShuffle, ExtractMask, *TTI);
- AdjustExtractsCost(Cost, ExtractMask);
if (NeedToShuffleReuses)
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
FinalVecTy, E->ReuseShuffleIndices);
- return Cost;
+ return Cost + ExtractCost;
}
if (isSplat(VL)) {
// Found the broadcasting of the single scalar, calculate the cost as the
More information about the llvm-commits
mailing list