[clang] [clang-tools-extra] [llvm] [SLP]Improve findReusedOrderedScalars and graph rotation. (PR #77529)
Simon Pilgrim via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 30 04:23:52 PST 2024
================
@@ -3779,65 +3780,169 @@ static void reorderOrder(SmallVectorImpl<unsigned> &Order, ArrayRef<int> Mask,
std::optional<BoUpSLP::OrdersType>
BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
assert(TE.State == TreeEntry::NeedToGather && "Expected gather node only.");
- unsigned NumScalars = TE.Scalars.size();
+ // Try to find subvector extract/insert patterns and reorder only such
+ // patterns.
+ SmallVector<Value *> GatheredScalars(TE.Scalars.begin(), TE.Scalars.end());
+ Type *ScalarTy = GatheredScalars.front()->getType();
+ int NumScalars = GatheredScalars.size();
+ if (!isValidElementType(ScalarTy))
+ return std::nullopt;
+ auto *VecTy = FixedVectorType::get(ScalarTy, NumScalars);
+ int NumParts = TTI->getNumberOfParts(VecTy);
+ if (NumParts == 0 || NumParts >= NumScalars)
+ NumParts = 1;
+ SmallVector<int> ExtractMask;
+ SmallVector<int> Mask;
+ SmallVector<SmallVector<const TreeEntry *>> Entries;
+ SmallVector<std::optional<TargetTransformInfo::ShuffleKind>> ExtractShuffles =
+ tryToGatherExtractElements(GatheredScalars, ExtractMask, NumParts);
+ SmallVector<std::optional<TargetTransformInfo::ShuffleKind>> GatherShuffles =
+ isGatherShuffledEntry(&TE, GatheredScalars, Mask, Entries, NumParts,
+ /*ForOrder=*/true);
+ // No shuffled operands - ignore.
+ if (GatherShuffles.empty() && ExtractShuffles.empty())
+ return std::nullopt;
OrdersType CurrentOrder(NumScalars, NumScalars);
- SmallVector<int> Positions;
- SmallBitVector UsedPositions(NumScalars);
- const TreeEntry *STE = nullptr;
- // Try to find all gathered scalars that are gets vectorized in other
- // vectorize node. Here we can have only one single tree vector node to
- // correctly identify order of the gathered scalars.
- for (unsigned I = 0; I < NumScalars; ++I) {
- Value *V = TE.Scalars[I];
- if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
- continue;
- if (const auto *LocalSTE = getTreeEntry(V)) {
- if (!STE)
- STE = LocalSTE;
- else if (STE != LocalSTE)
- // Take the order only from the single vector node.
- return std::nullopt;
- unsigned Lane =
- std::distance(STE->Scalars.begin(), find(STE->Scalars, V));
- if (Lane >= NumScalars)
- return std::nullopt;
- if (CurrentOrder[Lane] != NumScalars) {
- if (Lane != I)
+ if (GatherShuffles.size() == 1 &&
+ *GatherShuffles.front() == TTI::SK_PermuteSingleSrc &&
+ Entries.front().front()->isSame(TE.Scalars)) {
+ // Exclude nodes for strided geps from analysis, better to reorder them.
+ if (!TE.UserTreeIndices.empty() &&
+ TE.UserTreeIndices.front().UserTE->State ==
+ TreeEntry::PossibleStridedVectorize &&
+ Entries.front().front()->State == TreeEntry::NeedToGather)
+ return std::nullopt;
+ // Perfect match in the graph, will reuse the previously vectorized
+ // node. Cost is 0.
+ std::iota(CurrentOrder.begin(), CurrentOrder.end(), 0);
+ return CurrentOrder;
+ }
+ auto IsBroadcastMask = [](ArrayRef<int> Mask) {
+ int SingleElt = PoisonMaskElem;
+ return all_of(Mask, [&](int I) {
+ if (SingleElt == PoisonMaskElem && I != PoisonMaskElem)
+ SingleElt = I;
+ return I == PoisonMaskElem || I == SingleElt;
+ });
+ };
+ // Exclusive broadcast mask - ignore.
+ if ((ExtractShuffles.empty() && IsBroadcastMask(Mask) &&
+ (Entries.size() != 1 ||
+ Entries.front().front()->ReorderIndices.empty())) ||
+ (GatherShuffles.empty() && IsBroadcastMask(ExtractMask)))
+ return std::nullopt;
+ SmallBitVector ShuffledSubMasks(NumParts);
+ auto TransformMaskToOrder = [&](MutableArrayRef<unsigned> CurrentOrder,
+ ArrayRef<int> Mask, int PartSz, int NumParts,
+ function_ref<unsigned(unsigned)> GetVF) {
+ for (int I : seq<int>(0, NumParts)) {
----------------
RKSimon wrote:
why use seq instead of just a basic for loop?
https://github.com/llvm/llvm-project/pull/77529
More information about the cfe-commits
mailing list