[llvm] [SLP]Add cost estimation for gather node reshuffling (PR #115201)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 20 05:23:32 PST 2024
================
@@ -13134,26 +13137,150 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
// Pair.first is the offset to the vector, while Pair.second is the index of
// scalar in the list.
for (const std::pair<unsigned, int> &Pair : EntryLanes) {
- unsigned Idx = Part * VL.size() + Pair.second;
+ int Idx = Part * VL.size() + Pair.second;
Mask[Idx] =
Pair.first * VF +
(ForOrder ? std::distance(
Entries[Pair.first]->Scalars.begin(),
find(Entries[Pair.first]->Scalars, VL[Pair.second]))
: Entries[Pair.first]->findLaneForValue(VL[Pair.second]));
- IsIdentity &= Mask[Idx] == Pair.second;
+ IsIdentity &= Mask[Idx] % VL.size() == Idx % VL.size();
}
- switch (Entries.size()) {
- case 1:
- if (IsIdentity || EntryLanes.size() > 1 || VL.size() <= 2)
- return TargetTransformInfo::SK_PermuteSingleSrc;
- break;
- case 2:
- if (EntryLanes.size() > 2 || VL.size() <= 2)
- return TargetTransformInfo::SK_PermuteTwoSrc;
- break;
- default:
- break;
+ if (ForOrder || IsIdentity || Entries.empty()) {
+ switch (Entries.size()) {
+ case 1:
+ if (IsIdentity || EntryLanes.size() > 1 || VL.size() <= 2)
+ return TargetTransformInfo::SK_PermuteSingleSrc;
+ break;
+ case 2:
+ if (EntryLanes.size() > 2 || VL.size() <= 2)
+ return TargetTransformInfo::SK_PermuteTwoSrc;
+ break;
+ default:
+ break;
+ }
+ } else if (!isa<VectorType>(VL.front()->getType()) &&
+ (EntryLanes.size() > Entries.size() || VL.size() <= 2)) {
+ // Do the cost estimation to check if the shuffle is more beneficial than a buildvector.
+ SmallVector<int> SubMask(std::next(Mask.begin(), Part * VL.size()),
+ std::next(Mask.begin(), (Part + 1) * VL.size()));
+ int MinElement = SubMask.front(), MaxElement = SubMask.front();
+ for (int Idx : SubMask) {
+ if (Idx == PoisonMaskElem)
+ continue;
+ if (MinElement == PoisonMaskElem || MinElement % VF > Idx % VF)
+ MinElement = Idx;
+ if (MaxElement == PoisonMaskElem || MaxElement % VF < Idx % VF)
+ MaxElement = Idx;
+ }
+ assert(MaxElement >= 0 && MinElement >= 0 &&
+ MaxElement % VF >= MinElement % VF &&
+ "Expected at least single element.");
+ unsigned NewVF = std::max<unsigned>(
+ VL.size(), getFullVectorNumberOfElements(*TTI, VL.front()->getType(),
+ (MaxElement % VF) -
+ (MinElement % VF) + 1));
+ if (NewVF < VF) {
+ for_each(SubMask, [&](int &Idx) {
+ if (Idx == PoisonMaskElem)
+ return;
+ Idx = (Idx % VF) - (MinElement % VF) +
+ (Idx >= static_cast<int>(VF) ? NewVF : 0);
+ });
+ VF = NewVF;
+ }
+
+ constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ auto *VecTy = getWidenedType(VL.front()->getType(), VF);
+ auto *MaskVecTy = getWidenedType(VL.front()->getType(), SubMask.size());
+ auto GetShuffleCost = [&,
+ &TTI = *TTI](ArrayRef<int> Mask,
+ ArrayRef<const TreeEntry *> Entries,
+ VectorType *VecTy) -> InstructionCost {
+ if (Entries.size() == 1 && Entries.front()->getInterleaveFactor() > 0 &&
+ ShuffleVectorInst::isDeInterleaveMaskOfFactor(
+ Mask, Entries.front()->getInterleaveFactor()))
+ return TTI::TCC_Free;
+ return ::getShuffleCost(TTI,
+ Entries.size() > 1 ? TTI::SK_PermuteTwoSrc
+ : TTI::SK_PermuteSingleSrc,
+ VecTy, Mask, CostKind);
+ };
+ InstructionCost ShuffleCost = GetShuffleCost(SubMask, Entries, VecTy);
+ InstructionCost FirstShuffleCost = 0;
+ SmallVector<int> FirstMask(SubMask.begin(), SubMask.end());
+ if (Entries.size() == 1 || !Entries[0]->isGather()) {
+ FirstShuffleCost = ShuffleCost;
+ } else {
+ // Transform mask to include only first entry.
+ APInt DemandedElts = APInt::getAllOnes(SubMask.size());
+ bool IsIdentity = true;
+ for (auto [I, Idx] : enumerate(FirstMask)) {
+ if (Idx >= static_cast<int>(VF)) {
+ Idx = PoisonMaskElem;
+ } else {
+ DemandedElts.clearBit(I);
+ if (Idx != PoisonMaskElem)
+ IsIdentity &= static_cast<int>(I) == Idx;
+ }
+ }
+ if (!IsIdentity)
+ FirstShuffleCost = GetShuffleCost(FirstMask, Entries.front(), VecTy);
+ FirstShuffleCost += TTI->getScalarizationOverhead(
+ MaskVecTy, DemandedElts, /*Insert=*/true,
+ /*Extract=*/false, CostKind);
+ }
+ InstructionCost SecondShuffleCost = 0;
+ SmallVector<int> SecondMask(SubMask.begin(), SubMask.end());
+ if (Entries.size() == 1 || !Entries[1]->isGather()) {
+ SecondShuffleCost = ShuffleCost;
+ } else {
+ // Transform mask to include only second entry.
+ APInt DemandedElts = APInt::getAllOnes(SubMask.size());
+ bool IsIdentity = true;
+ for (auto [I, Idx] : enumerate(SecondMask)) {
+ if (Idx < static_cast<int>(VF) && Idx >= 0) {
+ Idx = PoisonMaskElem;
----------------
RKSimon wrote:
Is Idx a reference here (it is assigned to inside the loop)? If so, make that explicit in the auto of the structured binding?
https://github.com/llvm/llvm-project/pull/115201
More information about the llvm-commits
mailing list