[llvm] 547ba97 - [SLP]Fix mask generation after cost estimation
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 10 08:18:07 PST 2025
Author: Alexey Bataev
Date: 2025-01-10T08:17:56-08:00
New Revision: 547ba9730bf05df3383150f730a689f2c8336206
URL: https://github.com/llvm/llvm-project/commit/547ba9730bf05df3383150f730a689f2c8336206
DIFF: https://github.com/llvm/llvm-project/commit/547ba9730bf05df3383150f730a689f2c8336206.diff
LOG: [SLP]Fix mask generation after cost estimation
When estimating the cost of entry shuffles for buildvectors, we need to
rebuild the original mask rather than the generated submask, which is used
for the subregister analysis.
Fixes #122430
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cdfec332af37ab..e0d1f0e1d43a53 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13443,14 +13443,15 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
for_each(SubMask, [&](int &Idx) {
if (Idx == PoisonMaskElem)
return;
- Idx = (Idx % VF) - (MinElement % VF) +
+ Idx = (Idx % VF) - ((MinElement / NewVF) * NewVF) +
(Idx >= static_cast<int>(VF) ? NewVF : 0);
});
- VF = NewVF;
+ } else {
+ NewVF = VF;
}
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- auto *VecTy = getWidenedType(VL.front()->getType(), VF);
+ auto *VecTy = getWidenedType(VL.front()->getType(), NewVF);
auto *MaskVecTy = getWidenedType(VL.front()->getType(), SubMask.size());
auto GetShuffleCost = [&,
&TTI = *TTI](ArrayRef<int> Mask,
@@ -13475,7 +13476,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
APInt DemandedElts = APInt::getAllOnes(SubMask.size());
bool IsIdentity = true;
for (auto [I, Idx] : enumerate(FirstMask)) {
- if (Idx >= static_cast<int>(VF)) {
+ if (Idx >= static_cast<int>(NewVF)) {
Idx = PoisonMaskElem;
} else {
DemandedElts.clearBit(I);
@@ -13498,12 +13499,12 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
APInt DemandedElts = APInt::getAllOnes(SubMask.size());
bool IsIdentity = true;
for (auto [I, Idx] : enumerate(SecondMask)) {
- if (Idx < static_cast<int>(VF) && Idx >= 0) {
+ if (Idx < static_cast<int>(NewVF) && Idx >= 0) {
Idx = PoisonMaskElem;
} else {
DemandedElts.clearBit(I);
if (Idx != PoisonMaskElem) {
- Idx -= VF;
+ Idx -= NewVF;
IsIdentity &= static_cast<int>(I) == Idx;
}
}
@@ -13523,12 +13524,24 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
/*Extract=*/false, CostKind);
const TreeEntry *BestEntry = nullptr;
if (FirstShuffleCost < ShuffleCost) {
- copy(FirstMask, std::next(Mask.begin(), Part * VL.size()));
+ std::for_each(std::next(Mask.begin(), Part * VL.size()),
+ std::next(Mask.begin(), (Part + 1) * VL.size()),
+ [&](int &Idx) {
+ if (Idx >= static_cast<int>(VF))
+ Idx = PoisonMaskElem;
+ });
BestEntry = Entries.front();
ShuffleCost = FirstShuffleCost;
}
if (SecondShuffleCost < ShuffleCost) {
- copy(SecondMask, std::next(Mask.begin(), Part * VL.size()));
+ std::for_each(std::next(Mask.begin(), Part * VL.size()),
+ std::next(Mask.begin(), (Part + 1) * VL.size()),
+ [&](int &Idx) {
+ if (Idx < static_cast<int>(VF))
+ Idx = PoisonMaskElem;
+ else
+ Idx -= VF;
+ });
BestEntry = Entries[1];
ShuffleCost = SecondShuffleCost;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll b/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
index 469421b660f319..766916fe71f351 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
@@ -10,8 +10,8 @@ define i16 @test(i16 %v1, i16 %v2) {
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i16> [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <2 x i32> <i32 0, i32 poison>
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[V2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 poison, i32 3>
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[V1]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i16> [[TMP4]], zeroinitializer
More information about the llvm-commits
mailing list