[llvm] 036e48e - [SLP]Fix PR76850: do the analysis of the submask.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 8 07:53:34 PST 2024
Author: Alexey Bataev
Date: 2024-01-08T07:51:02-08:00
New Revision: 036e48e2f5f890e1f9574cdb610e2336f12038a2
URL: https://github.com/llvm/llvm-project/commit/036e48e2f5f890e1f9574cdb610e2336f12038a2
DIFF: https://github.com/llvm/llvm-project/commit/036e48e2f5f890e1f9574cdb610e2336f12038a2.diff
LOG: [SLP]Fix PR76850: do the analysis of the submask.
Need to limit the transformation of the VecMask by the corresponding part of the mask of SliceSize size to avoid compiler crash during further cost analysis.
Added:
llvm/test/Transforms/SLPVectorizer/X86/splat-buildvector.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cbe767537a1d63..8e22b54f002d1c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10596,7 +10596,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
inversePermutation(E->ReorderIndices, ReorderMask);
if (!ReorderMask.empty())
reorderScalars(GatheredScalars, ReorderMask);
- auto FindReusedSplat = [&](MutableArrayRef<int> Mask, unsigned InputVF) {
+ auto FindReusedSplat = [&](MutableArrayRef<int> Mask, unsigned InputVF,
+ unsigned I, unsigned SliceSize) {
if (!isSplat(E->Scalars) || none_of(E->Scalars, [](Value *V) {
return isa<UndefValue>(V) && !isa<PoisonValue>(V);
}))
@@ -10619,11 +10620,13 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
Idx == 0) ||
(Mask.size() == InputVF &&
ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))) {
- std::iota(Mask.begin(), Mask.end(), 0);
+ std::iota(std::next(Mask.begin(), I * SliceSize),
+ std::next(Mask.begin(), (I + 1) * SliceSize), 0);
} else {
- unsigned I =
+ unsigned IVal =
*find_if_not(Mask, [](int Idx) { return Idx == PoisonMaskElem; });
- std::fill(Mask.begin(), Mask.end(), I);
+ std::fill(std::next(Mask.begin(), I * SliceSize),
+ std::next(Mask.begin(), (I + 1) * SliceSize), IVal);
}
return true;
};
@@ -10872,7 +10875,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
} else if (Vec1) {
IsUsedInExpr &= FindReusedSplat(
ExtractMask,
- cast<FixedVectorType>(Vec1->getType())->getNumElements());
+ cast<FixedVectorType>(Vec1->getType())->getNumElements(), 0,
+ ExtractMask.size());
ShuffleBuilder.add(Vec1, ExtractMask, /*ForExtracts=*/true);
IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1);
} else {
@@ -10898,7 +10902,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
copy(SubMask, std::next(VecMask.begin(), I * SliceSize));
if (TEs.size() == 1) {
IsUsedInExpr &=
- FindReusedSplat(VecMask, TEs.front()->getVectorFactor());
+ FindReusedSplat(VecMask, TEs.front()->getVectorFactor(), I, SliceSize);
ShuffleBuilder.add(*TEs.front(), VecMask);
if (TEs.front()->VectorizedValue)
IsNonPoisoned &=
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/splat-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/splat-buildvector.ll
new file mode 100644
index 00000000000000..5e5981bdaaa804
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/splat-buildvector.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt --passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu %s -o - -slp-threshold=-100 | FileCheck %s
+declare i64 @llvm.smax.i64(i64, i64)
+
+define i8 @foo(i64 %val_i64_57) {
+; CHECK-LABEL: define i8 @foo(
+; CHECK-SAME: i64 [[VAL_I64_57:%.*]]) {
+; CHECK-NEXT: entry_1:
+; CHECK-NEXT: [[VAL_I64_58:%.*]] = call i64 @llvm.smax.i64(i64 0, i64 1)
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> <i64 0, i64 poison, i64 poison, i64 0>, i64 [[VAL_I64_57]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[VAL_I64_58]], i32 2
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> <i32 2, i32 2, i32 0, i32 1>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <4 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp sle <4 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret i8 0
+;
+entry_1:
+ %val_i64_58 = call i64 @llvm.smax.i64(i64 0, i64 1)
+ %val_i1_89 = icmp ule i64 %val_i64_57, %val_i64_58
+ %val_i1_95 = icmp sle i64 0, undef
+ %val_i1_98 = icmp uge i64 %val_i64_58, %val_i64_58
+ %val_i1_99 = icmp ule i64 0, %val_i64_58
+ ret i8 0
+}
More information about the llvm-commits
mailing list