[llvm] f953b5e - [SLP]Relax assertion about subvectors mask size
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 25 08:35:17 PST 2024
Author: Alexey Bataev
Date: 2024-11-25T08:31:42-08:00
New Revision: f953b5eb72df77fc301aac210eab31c6270ff771
URL: https://github.com/llvm/llvm-project/commit/f953b5eb72df77fc301aac210eab31c6270ff771
DIFF: https://github.com/llvm/llvm-project/commit/f953b5eb72df77fc301aac210eab31c6270ff771.diff
LOG: [SLP]Relax assertion about subvectors mask size
SubVectorsMask might be smaller than CommonMask if vectors with a larger
number of elements are permuted or reused elements are used. This needs
to be taken into account when estimating the cost or building the vector
to avoid a compiler crash.
Fixes #117518
Added:
llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 73218f377a0656..34b80c1e360815 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10875,9 +10875,10 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
CommonMask[Idx] = Idx;
// Add subvectors permutation cost.
if (!SubVectorsMask.empty()) {
- assert(SubVectorsMask.size() == CommonMask.size() &&
+ assert(SubVectorsMask.size() <= CommonMask.size() &&
"Expected same size of masks for subvectors and common mask.");
- SmallVector<int> SVMask(SubVectorsMask.begin(), SubVectorsMask.end());
+ SmallVector<int> SVMask(CommonMask.size(), PoisonMaskElem);
+ copy(SubVectorsMask, SVMask.begin());
for (auto [I1, I2] : zip(SVMask, CommonMask)) {
if (I2 != PoisonMaskElem) {
assert(I1 == PoisonMaskElem && "Expected unused subvectors mask");
@@ -14372,7 +14373,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
if (SubVectorsMask.empty()) {
Vec = CreateSubVectors(Vec, CommonMask);
} else {
- SmallVector<int> SVMask(SubVectorsMask.begin(), SubVectorsMask.end());
+ SmallVector<int> SVMask(CommonMask.size(), PoisonMaskElem);
+ copy(SubVectorsMask, SVMask.begin());
for (auto [I1, I2] : zip(SVMask, CommonMask)) {
if (I2 != PoisonMaskElem) {
assert(I1 == PoisonMaskElem && "Expected unused subvectors mask");
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll
new file mode 100644
index 00000000000000..6d22bb06d5e038
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-300 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: define void @test() {
+; CHECK-NEXT: [[XOR108_I_I_I:%.*]] = xor i64 0, 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> <i64 0, i64 0, i64 poison, i64 0>, i64 [[XOR108_I_I_I]], i32 2
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i64> poison, i64 [[XOR108_I_I_I]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v8i64(<16 x i64> poison, <8 x i64> zeroinitializer, i64 0)
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v4i64(<16 x i64> [[TMP4]], <4 x i64> [[TMP2]], i64 8)
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i64> [[TMP5]], <16 x i64> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i64> [[TMP6]], <16 x i64> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 3, i32 7, i32 8, i32 9, i32 3, i32 10, i32 11, i32 12, i32 3>
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i64> [[TMP7]] to <16 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = freeze <16 x i1> [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i1> [[TMP10]] to <16 x i16>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <16 x i16> [[TMP11]], zeroinitializer
+; CHECK-NEXT: ret void
+;
+ %xor108.i.i.i = xor i64 0, 1
+ %conv115.i.i.i = trunc i64 %xor108.i.i.i to i16
+ %add.i.i.i.i = or i16 %conv115.i.i.i, 0
+ %add.i.frozen.i.i.i = freeze i16 %add.i.i.i.i
+ %.cmp.not.i.i.i = icmp eq i16 %add.i.frozen.i.i.i, 0
+ %cond.i1002.1.i.i.i = lshr i64 0, 0
+ %conv115.1.i.i.i = trunc i64 %cond.i1002.1.i.i.i to i16
+ %add.i.1.i.i.i = or i16 %conv115.1.i.i.i, 0
+ %add.i.frozen.1.i.i.i = freeze i16 %add.i.1.i.i.i
+ %.cmp.not.1.i.i.i = icmp eq i16 %add.i.frozen.1.i.i.i, 0
+ %cond.i1002.2.i.i.i = lshr i64 %xor108.i.i.i, 0
+ %conv115.2.i.i.i = trunc i64 %cond.i1002.2.i.i.i to i16
+ %add.i.2.i.i.i = or i16 %conv115.2.i.i.i, 0
+ %add.i.frozen.2.i.i.i = freeze i16 %add.i.2.i.i.i
+ %.cmp.not.2.i.i.i = icmp eq i16 %add.i.frozen.2.i.i.i, 0
+ %cond.i1002.3.i.i.i = lshr i64 0, 0
+ %conv115.3.i.i.i = trunc i64 %cond.i1002.3.i.i.i to i16
+ %add.i.3.i.i.i = or i16 %conv115.3.i.i.i, 0
+ %add.i.frozen.3.i.i.i = freeze i16 %add.i.3.i.i.i
+ %.cmp.not.3.i.i.i = icmp eq i16 %add.i.frozen.3.i.i.i, 0
+ %conv115.i.i.i.1 = trunc i64 %xor108.i.i.i to i16
+ %add.i.i.i.i.1 = or i16 %conv115.i.i.i.1, 0
+ %add.i.frozen.i.i.i.1 = freeze i16 %add.i.i.i.i.1
+ %.cmp.not.i.i.i.1 = icmp eq i16 %add.i.frozen.i.i.i.1, 0
+ %cond.i1002.1.i.i.i.1 = lshr i64 0, 0
+ %conv115.1.i.i.i.1 = trunc i64 %cond.i1002.1.i.i.i.1 to i16
+ %add.i.1.i.i.i.1 = or i16 %conv115.1.i.i.i.1, 0
+ %add.i.frozen.1.i.i.i.1 = freeze i16 %add.i.1.i.i.i.1
+ %.cmp.not.1.i.i.i.1 = icmp eq i16 %add.i.frozen.1.i.i.i.1, 0
+ %cond.i1002.2.i.i.i.1 = lshr i64 0, 0
+ %conv115.2.i.i.i.1 = trunc i64 %cond.i1002.2.i.i.i.1 to i16
+ %add.i.2.i.i.i.1 = or i16 %conv115.2.i.i.i.1, 0
+ %add.i.frozen.2.i.i.i.1 = freeze i16 %add.i.2.i.i.i.1
+ %.cmp.not.2.i.i.i.1 = icmp eq i16 %add.i.frozen.2.i.i.i.1, 0
+ %cond.i1002.3.i.i.i.1 = lshr i64 0, 0
+ %conv115.3.i.i.i.1 = trunc i64 %cond.i1002.3.i.i.i.1 to i16
+ %add.i.3.i.i.i.1 = or i16 %conv115.3.i.i.i.1, 0
+ %add.i.frozen.3.i.i.i.1 = freeze i16 %add.i.3.i.i.i.1
+ %.cmp.not.3.i.i.i.1 = icmp eq i16 %add.i.frozen.3.i.i.i.1, 0
+ %conv115.i.i.i.2 = trunc i64 %xor108.i.i.i to i16
+ %add.i.i.i.i.2 = or i16 %conv115.i.i.i.2, 0
+ %add.i.frozen.i.i.i.2 = freeze i16 %add.i.i.i.i.2
+ %.cmp.not.i.i.i.2 = icmp eq i16 %add.i.frozen.i.i.i.2, 0
+ %cond.i1002.1.i.i.i.2 = lshr i64 0, 0
+ %conv115.1.i.i.i.2 = trunc i64 %cond.i1002.1.i.i.i.2 to i16
+ %add.i.1.i.i.i.2 = or i16 %conv115.1.i.i.i.2, 0
+ %add.i.frozen.1.i.i.i.2 = freeze i16 %add.i.1.i.i.i.2
+ %.cmp.not.1.i.i.i.2 = icmp eq i16 %add.i.frozen.1.i.i.i.2, 0
+ %cond.i1002.2.i.i.i.2 = lshr i64 0, 0
+ %conv115.2.i.i.i.2 = trunc i64 %cond.i1002.2.i.i.i.2 to i16
+ %add.i.2.i.i.i.2 = or i16 %conv115.2.i.i.i.2, 0
+ %add.i.frozen.2.i.i.i.2 = freeze i16 %add.i.2.i.i.i.2
+ %.cmp.not.2.i.i.i.2 = icmp eq i16 %add.i.frozen.2.i.i.i.2, 0
+ %cond.i1002.3.i.i.i.2 = lshr i64 0, 0
+ %conv115.3.i.i.i.2 = trunc i64 %cond.i1002.3.i.i.i.2 to i16
+ %add.i.3.i.i.i.2 = or i16 %conv115.3.i.i.i.2, 0
+ %add.i.frozen.3.i.i.i.2 = freeze i16 %add.i.3.i.i.i.2
+ %.cmp.not.3.i.i.i.2 = icmp eq i16 %add.i.frozen.3.i.i.i.2, 0
+ %conv115.i.i.i.3 = trunc i64 %xor108.i.i.i to i16
+ %add.i.i.i.i.3 = or i16 %conv115.i.i.i.3, 0
+ %add.i.frozen.i.i.i.3 = freeze i16 %add.i.i.i.i.3
+ %.cmp.not.i.i.i.3 = icmp eq i16 %add.i.frozen.i.i.i.3, 0
+ %cond.i1002.1.i.i.i.3 = lshr i64 0, 0
+ %conv115.1.i.i.i.3 = trunc i64 %cond.i1002.1.i.i.i.3 to i16
+ %add.i.1.i.i.i.3 = or i16 %conv115.1.i.i.i.3, 0
+ %add.i.frozen.1.i.i.i.3 = freeze i16 %add.i.1.i.i.i.3
+ %.cmp.not.1.i.i.i.3 = icmp eq i16 %add.i.frozen.1.i.i.i.3, 0
+ %cond.i1002.2.i.i.i.3 = lshr i64 0, 0
+ %conv115.2.i.i.i.3 = trunc i64 %cond.i1002.2.i.i.i.3 to i16
+ %add.i.2.i.i.i.3 = or i16 %conv115.2.i.i.i.3, 0
+ %add.i.frozen.2.i.i.i.3 = freeze i16 %add.i.2.i.i.i.3
+ %.cmp.not.2.i.i.i.3 = icmp eq i16 %add.i.frozen.2.i.i.i.3, 0
+ %cond.i1002.3.i.i.i.3 = lshr i64 0, 0
+ %conv115.3.i.i.i.3 = trunc i64 %cond.i1002.3.i.i.i.3 to i16
+ %add.i.3.i.i.i.3 = or i16 %conv115.3.i.i.i.3, 0
+ %add.i.frozen.3.i.i.i.3 = freeze i16 %add.i.3.i.i.i.3
+ %.cmp.not.3.i.i.i.3 = icmp eq i16 %add.i.frozen.3.i.i.i.3, 0
+ ret void
+}
More information about the llvm-commits
mailing list