[llvm] a42aa8f - [SLP]Fix adjusting of the mask for the fully matched nodes.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 10 09:47:25 PST 2024
Author: Alexey Bataev
Date: 2024-12-10T09:47:16-08:00
New Revision: a42aa8f265372a15d31a2af10b548fc239ed4b51
URL: https://github.com/llvm/llvm-project/commit/a42aa8f265372a15d31a2af10b548fc239ed4b51
DIFF: https://github.com/llvm/llvm-project/commit/a42aa8f265372a15d31a2af10b548fc239ed4b51.diff
LOG: [SLP]Fix adjusting of the mask for the fully matched nodes.
When checking for poison elements in the matched node, we need to take the
register number (part) into account when clearing the corresponding mask
element.
Fixes #119393
Added:
llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c729c6cc9195ef..d90d7cc8f29e13 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13222,9 +13222,9 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
copy(CommonMask, Mask.begin());
}
// Clear undef scalars.
- for (int I = 0, Sz = VL.size(); I < Sz; ++I)
+ for (unsigned I : seq<unsigned>(VL.size()))
if (isa<PoisonValue>(VL[I]))
- Mask[I] = PoisonMaskElem;
+ Mask[Part * VL.size() + I] = PoisonMaskElem;
return TargetTransformInfo::SK_PermuteSingleSrc;
}
// No perfect match, just shuffle, so choose the first tree node from the
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll b/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll
new file mode 100644
index 00000000000000..dadd22217a3e67
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-200 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i32 @test() {
+; CHECK-LABEL: define i32 @test() {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FUNC_135_EXIT_I:.*]]
+; CHECK: [[FUNC_135_EXIT_I]]:
+; CHECK-NEXT: [[G_228_PROMOTED166_I1105_I:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[G_228_PROMOTED166_I1105_I]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 poison>
+; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], <i32 0, i32 0, i32 0, i32 poison>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 2, i32 2, i32 2, i32 poison>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP5]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 poison, i32 28, i32 29, i32 30, i32 poison>
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[G_228_PROMOTED166_I1105_I]], i32 7
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 0, i32 15
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP3]], i64 0)
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP9]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 24, i32 25, i32 26, i32 7, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <16 x i32> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = icmp ult <16 x i32> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP12]], <16 x i1> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
+; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> [[TMP14]])
+; CHECK-NEXT: [[TMP16:%.*]] = zext i1 [[TMP15]] to i16
+; CHECK-NEXT: br label %[[IF_THEN_SINK_SPLIT_I:.*]]
+; CHECK: [[FOR_COND7_PREHEADER_I1132_1_I:.*]]:
+; CHECK-NEXT: br label %[[IF_THEN_SINK_SPLIT_I]]
+; CHECK: [[IF_THEN_SINK_SPLIT_I]]:
+; CHECK-NEXT: [[XOR58_5_I_I1203_3_3_SINK_I:%.*]] = phi i16 [ 0, %[[FOR_COND7_PREHEADER_I1132_1_I]] ], [ [[TMP16]], %[[FUNC_135_EXIT_I]] ]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %func_135.exit.i
+
+func_135.exit.i:
+ %g_228.promoted166.i1105.i = phi i32 [ 0, %entry ]
+ %cmp55.i.i1199.i = icmp ugt i32 %g_228.promoted166.i1105.i, 0
+ %cmp55.1.i.i1200.i = icmp ugt i32 %g_228.promoted166.i1105.i, 0
+ %cmp55.2.i.i1201.i = icmp ugt i32 %g_228.promoted166.i1105.i, 0
+ %cmp55.3.i.i1202.i = icmp ugt i32 %g_228.promoted166.i1105.i, 0
+ %0 = xor i1 %cmp55.2.i.i1201.i, %cmp55.3.i.i1202.i
+ %1 = xor i1 %cmp55.1.i.i1200.i, %0
+ %2 = xor i1 %cmp55.i.i1199.i, %1
+ %dec.i.i1204.i = add i32 %g_228.promoted166.i1105.i, 0
+ %cmp55.i.i1199.1.i = icmp ugt i32 %dec.i.i1204.i, 0
+ %cmp55.1.i.i1200.1.i = icmp ugt i32 %dec.i.i1204.i, 0
+ %cmp55.2.i.i1201.1.i = icmp ugt i32 %dec.i.i1204.i, 0
+ %cmp55.3.i.i1202.1.i = icmp ugt i32 %dec.i.i1204.i, 0
+ %3 = xor i1 %cmp55.2.i.i1201.1.i, %cmp55.3.i.i1202.1.i
+ %4 = xor i1 %cmp55.1.i.i1200.1.i, %3
+ %5 = xor i1 %cmp55.i.i1199.1.i, %4
+ %6 = xor i1 %2, %5
+ %dec.i.i1204.1.i = add i32 %g_228.promoted166.i1105.i, 0
+ %cmp55.i.i1199.2.i = icmp ugt i32 %dec.i.i1204.1.i, 0
+ %cmp55.1.i.i1200.2.i = icmp ugt i32 %dec.i.i1204.1.i, 0
+ %cmp55.2.i.i1201.2.i = icmp ugt i32 %dec.i.i1204.1.i, 0
+ %cmp55.3.i.i1202.2.i = icmp ugt i32 %dec.i.i1204.1.i, 0
+ %7 = add i32 0, 0
+ %8 = icmp ult i32 %7, 0
+ %9 = xor i1 %cmp55.3.i.i1202.2.i, %8
+ %10 = xor i1 %cmp55.2.i.i1201.2.i, %9
+ %11 = xor i1 %cmp55.1.i.i1200.2.i, %10
+ %12 = xor i1 %cmp55.i.i1199.2.i, %11
+ %13 = xor i1 %12, %6
+ %dec.i.i1204.2.i = add i32 %g_228.promoted166.i1105.i, 0
+ %cmp55.i.i1199.3.i = icmp ugt i32 %dec.i.i1204.2.i, 0
+ %cmp55.1.i.i1200.3.i = icmp ugt i32 %dec.i.i1204.2.i, 0
+ %cmp55.2.i.i1201.3.i = icmp ugt i32 %dec.i.i1204.2.i, 0
+ %14 = xor i1 %cmp55.1.i.i1200.3.i, %cmp55.2.i.i1201.3.i
+ %15 = xor i1 %cmp55.i.i1199.3.i, %14
+ %16 = xor i1 %15, %13
+ %17 = zext i1 %16 to i16
+ br label %if.then.sink.split.i
+
+for.cond7.preheader.i1132.1.i:
+ br label %if.then.sink.split.i
+
+if.then.sink.split.i:
+ %xor58.5.i.i1203.3.3.sink.i = phi i16 [ 0, %for.cond7.preheader.i1132.1.i ], [ %17, %func_135.exit.i ]
+ ret i32 0
+}
More information about the llvm-commits
mailing list