[llvm] e7080fd - [SLP] Extra check if the instruction marked for removal must be replaced in reduction ops
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 31 10:02:52 PDT 2024
Author: Alexey Bataev
Date: 2024-10-31T09:59:35-07:00
New Revision: e7080fd735d02590cdce1bef97a7f64a34145b15
URL: https://github.com/llvm/llvm-project/commit/e7080fd735d02590cdce1bef97a7f64a34145b15
DIFF: https://github.com/llvm/llvm-project/commit/e7080fd735d02590cdce1bef97a7f64a34145b15.diff
LOG: [SLP] Extra check if the instruction marked for removal must be replaced in reduction ops
If the instruction is vectorized and is part of the reduced values
gather/buildvector node, it must be properly replaced in the reduction
operation instructions before it is removed, to avoid a compiler crash.
Fixes #114371
Added:
llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 268546fe99e138..593964177ad795 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -16127,11 +16127,13 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
if (IE->Idx != 0 &&
!(VectorizableTree.front()->isGather() &&
!IE->UserTreeIndices.empty() &&
- any_of(IE->UserTreeIndices,
- [&](const EdgeInfo &EI) {
- return EI.UserTE == VectorizableTree.front().get() &&
- EI.EdgeIdx == UINT_MAX;
- })) &&
+ (ValueToGatherNodes.lookup(I).contains(
+ VectorizableTree.front().get()) ||
+ any_of(IE->UserTreeIndices,
+ [&](const EdgeInfo &EI) {
+ return EI.UserTE == VectorizableTree.front().get() &&
+ EI.EdgeIdx == UINT_MAX;
+ }))) &&
!(GatheredLoadsEntriesFirst.has_value() &&
IE->Idx >= *GatheredLoadsEntriesFirst &&
VectorizableTree.front()->isGather() &&
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll
new file mode 100644
index 00000000000000..469f165d302a9c
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i32 @test(ptr %c, i16 %a, i16 %0) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ptr [[C:%.*]], i16 [[A:%.*]], i16 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <2 x i32> <i32 poison, i32 0>
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <4 x i16> [[TMP7]], [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i16 [[A]], -2
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 poison>
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i1> [[TMP10]], i1 [[TMP9]], i32 7
+; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP11]], <4 x i1> [[TMP8]], i64 0)
+; CHECK-NEXT: [[TMP13:%.*]] = freeze <8 x i1> [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP13]])
+; CHECK-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; CHECK-NEXT: store i32 [[TMP15]], ptr [[C]], align 4
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %tobool = icmp ne i16 %a, 0
+ %1 = zext i1 %tobool to i16
+ %cmp3 = icmp ugt i16 %0, %1
+ %2 = and i1 %tobool, %cmp3
+ %tobool.1 = icmp ne i16 %a, 0
+ %3 = zext i1 %tobool.1 to i16
+ %cmp3.1 = icmp ugt i16 %0, %3
+ %4 = and i1 %tobool.1, %cmp3.1
+ %5 = select i1 %2, i1 %4, i1 false
+ %tobool.2 = icmp ne i16 %a, 0
+ %6 = zext i1 %tobool.2 to i16
+ %cmp3.2 = icmp ugt i16 %0, %6
+ %7 = and i1 %tobool.2, %cmp3.2
+ %8 = select i1 %5, i1 %7, i1 false
+ %tobool.3 = icmp ne i16 %a, 0
+ %9 = zext i1 %tobool.3 to i16
+ %cmp3.3 = icmp ugt i16 %a, %9
+ %10 = icmp ult i16 %a, -2
+ %11 = and i1 %10, %cmp3.3
+ %12 = select i1 %8, i1 %11, i1 false
+ %13 = zext i1 %12 to i32
+ store i32 %13, ptr %c, align 4
+ ret i32 0
+}
More information about the llvm-commits
mailing list