[llvm] [SLP] Fix crash of shuffle poison (PR #106857)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 31 08:28:17 PDT 2024
https://github.com/tcwzxx created https://github.com/llvm/llvm-project/pull/106857
When all elements of the shuffle mask are `PoisonMaskElem`, there is no need to check the cost of `SK_ExtractSubvector`; it is free. Without this check, the compiler crashes with the following assertion failure:
Assertion `(Idx + EltsPerVector) <= alignTo(NumElts, EltsPerVector) && "SK_ExtractSubvector index out of range"' failed.
>From 4dd7f80c6fd7186120859d2b91a960613c09bfca Mon Sep 17 00:00:00 2001
From: tcwzxx <tcwzxx at gmail.com>
Date: Sat, 31 Aug 2024 23:18:29 +0800
Subject: [PATCH] Fix crash of shuffle poison
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 3 +-
.../crash_extractelement_from_null.ll | 34 +++++++++++++++++++
2 files changed, 36 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/Transforms/SLPVectorizer/crash_extractelement_from_null.ll
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3d41c978281351..fa16f4632174de 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8703,7 +8703,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
auto CheckPerRegistersShuffle = [&](MutableArrayRef<int> Mask,
SmallVectorImpl<unsigned> &Indices)
-> std::optional<TTI::ShuffleKind> {
- if (NumElts <= EltsPerVector)
+ if (NumElts <= EltsPerVector ||
+ all_of(Mask, [](int I) { return I == PoisonMaskElem; }))
return std::nullopt;
int OffsetReg0 =
alignDown(std::accumulate(Mask.begin(), Mask.end(), INT_MAX,
diff --git a/llvm/test/Transforms/SLPVectorizer/crash_extractelement_from_null.ll b/llvm/test/Transforms/SLPVectorizer/crash_extractelement_from_null.ll
new file mode 100644
index 00000000000000..87d92706bf36a4
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/crash_extractelement_from_null.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+
+; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s
+
+define void @test(i8 %0, i8 %1) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L:%.*]] = load <4 x i8>, ptr getelementptr (i8, ptr null, i32 8), align 1
+; CHECK-NEXT: [[LI15:%.*]] = extractelement <4 x i8> [[L]], i64 15
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP0]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i8 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i8 [[TMP0]], 0
+; CHECK-NEXT: [[DOTI15:%.*]] = icmp ne i8 [[LI15]], 0
+; CHECK-NEXT: [[I0244:%.*]] = insertelement <4 x i1> zeroinitializer, i1 [[TMP2]], i64 0
+; CHECK-NEXT: [[I1245:%.*]] = insertelement <4 x i1> [[I0244]], i1 [[TMP3]], i64 1
+; CHECK-NEXT: [[I2246:%.*]] = insertelement <4 x i1> [[I1245]], i1 [[TMP4]], i64 2
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i1> [[I2246]], i1 [[DOTI15]], i64 3
+; CHECK-NEXT: ret void
+;
+entry:
+ %l = load <4 x i8>, ptr getelementptr (i8, ptr null, i32 8), align 1
+ %li15 = extractelement <4 x i8> %l, i64 15
+ %2 = icmp ne i8 %0, 0
+ %3 = icmp ne i8 %1, 0
+ %4 = icmp ne i8 %0, 0
+ %.i15 = icmp ne i8 %li15, 0
+
+ %i0244 = insertelement <4 x i1> zeroinitializer, i1 %2, i64 0
+ %i1245 = insertelement <4 x i1> %i0244, i1 %3, i64 1
+ %i2246 = insertelement <4 x i1> %i1245, i1 %4, i64 2
+ %14 = insertelement <4 x i1> %i2246, i1 %.i15, i64 3
+ ret void
+}
More information about the llvm-commits
mailing list