[llvm] 77bec78 - [SLP]Do not look for last instruction in schedule block for buildvectors
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 8 06:55:41 PST 2024
Author: Alexey Bataev
Date: 2024-11-08T06:55:29-08:00
New Revision: 77bec78878762e34150fe23734fa43df796c873c
URL: https://github.com/llvm/llvm-project/commit/77bec78878762e34150fe23734fa43df796c873c
DIFF: https://github.com/llvm/llvm-project/commit/77bec78878762e34150fe23734fa43df796c873c.diff
LOG: [SLP]Do not look for last instruction in schedule block for buildvectors
When looking for the insertion point of a node that is a buildvector
node, the compiler should not use scheduling info for it: such nodes
may contain only partial scheduling info, which is not fully correct
and may cause a compiler crash.
Fixes #114082
Added:
llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b2f677fb84f983..c5b3537bc57bcb 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13398,7 +13398,7 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
// scheduled, and the last instruction is VL.back(). So we start with
// VL.back() and iterate over schedule data until we reach the end of the
// bundle. The end of the bundle is marked by null ScheduleData.
- if (BlocksSchedules.count(BB)) {
+ if (BlocksSchedules.count(BB) && !E->isGather()) {
Value *V = E->isOneOf(E->Scalars.back());
if (doesNotNeedToBeScheduled(V))
V = *find_if_not(E->Scalars, doesNotNeedToBeScheduled);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll
new file mode 100644
index 00000000000000..a0b390011faa6f
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: define void @test() {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 1, 0
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, i32 [[ADD]], i32 3
+; CHECK-NEXT: [[TMP1:%.*]] = icmp samesign ult <4 x i32> [[TMP0]], zeroinitializer
+; CHECK-NEXT: [[ICMP:%.*]] = icmp samesign ult i32 0, 0
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[ICMP]], i32 0, i32 0
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SELECT]] to i64
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr ptr addrspace(1), ptr addrspace(1) null, i64 [[ZEXT]]
+; CHECK-NEXT: store ptr addrspace(1) null, ptr addrspace(1) [[GETELEMENTPTR]], align 8
+; CHECK-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4
+; CHECK-NEXT: [[CALL:%.*]] = call i32 null(<2 x double> zeroinitializer)
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, i32 [[CALL]], i32 3
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> poison, <4 x i1> [[TMP3]], i64 0)
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP4]], <4 x i1> [[TMP1]], i64 4)
+; CHECK-NEXT: ret void
+;
+bb:
+ %icmp = icmp samesign ult i32 0, 0
+ %select = select i1 %icmp, i32 0, i32 0
+ %zext = zext i32 %select to i64
+ %getelementptr = getelementptr ptr addrspace(1), ptr addrspace(1) null, i64 %zext
+ store ptr addrspace(1) null, ptr addrspace(1) %getelementptr, align 8
+ %icmp1 = icmp eq i32 0, 0
+ %icmp2 = icmp eq i32 0, 0
+ %icmp3 = icmp samesign ult i32 0, 0
+ %icmp4 = icmp eq i32 0, 0
+ %add = add i32 1, 0
+ %icmp5 = icmp samesign ult i32 %add, 0
+ store volatile i32 0, ptr addrspace(1) null, align 4
+ %call = call i32 null(<2 x double> zeroinitializer)
+ %icmp6 = icmp eq i32 %call, 0
+ %icmp7 = icmp samesign ult i32 0, 0
+ ret void
+}
More information about the llvm-commits mailing list