[llvm] 77bec78 - [SLP]Do not look for last instruction in schedule block for buildvectors

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 8 06:55:41 PST 2024


Author: Alexey Bataev
Date: 2024-11-08T06:55:29-08:00
New Revision: 77bec78878762e34150fe23734fa43df796c873c

URL: https://github.com/llvm/llvm-project/commit/77bec78878762e34150fe23734fa43df796c873c
DIFF: https://github.com/llvm/llvm-project/commit/77bec78878762e34150fe23734fa43df796c873c.diff

LOG: [SLP]Do not look for last instruction in schedule block for buildvectors

When looking for the insertion point of a node that is a buildvector
(gather) node, the compiler should not use the scheduling info. For such
nodes the scheduling info may be only partial, which is not fully correct
and may cause a compiler crash.

Fixes #114082

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b2f677fb84f983..c5b3537bc57bcb 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13398,7 +13398,7 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
   // scheduled, and the last instruction is VL.back(). So we start with
   // VL.back() and iterate over schedule data until we reach the end of the
   // bundle. The end of the bundle is marked by null ScheduleData.
-  if (BlocksSchedules.count(BB)) {
+  if (BlocksSchedules.count(BB) && !E->isGather()) {
     Value *V = E->isOneOf(E->Scalars.back());
     if (doesNotNeedToBeScheduled(V))
       V = *find_if_not(E->Scalars, doesNotNeedToBeScheduled);

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll
new file mode 100644
index 00000000000000..a0b390011faa6f
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: define void @test() {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 1, 0
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, i32 [[ADD]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp samesign ult <4 x i32> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp samesign ult i32 0, 0
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[ICMP]], i32 0, i32 0
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[SELECT]] to i64
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr ptr addrspace(1), ptr addrspace(1) null, i64 [[ZEXT]]
+; CHECK-NEXT:    store ptr addrspace(1) null, ptr addrspace(1) [[GETELEMENTPTR]], align 8
+; CHECK-NEXT:    store volatile i32 0, ptr addrspace(1) null, align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 null(<2 x double> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, i32 [[CALL]], i32 3
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> poison, <4 x i1> [[TMP3]], i64 0)
+; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP4]], <4 x i1> [[TMP1]], i64 4)
+; CHECK-NEXT:    ret void
+;
+bb:
+  %icmp = icmp samesign ult i32 0, 0
+  %select = select i1 %icmp, i32 0, i32 0
+  %zext = zext i32 %select to i64
+  %getelementptr = getelementptr ptr addrspace(1), ptr addrspace(1) null, i64 %zext
+  store ptr addrspace(1) null, ptr addrspace(1) %getelementptr, align 8
+  %icmp1 = icmp eq i32 0, 0
+  %icmp2 = icmp eq i32 0, 0
+  %icmp3 = icmp samesign ult i32 0, 0
+  %icmp4 = icmp eq i32 0, 0
+  %add = add i32 1, 0
+  %icmp5 = icmp samesign ult i32 %add, 0
+  store volatile i32 0, ptr addrspace(1) null, align 4
+  %call = call i32 null(<2 x double> zeroinitializer)
+  %icmp6 = icmp eq i32 %call, 0
+  %icmp7 = icmp samesign ult i32 0, 0
+  ret void
+}


        


More information about the llvm-commits mailing list