[llvm] bbd1bb4 - [SLP]Set insert point for split node with non-schedulable instructions after the last instruction

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 14 07:05:04 PDT 2025


Author: Alexey Bataev
Date: 2025-03-14T07:04:55-07:00
New Revision: bbd1bb40573dd5a36dc04af4ad307d6e16456f89

URL: https://github.com/llvm/llvm-project/commit/bbd1bb40573dd5a36dc04af4ad307d6e16456f89
DIFF: https://github.com/llvm/llvm-project/commit/bbd1bb40573dd5a36dc04af4ad307d6e16456f89.diff

LOG: [SLP]Set insert point for split node with non-schedulable instructions after the last instruction

The insert point for non-schedulable instructions in a SplitVectorize
node must be set after the last instruction, not before it, to avoid
a crash in the case of a buildvector subvector node.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-split-node.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a2200f283168d..184d5bf4a86c0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14874,7 +14874,9 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
   bool IsPHI = isa<PHINode>(LastInst);
   if (IsPHI)
     LastInstIt = LastInst->getParent()->getFirstNonPHIIt();
-  if (IsPHI || (!E->isGather() && doesNotNeedToSchedule(E->Scalars)) ||
+  if (IsPHI ||
+      (!E->isGather() && E->State != TreeEntry::SplitVectorize &&
+       doesNotNeedToSchedule(E->Scalars)) ||
       (GatheredLoadsEntriesFirst.has_value() &&
        E->Idx >= *GatheredLoadsEntriesFirst && !E->isGather() &&
        E->getOpcode() == Instruction::Load)) {

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-split-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-split-node.ll
new file mode 100644
index 0000000000000..eaf7bb2c9fdce
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-split-node.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i64 @test(i256 %0, { i32, i1 } %1) {
+; CHECK-LABEL: define i64 @test(
+; CHECK-SAME: i256 [[TMP0:%.*]], { i32, i1 } [[TMP1:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i256 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP8]], i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP7]], i32 2
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP14:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP13]], <4 x i32> [[TMP12]], i64 4)
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne <8 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <8 x i1> [[TMP15]] to i8
+; CHECK-NEXT:    [[TMP17:%.*]] = call i8 @llvm.ctpop.i8(i8 [[TMP16]])
+; CHECK-NEXT:    [[TMP18:%.*]] = zext i8 [[TMP17]] to i64
+; CHECK-NEXT:    ret i64 [[TMP18]]
+;
+entry:
+  %2 = extractvalue { i32, i1 } %1, 0
+  %cmp = icmp ne i32 %2, 0
+  %cond = zext i1 %cmp to i64
+  %conv = trunc i256 %0 to i32
+  %cmp8 = icmp ne i32 %conv, 0
+  %cond10 = zext i1 %cmp8 to i64
+  %3 = add i64 %cond10, %cond
+  %cmp24 = icmp ne i32 %conv, 0
+  %cond26 = zext i1 %cmp24 to i64
+  %4 = add i64 %3, %cond26
+  %5 = extractvalue { i32, i1 } %1, 0
+  %cmp42 = icmp ne i32 %5, 0
+  %cond44 = zext i1 %cmp42 to i64
+  %6 = add i64 %4, %cond44
+  %conv47 = trunc i256 %0 to i32
+  %cmp54 = icmp ne i32 %conv47, 0
+  %cond56 = zext i1 %cmp54 to i64
+  %7 = add i64 %6, %cond56
+  %cmp70 = icmp ne i32 %conv47, 0
+  %cond72 = zext i1 %cmp70 to i64
+  %8 = add i64 %7, %cond72
+  %9 = extractvalue { i32, i1 } %1, 0
+  %cmp87 = icmp ne i32 %9, 0
+  %cond89 = zext i1 %cmp87 to i64
+  %10 = add i64 %8, %cond89
+  %cmp92 = icmp ne i32 %9, 0
+  %cond94 = zext i1 %cmp92 to i64
+  %11 = add i64 %10, %cond94
+  ret i64 %11
+}


        


More information about the llvm-commits mailing list