[llvm] 306b5a3 - [SLP]Do not consider split nodes, when checking parent PHI-based nodes
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 16 13:49:22 PST 2025
Author: Alexey Bataev
Date: 2025-11-16T12:39:58-08:00
New Revision: 306b5a3d64b4abbfb07bcfb77fadb506da897f16
URL: https://github.com/llvm/llvm-project/commit/306b5a3d64b4abbfb07bcfb77fadb506da897f16
DIFF: https://github.com/llvm/llvm-project/commit/306b5a3d64b4abbfb07bcfb77fadb506da897f16.diff
LOG: [SLP]Do not consider split nodes, when checking parent PHI-based nodes
The compiler should not consider split vectorize nodes when checking
for non-schedulable PHI-based parent nodes. Only pure PHI nodes must be
considered: only they can be treated as explicit users, whereas split
nodes cannot.
Fixes #168268
Added:
llvm/test/Transforms/SLPVectorizer/X86/parent-node-split-non-schedulable.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 938eacde7548d..ff7149044d199 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5340,6 +5340,7 @@ class BoUpSLP {
bool IsNonSchedulableWithParentPhiNode =
TE->doesNotNeedToSchedule() && TE->UserTreeIndex &&
TE->UserTreeIndex.UserTE->hasState() &&
+ TE->UserTreeIndex.UserTE->State != TreeEntry::SplitVectorize &&
TE->UserTreeIndex.UserTE->getOpcode() == Instruction::PHI;
// Count the number of unique phi nodes, which are the parent for
// parent entry, and exit, if all the unique phis are processed.
@@ -5391,6 +5392,7 @@ class BoUpSLP {
bool IsNonSchedulableWithParentPhiNode =
P.first->doesNotNeedToSchedule() && P.first->UserTreeIndex &&
P.first->UserTreeIndex.UserTE->hasState() &&
+ P.first->UserTreeIndex.UserTE->State != TreeEntry::SplitVectorize &&
P.first->UserTreeIndex.UserTE->getOpcode() == Instruction::PHI;
auto *It = find(P.first->Scalars, User);
do {
@@ -5690,6 +5692,8 @@ class BoUpSLP {
Bundle->getTreeEntry()->doesNotNeedToSchedule() &&
Bundle->getTreeEntry()->UserTreeIndex &&
Bundle->getTreeEntry()->UserTreeIndex.UserTE->hasState() &&
+ Bundle->getTreeEntry()->UserTreeIndex.UserTE->State !=
+ TreeEntry::SplitVectorize &&
Bundle->getTreeEntry()->UserTreeIndex.UserTE->getOpcode() ==
Instruction::PHI;
// Count the number of unique phi nodes, which are the parent for
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-split-non-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-split-non-schedulable.ll
new file mode 100644
index 0000000000000..62335e48b2fe4
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-split-non-schedulable.ll
@@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i32 @main(ptr %c, i32 %0, i1 %tobool4.not, i16 %1) {
+; CHECK-LABEL: define i32 @main(
+; CHECK-SAME: ptr [[C:%.*]], i32 [[TMP0:%.*]], i1 [[TOBOOL4_NOT:%.*]], i16 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[TMP0]], i32 1
+; CHECK-NEXT: br label %[[IF_END:.*]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: [[B_0_PH:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP32:%.*]], %[[WHILE_COND_PREHEADER:.*]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP33:%.*]], %[[WHILE_COND_PREHEADER]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[B_0_PH]], i32 0
+; CHECK-NEXT: br i1 [[TOBOOL4_NOT]], label %[[R:.*]], label %[[IF_END9:.*]]
+; CHECK: [[IF_END9]]:
+; CHECK-NEXT: [[CONV11:%.*]] = sext i16 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[CONV11]], i32 0
+; CHECK-NEXT: br label %[[R]]
+; CHECK: [[R]]:
+; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ <i32 1, i32 0>, %[[IF_END9]] ], [ [[TMP2]], %[[IF_END]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ [[TMP5]], %[[IF_END9]] ], [ [[TMP4]], %[[IF_END]] ]
+; CHECK-NEXT: [[TOBOOL12_NOT:%.*]] = icmp eq i32 [[B_0_PH]], 0
+; CHECK-NEXT: br i1 [[TOBOOL12_NOT]], label %[[IF_END14:.*]], label %[[IF_THEN13:.*]]
+; CHECK: [[IF_THEN13]]:
+; CHECK-NEXT: br label %[[IF_END14]]
+; CHECK: [[IF_END14]]:
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP8]], 1
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[AND]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
+; CHECK-NEXT: [[AND17:%.*]] = and i32 [[TMP9]], 1
+; CHECK-NEXT: [[DIV20:%.*]] = sdiv i32 [[AND17]], [[TMP0]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[C]], align 4
+; CHECK-NEXT: [[AND25:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[AND17]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP10]], i32 2
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[AND25]], i32 3
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> <i32 0, i32 1, i32 poison, i32 1>, i32 [[DIV20]], i32 2
+; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP14]], [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> poison, i32 [[NOT]], i32 2
+; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP11]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 5>
+; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i32> [[TMP18]], <4 x i32> [[TMP19]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[B_0_PH]], i32 0
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP0]], i32 2
+; CHECK-NEXT: br label %[[AH:.*]]
+; CHECK: [[AH]]:
+; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP21]], %[[AH]] ], [ [[TMP16]], %[[IF_END14]] ]
+; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ [[TMP22]], %[[AH]] ], [ [[TMP20]], %[[IF_END14]] ]
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP23]], i32 2
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP24]], i32 2
+; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> [[TMP24]], <2 x i32> <i32 2, i32 6>
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP25]], [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP23]], i32 1
+; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[ADD]], [[TMP28]]
+; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP23]], i32 3
+; CHECK-NEXT: [[OR27:%.*]] = or i32 [[TMP29]], [[TMP30]]
+; CHECK-NEXT: store i32 [[OR27]], ptr [[C]], align 4
+; CHECK-NEXT: br i1 [[TOBOOL4_NOT]], label %[[WHILE_COND_PREHEADER]], label %[[AH]]
+; CHECK: [[WHILE_COND_PREHEADER]]:
+; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP24]], i32 3
+; CHECK-NEXT: [[CALL69:%.*]] = tail call i32 @s(i32 [[TMP31]])
+; CHECK-NEXT: [[TMP32]] = extractelement <4 x i32> [[TMP23]], i32 0
+; CHECK-NEXT: [[TMP33]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: br label %[[IF_END]]
+;
+entry:
+ br label %if.end
+
+if.end:
+ %n.0.ph = phi i32 [ 0, %entry ], [ %.us-phi52, %while.cond.preheader ]
+ %b.0.ph = phi i32 [ 0, %entry ], [ %b.2, %while.cond.preheader ]
+ %a.0.ph = phi i32 [ 0, %entry ], [ %a.2, %while.cond.preheader ]
+ br i1 %tobool4.not, label %r, label %if.end9
+
+if.end9:
+ %conv11 = sext i16 %1 to i32
+ br label %r
+
+r:
+ %.us-phi51642 = phi i32 [ 0, %if.end9 ], [ %0, %if.end ]
+ %.us-phi415662 = phi i32 [ 0, %if.end9 ], [ 1, %if.end ]
+ %b.1 = phi i32 [ %conv11, %if.end9 ], [ %b.0.ph, %if.end ]
+ %a.1 = phi i32 [ 1, %if.end9 ], [ 0, %if.end ]
+ %tobool12.not = icmp eq i32 %b.0.ph, 0
+ br i1 %tobool12.not, label %if.end14, label %if.then13
+
+if.then13:
+ br label %if.end14
+
+if.end14:
+ %and = and i32 %n.0.ph, 1
+ %not = xor i32 %and, 1
+ %and17 = and i32 %a.0.ph, 1
+ %not18 = xor i32 %and17, 1
+ %div20 = sdiv i32 %and17, %0
+ %2 = load i32, ptr %c, align 4
+ %3 = xor i32 %2, %div20
+ %and25 = and i32 %0, 1
+ %not26 = xor i32 %and25, 1
+ br label %ah
+
+ah:
+ %.us-phi4154 = phi i32 [ 0, %ah ], [ %.us-phi415662, %if.end14 ]
+ %.us-phi52 = phi i32 [ 0, %ah ], [ %.us-phi51642, %if.end14 ]
+ %b.2 = phi i32 [ %b.0.ph, %ah ], [ %b.1, %if.end14 ]
+ %a.2 = phi i32 [ 0, %ah ], [ %a.1, %if.end14 ]
+ %l.1 = phi i32 [ %0, %ah ], [ %not, %if.end14 ]
+ %p16.1 = phi i32 [ 0, %ah ], [ %not18, %if.end14 ]
+ %q.1 = phi i32 [ 0, %ah ], [ %3, %if.end14 ]
+ %r23.1 = phi i32 [ 0, %ah ], [ %not26, %if.end14 ]
+ %add = add i32 %q.1, %l.1
+ %4 = or i32 %add, %p16.1
+ %or27 = or i32 %4, %r23.1
+ store i32 %or27, ptr %c, align 4
+ br i1 %tobool4.not, label %while.cond.preheader, label %ah
+
+while.cond.preheader:
+ %call69 = tail call i32 @s(i32 %.us-phi4154)
+ br label %if.end
+}
+
+declare i32 @s(i32)
More information about the llvm-commits
mailing list