[llvm] 3dc5259 - [SLP]Do not build bundle for copyables, with parents used in PHI node
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 18 13:38:00 PST 2026
Author: Alexey Bataev
Date: 2026-01-18T13:37:51-08:00
New Revision: 3dc5259bc8908a516ea3c90278339d5d40e397d5
URL: https://github.com/llvm/llvm-project/commit/3dc5259bc8908a516ea3c90278339d5d40e397d5
DIFF: https://github.com/llvm/llvm-project/commit/3dc5259bc8908a516ea3c90278339d5d40e397d5.diff
LOG: [SLP]Do not build bundle for copyables, with parents used in PHI node
If the copyables have parents that are used in PHI nodes, this causes complex
schedulable/non-schedulable dependencies, which require complex
processing for little profitability. Cut such cases early for now to
prevent compiler crashes and compile-time blowup.
Fixes #176658
Added:
llvm/test/Transforms/SLPVectorizer/X86/copyables-with-parent-scalars-in-phis.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/matching-insert-point-for-nodes.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4c4901c314406..762b394f8ea8a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -21794,6 +21794,23 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
if (!Values.insert(std::make_pair(V, Op)).second)
return std::nullopt;
}
+ } else {
+ // If any of the parent requires scheduling - exit, complex dep between
+ // schedulable/non-schedulable parents.
+ if (any_of(EI.UserTE->Scalars, [&](Value *V) {
+ if (EI.UserTE->hasCopyableElements() &&
+ EI.UserTE->isCopyableElement(V))
+ return false;
+ ArrayRef<TreeEntry *> Entries = SLP->getTreeEntries(V);
+ return any_of(Entries, [](const TreeEntry *TE) {
+ return TE->doesNotNeedToSchedule() && TE->UserTreeIndex &&
+ TE->UserTreeIndex.UserTE->hasState() &&
+ TE->UserTreeIndex.UserTE->State !=
+ TreeEntry::SplitVectorize &&
+ TE->UserTreeIndex.UserTE->getOpcode() == Instruction::PHI;
+ });
+ }))
+ return std::nullopt;
}
}
bool HasCopyables = S.areInstructionsWithCopyableElements();
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/copyables-with-parent-scalars-in-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/copyables-with-parent-scalars-in-phis.ll
new file mode 100644
index 0000000000000..88f4520aa7367
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/copyables-with-parent-scalars-in-phis.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s
+
+define i32 @test() {
+; CHECK-LABEL: define i32 @test() {
+; CHECK-NEXT: [[BB:.*]]:
+; CHECK-NEXT: br label %[[BB3:.*]]
+; CHECK: [[BB1:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP6:%.*]], %[[BB3]] ]
+; CHECK-NEXT: ret i32 0
+; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP9:%.*]], %[[BB3]] ]
+; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr null, align 8
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[LOAD]] to i32
+; CHECK-NEXT: [[TRUNC6:%.*]] = trunc i64 0 to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[TRUNC6]], 0
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 0, [[AND]]
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[AND]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TRUNC]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = ashr <2 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP6]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT: [[TMP9]] = or <2 x i32> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: br i1 false, label %[[BB1]], label %[[BB3]]
+;
+bb:
+ br label %bb3
+
+bb1:
+ %phi = phi i32 [ %or, %bb3 ]
+ %phi2 = phi i32 [ %ashr7, %bb3 ]
+ ret i32 0
+
+bb3:
+ %phi4 = phi i32 [ 0, %bb ], [ %or, %bb3 ]
+ %phi5 = phi i32 [ 0, %bb ], [ %or8, %bb3 ]
+ %load = load i64, ptr null, align 8
+ %trunc = trunc i64 %load to i32
+ %or = or i32 %phi4, %trunc
+ %trunc6 = trunc i64 0 to i32
+ %and = and i32 %trunc6, 0
+ %ashr = ashr i32 0, %and
+ %ashr7 = ashr i32 %phi5, %and
+ %or8 = or i32 %ashr7, 0
+ br i1 false, label %bb1, label %bb3
+}
+
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-insert-point-for-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-insert-point-for-nodes.ll
index f1e35951cffc1..5e85ecd610ebd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matching-insert-point-for-nodes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-insert-point-for-nodes.ll
@@ -7,7 +7,7 @@ define i32 @test() {
; CHECK-NEXT: br label %[[BB1:.*]]
; CHECK: [[BB1]]:
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP16:%.*]], %[[BB24:.*]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = phi <4 x i32> [ [[TMP13:%.*]], %[[BB24]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi <4 x i32> [ [[TMP17:%.*]], %[[BB24]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB]] ]
; CHECK-NEXT: br i1 false, label %[[BB4:.*]], label %[[BB11:.*]]
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ zeroinitializer, %[[BB1]] ]
@@ -31,13 +31,12 @@ define i32 @test() {
; CHECK-NEXT: br label %[[BB24]]
; CHECK: [[BB24]]:
; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], <i32 poison, i32 poison, i32 0, i32 0>
-; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i32> [[TMP9]], <i32 poison, i32 poison, i32 -1, i32 0>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT: [[TMP13]] = lshr <4 x i32> [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i32> [[TMP9]], <i32 poison, i32 poison, i32 0, i32 -1>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: [[TMP14:%.*]] = lshr <4 x i32> [[TMP11]], [[TMP10]]
; CHECK-NEXT: [[TMP15:%.*]] = or <4 x i32> [[TMP11]], [[TMP10]]
-; CHECK-NEXT: [[TMP16]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 3>
+; CHECK-NEXT: [[TMP16]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 7>
+; CHECK-NEXT: [[TMP17]] = shufflevector <4 x i32> [[TMP16]], <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>, <4 x i32> <i32 poison, i32 poison, i32 2, i32 7>
; CHECK-NEXT: br label %[[BB1]]
;
bb:
More information about the llvm-commits
mailing list