[llvm] 3dc5259 - [SLP]Do not build bundle for copyables, with parents used in PHI node

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 18 13:38:00 PST 2026


Author: Alexey Bataev
Date: 2026-01-18T13:37:51-08:00
New Revision: 3dc5259bc8908a516ea3c90278339d5d40e397d5

URL: https://github.com/llvm/llvm-project/commit/3dc5259bc8908a516ea3c90278339d5d40e397d5
DIFF: https://github.com/llvm/llvm-project/commit/3dc5259bc8908a516ea3c90278339d5d40e397d5.diff

LOG: [SLP]Do not build bundle for copyables, with parents used in PHI node

If the copyables have parents that are used in PHI nodes, this causes complex
schedulable/non-schedulable dependencies, which require complex
processing for little profitability. Cut such cases early for now to
prevent compiler crashes and compile-time blow-up.

Fixes #176658

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/copyables-with-parent-scalars-in-phis.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/matching-insert-point-for-nodes.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4c4901c314406..762b394f8ea8a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -21794,6 +21794,23 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
         if (!Values.insert(std::make_pair(V, Op)).second)
           return std::nullopt;
       }
+    } else {
+      // If any of the parent requires scheduling - exit, complex dep between
+      // schedulable/non-schedulable parents.
+      if (any_of(EI.UserTE->Scalars, [&](Value *V) {
+            if (EI.UserTE->hasCopyableElements() &&
+                EI.UserTE->isCopyableElement(V))
+              return false;
+            ArrayRef<TreeEntry *> Entries = SLP->getTreeEntries(V);
+            return any_of(Entries, [](const TreeEntry *TE) {
+              return TE->doesNotNeedToSchedule() && TE->UserTreeIndex &&
+                     TE->UserTreeIndex.UserTE->hasState() &&
+                     TE->UserTreeIndex.UserTE->State !=
+                         TreeEntry::SplitVectorize &&
+                     TE->UserTreeIndex.UserTE->getOpcode() == Instruction::PHI;
+            });
+          }))
+        return std::nullopt;
     }
   }
   bool HasCopyables = S.areInstructionsWithCopyableElements();

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/copyables-with-parent-scalars-in-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/copyables-with-parent-scalars-in-phis.ll
new file mode 100644
index 0000000000000..88f4520aa7367
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/copyables-with-parent-scalars-in-phis.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s
+
+define i32 @test() {
+; CHECK-LABEL: define i32 @test() {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    br label %[[BB3:.*]]
+; CHECK:       [[BB1:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ [[TMP6:%.*]], %[[BB3]] ]
+; CHECK-NEXT:    ret i32 0
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP9:%.*]], %[[BB3]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr null, align 8
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i64 [[LOAD]] to i32
+; CHECK-NEXT:    [[TRUNC6:%.*]] = trunc i64 0 to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[TRUNC6]], 0
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr i32 0, [[AND]]
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[AND]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TRUNC]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = ashr <2 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = or <2 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[TMP9]] = or <2 x i32> [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    br i1 false, label %[[BB1]], label %[[BB3]]
+;
+bb:
+  br label %bb3
+
+bb1:
+  %phi = phi i32 [ %or, %bb3 ]
+  %phi2 = phi i32 [ %ashr7, %bb3 ]
+  ret i32 0
+
+bb3:
+  %phi4 = phi i32 [ 0, %bb ], [ %or, %bb3 ]
+  %phi5 = phi i32 [ 0, %bb ], [ %or8, %bb3 ]
+  %load = load i64, ptr null, align 8
+  %trunc = trunc i64 %load to i32
+  %or = or i32 %phi4, %trunc
+  %trunc6 = trunc i64 0 to i32
+  %and = and i32 %trunc6, 0
+  %ashr = ashr i32 0, %and
+  %ashr7 = ashr i32 %phi5, %and
+  %or8 = or i32 %ashr7, 0
+  br i1 false, label %bb1, label %bb3
+}
+

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-insert-point-for-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-insert-point-for-nodes.ll
index f1e35951cffc1..5e85ecd610ebd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matching-insert-point-for-nodes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-insert-point-for-nodes.ll
@@ -7,7 +7,7 @@ define i32 @test() {
 ; CHECK-NEXT:    br label %[[BB1:.*]]
 ; CHECK:       [[BB1]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ [[TMP16:%.*]], %[[BB24:.*]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <4 x i32> [ [[TMP13:%.*]], %[[BB24]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <4 x i32> [ [[TMP17:%.*]], %[[BB24]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB]] ]
 ; CHECK-NEXT:    br i1 false, label %[[BB4:.*]], label %[[BB11:.*]]
 ; CHECK:       [[BB4]]:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ zeroinitializer, %[[BB1]] ]
@@ -31,13 +31,12 @@ define i32 @test() {
 ; CHECK-NEXT:    br label %[[BB24]]
 ; CHECK:       [[BB24]]:
 ; CHECK-NEXT:    [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], <i32 poison, i32 poison, i32 0, i32 0>
-; CHECK-NEXT:    [[TMP10:%.*]] = and <4 x i32> [[TMP9]], <i32 poison, i32 poison, i32 -1, i32 0>
-; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT:    [[TMP13]] = lshr <4 x i32> [[TMP11]], [[TMP12]]
+; CHECK-NEXT:    [[TMP10:%.*]] = and <4 x i32> [[TMP9]], <i32 poison, i32 poison, i32 0, i32 -1>
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
 ; CHECK-NEXT:    [[TMP14:%.*]] = lshr <4 x i32> [[TMP11]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = or <4 x i32> [[TMP11]], [[TMP10]]
-; CHECK-NEXT:    [[TMP16]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 3>
+; CHECK-NEXT:    [[TMP16]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 7>
+; CHECK-NEXT:    [[TMP17]] = shufflevector <4 x i32> [[TMP16]], <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>, <4 x i32> <i32 poison, i32 poison, i32 2, i32 7>
 ; CHECK-NEXT:    br label %[[BB1]]
 ;
 bb:


        


More information about the llvm-commits mailing list