[llvm] cf1f489 - [SLP]Check only instructions with unique parent instruction user

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 28 11:14:32 PDT 2025


Author: Alexey Bataev
Date: 2025-10-28T11:14:18-07:00
New Revision: cf1f4896a714ae725e919a0781ef9c5b8817f40c

URL: https://github.com/llvm/llvm-project/commit/cf1f4896a714ae725e919a0781ef9c5b8817f40c
DIFF: https://github.com/llvm/llvm-project/commit/cf1f4896a714ae725e919a0781ef9c5b8817f40c.diff

LOG: [SLP]Check only instructions with unique parent instruction user

Need to re-check the instruction with the non-schedulable parent only
if this parent has a user phi node (i.e. it is used only outside the
block) and the user instruction has a unique parent instruction.

Fixes issue reported in https://github.com/llvm/llvm-project/commit/20675ee67d048a42482c246e25b284637d55347c#commitcomment-168863594

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4fcaf6dabb513..43166c035fe7a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5608,6 +5608,7 @@ class BoUpSLP {
           for (ScheduleBundle *Bundle : Bundles) {
             if (ScheduleCopyableDataMap.empty() && TotalOpCount == 0)
               break;
+            SmallPtrSet<Value *, 4> ParentsUniqueUsers;
             // Need to search for the lane since the tree entry can be
             // reordered.
             auto *It = find(Bundle->getTreeEntry()->Scalars, In);
@@ -5636,6 +5637,22 @@ class BoUpSLP {
                       Bundle->getTreeEntry()->isCopyableElement(In)) &&
                      "Missed TreeEntry operands?");
 
+              bool IsNonSchedulableWithParentPhiNode =
+                  Bundle->getTreeEntry()->doesNotNeedToSchedule() &&
+                  Bundle->getTreeEntry()->UserTreeIndex &&
+                  Bundle->getTreeEntry()->UserTreeIndex.UserTE->hasState() &&
+                  Bundle->getTreeEntry()->UserTreeIndex.UserTE->getOpcode() ==
+                      Instruction::PHI;
+              // Count the number of unique phi nodes, which are the parent for
+              // parent entry, and exit, if all the unique phis are processed.
+              if (IsNonSchedulableWithParentPhiNode) {
+                const TreeEntry *ParentTE =
+                    Bundle->getTreeEntry()->UserTreeIndex.UserTE;
+                Value *User = ParentTE->Scalars[Lane];
+                if (!ParentsUniqueUsers.insert(User).second)
+                  break;
+              }
+
               for (unsigned OpIdx :
                    seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
                 if (auto *I = dyn_cast<Instruction>(
@@ -5644,8 +5661,8 @@ class BoUpSLP {
                                     << *I << "\n");
                   DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx, Checked);
                 }
-              // If parent node is schedulable, it will be handle correctly.
-              if (!Bundle->getTreeEntry()->doesNotNeedToSchedule())
+              // If parent node is schedulable, it will be handled correctly.
+              if (!IsNonSchedulableWithParentPhiNode)
                 break;
               It = std::find(std::next(It),
                              Bundle->getTreeEntry()->Scalars.end(), In);

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll
new file mode 100644
index 0000000000000..9e96e93a3205b
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll
@@ -0,0 +1,170 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i64 @test(ptr %arg1, i64 %alloca.promoted344, i8 %load.311.i, i1 %load1.i) {
+; CHECK-LABEL: define i64 @test(
+; CHECK-SAME: ptr [[ARG1:%.*]], i64 [[ALLOCA_PROMOTED344:%.*]], i8 [[LOAD_311_I:%.*]], i1 [[LOAD1_I:%.*]]) {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i8> <i8 0, i8 0, i8 0, i8 poison>, i8 [[LOAD_311_I]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i8> <i8 poison, i8 poison, i8 0, i8 0>, i8 [[LOAD_311_I]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[ALLOCA_PROMOTED344]], i32 0
+; CHECK-NEXT:    br label %[[BB2:.*]]
+; CHECK:       [[BB2]]:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[BB]] ], [ [[TMP28:%.*]], %[[BB12_8_I:.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <8 x i8> [ zeroinitializer, %[[BB]] ], [ [[TMP29:%.*]], %[[BB12_8_I]] ]
+; CHECK-NEXT:    br i1 [[LOAD1_I]], label %[[SPAM_EXIT:.*]], label %[[BB4_LR_PH_I:.*]]
+; CHECK:       [[BB4_LR_PH_I]]:
+; CHECK-NEXT:    br i1 true, label %[[BB3_I_I_PEEL:.*]], label %[[EGGS_EXIT_I_PEEL:.*]]
+; CHECK:       [[BB3_I_I_PEEL]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = and <2 x i64> [[TMP3]], splat (i64 1)
+; CHECK-NEXT:    [[LOAD4_I_I_PEEL:%.*]] = load i64, ptr [[ARG1]], align 8
+; CHECK-NEXT:    [[SHL_I_I_PEEL:%.*]] = shl i64 [[LOAD4_I_I_PEEL]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> <i32 poison, i32 0>
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[SHL_I_I_PEEL]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP5]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = xor <2 x i64> [[TMP5]], [[TMP7]]
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    br label %[[EGGS_EXIT_I_PEEL]]
+; CHECK:       [[EGGS_EXIT_I_PEEL]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = phi <2 x i64> [ [[TMP10]], %[[BB3_I_I_PEEL]] ], [ zeroinitializer, %[[BB4_LR_PH_I]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP11]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 0>
+; CHECK-NEXT:    [[TMP13:%.*]] = trunc <4 x i64> [[TMP12]] to <4 x i8>
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i64> [[TMP12]], i32 1
+; CHECK-NEXT:    br label %[[SPAM_EXIT]]
+; CHECK:       [[SPAM_EXIT]]:
+; CHECK-NEXT:    [[GETELEMENTPTR_I_I_PROMOTED346:%.*]] = phi i64 [ [[TMP14]], %[[EGGS_EXIT_I_PEEL]] ], [ 0, %[[BB2]] ]
+; CHECK-NEXT:    [[LOAD_8_I:%.*]] = phi i8 [ 0, %[[EGGS_EXIT_I_PEEL]] ], [ 1, %[[BB2]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = phi <4 x i8> [ [[TMP13]], %[[EGGS_EXIT_I_PEEL]] ], [ zeroinitializer, %[[BB2]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP15]], <4 x i8> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    br i1 [[LOAD1_I]], label %[[BB12_8_I]], label %[[BB12_1_THREAD_I:.*]]
+; CHECK:       [[BB12_1_THREAD_I]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <8 x i8> [[TMP4]], i32 0
+; CHECK-NEXT:    [[ICMP5_3_I:%.*]] = icmp eq i8 [[TMP17]], 0
+; CHECK-NEXT:    br i1 [[ICMP5_3_I]], label %[[BB12_3_I:.*]], label %[[BB8_3_I:.*]]
+; CHECK:       [[BB8_3_I]]:
+; CHECK-NEXT:    br label %[[BB12_3_I]]
+; CHECK:       [[BB12_3_I]]:
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <8 x i8> [[TMP4]], i32 1
+; CHECK-NEXT:    [[ICMP5_4_I:%.*]] = icmp eq i8 [[TMP18]], 0
+; CHECK-NEXT:    br i1 [[ICMP5_4_I]], label %[[BB12_4_I:.*]], label %[[BB8_4_I:.*]]
+; CHECK:       [[BB8_4_I]]:
+; CHECK-NEXT:    br label %[[BB12_4_I]]
+; CHECK:       [[BB12_4_I]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i8> [[TMP4]], i32 2
+; CHECK-NEXT:    [[ICMP5_5_I:%.*]] = icmp eq i8 [[TMP19]], 0
+; CHECK-NEXT:    br i1 [[ICMP5_5_I]], label %[[BB12_5_I:.*]], label %[[BB8_5_I:.*]]
+; CHECK:       [[BB8_5_I]]:
+; CHECK-NEXT:    br label %[[BB12_5_I]]
+; CHECK:       [[BB12_5_I]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <8 x i8> [[TMP4]], i32 3
+; CHECK-NEXT:    [[ICMP5_7_I:%.*]] = icmp eq i8 [[TMP20]], 0
+; CHECK-NEXT:    br i1 [[ICMP5_7_I]], label %[[BB12_7_I:.*]], label %[[BB8_7_I:.*]]
+; CHECK:       [[BB8_7_I]]:
+; CHECK-NEXT:    br label %[[BB12_7_I]]
+; CHECK:       [[BB12_7_I]]:
+; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <8 x i8> [[TMP4]], i32 4
+; CHECK-NEXT:    [[ICMP5_8_I:%.*]] = icmp eq i8 [[TMP21]], 0
+; CHECK-NEXT:    br i1 [[ICMP5_8_I]], label %[[BB12_8_I]], label %[[BB8_8_I:.*]]
+; CHECK:       [[BB8_8_I]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP1]], i8 [[LOAD_8_I]], i32 1
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <4 x i8> poison, i8 [[LOAD_8_I]], i32 0
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <4 x i32> <i32 poison, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP23]], <4 x i8> [[TMP24]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    br label %[[BB12_8_I]]
+; CHECK:       [[BB12_8_I]]:
+; CHECK-NEXT:    [[TMP26:%.*]] = phi <4 x i8> [ [[TMP0]], %[[BB12_7_I]] ], [ [[TMP22]], %[[BB8_8_I]] ], [ [[TMP15]], %[[SPAM_EXIT]] ]
+; CHECK-NEXT:    [[TMP27:%.*]] = phi <4 x i8> [ zeroinitializer, %[[BB12_7_I]] ], [ [[TMP25]], %[[BB8_8_I]] ], [ [[TMP16]], %[[SPAM_EXIT]] ]
+; CHECK-NEXT:    [[TMP28]] = insertelement <2 x i64> [[TMP2]], i64 [[GETELEMENTPTR_I_I_PROMOTED346]], i32 1
+; CHECK-NEXT:    [[TMP29]] = shufflevector <4 x i8> [[TMP26]], <4 x i8> [[TMP27]], <8 x i32> <i32 2, i32 7, i32 5, i32 0, i32 1, i32 3, i32 4, i32 6>
+; CHECK-NEXT:    br label %[[BB2]]
+;
+bb:
+  br label %bb2
+
+bb2:
+  %getelementptr.i.i.promoted = phi i64 [ 0, %bb ], [ %getelementptr.i.i.promoted346, %bb12.8.i ]
+  %alloca.promoted = phi i64 [ 0, %bb ], [ %alloca.promoted344, %bb12.8.i ]
+  %load.8.i231 = phi i8 [ 0, %bb ], [ %load.8.i239, %bb12.8.i ]
+  %load.7.i217 = phi i8 [ 0, %bb ], [ %load.7.i225, %bb12.8.i ]
+  %load.626.i200 = phi i8 [ 0, %bb ], [ %load.626.i208, %bb12.8.i ]
+  %load.6.i183 = phi i8 [ 0, %bb ], [ %load.6.i191, %bb12.8.i ]
+  %load.5.i167 = phi i8 [ 0, %bb ], [ %load.5.i175, %bb12.8.i ]
+  %load.418.i148 = phi i8 [ 0, %bb ], [ %load.418.i156, %bb12.8.i ]
+  %load.4.i129 = phi i8 [ 0, %bb ], [ %load.4.i137, %bb12.8.i ]
+  %load.3.i111 = phi i8 [ 0, %bb ], [ %load.3.i119, %bb12.8.i ]
+  br i1 %load1.i, label %spam.exit, label %bb4.lr.ph.i
+
+bb4.lr.ph.i:
+  br i1 true, label %bb3.i.i.peel, label %eggs.exit.i.peel
+
+bb3.i.i.peel:
+  %and.i.i.peel = and i64 %alloca.promoted, 1
+  %load4.i.i.peel = load i64, ptr %arg1, align 8
+  %shl.i.i.peel = shl i64 %load4.i.i.peel, 1
+  %or.i.i.peel = or i64 %shl.i.i.peel, %and.i.i.peel
+  %and6.i.i.peel = and i64 %getelementptr.i.i.promoted, 1
+  %xor.i.i.peel = xor i64 %and6.i.i.peel, %alloca.promoted
+  br label %eggs.exit.i.peel
+
+eggs.exit.i.peel:
+  %load5.i.i93.peel = phi i64 [ %xor.i.i.peel, %bb3.i.i.peel ], [ 0, %bb4.lr.ph.i ]
+  %or.i.i91.peel = phi i64 [ %or.i.i.peel, %bb3.i.i.peel ], [ 0, %bb4.lr.ph.i ]
+  %0 = trunc i64 %or.i.i91.peel to i8
+  %1 = trunc nuw i64 %or.i.i91.peel to i8
+  %2 = trunc i64 %load5.i.i93.peel to i8
+  br label %spam.exit
+
+spam.exit:
+  %getelementptr.i.i.promoted346 = phi i64 [ %load5.i.i93.peel, %eggs.exit.i.peel ], [ 0, %bb2 ]
+  %load.834.i = phi i8 [ %2, %eggs.exit.i.peel ], [ 0, %bb2 ]
+  %load.7.i25 = phi i8 [ %1, %eggs.exit.i.peel ], [ 0, %bb2 ]
+  %load.8.i = phi i8 [ 0, %eggs.exit.i.peel ], [ 1, %bb2 ]
+  %load.6.i18 = phi i8 [ %0, %eggs.exit.i.peel ], [ 0, %bb2 ]
+  br i1 %load1.i, label %bb12.8.i, label %bb12.1.thread.i
+
+bb12.1.thread.i:
+  %icmp5.3.i = icmp eq i8 %load.3.i111, 0
+  br i1 %icmp5.3.i, label %bb12.3.i, label %bb8.3.i
+
+bb8.3.i:
+  br label %bb12.3.i
+
+bb12.3.i:
+  %icmp5.4.i = icmp eq i8 %load.4.i129, 0
+  br i1 %icmp5.4.i, label %bb12.4.i, label %bb8.4.i
+
+bb8.4.i:
+  br label %bb12.4.i
+
+bb12.4.i:
+  %icmp5.5.i = icmp eq i8 %load.5.i167, 0
+  br i1 %icmp5.5.i, label %bb12.5.i, label %bb8.5.i
+
+bb8.5.i:
+  br label %bb12.5.i
+
+bb12.5.i:
+  %icmp5.7.i = icmp eq i8 %load.7.i217, 0
+  br i1 %icmp5.7.i, label %bb12.7.i, label %bb8.7.i
+
+bb8.7.i:
+  br label %bb12.7.i
+
+bb12.7.i:
+  %icmp5.8.i = icmp eq i8 %load.8.i231, 0
+  br i1 %icmp5.8.i, label %bb12.8.i, label %bb8.8.i
+
+bb8.8.i:
+  br label %bb12.8.i
+
+bb12.8.i:
+  %load.8.i239 = phi i8 [ 0, %bb12.7.i ], [ %load.8.i, %bb8.8.i ], [ %load.834.i, %spam.exit ]
+  %load.7.i225 = phi i8 [ 0, %bb12.7.i ], [ %load.311.i, %bb8.8.i ], [ %load.7.i25, %spam.exit ]
+  %load.626.i208 = phi i8 [ 0, %bb12.7.i ], [ %load.8.i, %bb8.8.i ], [ %load.6.i18, %spam.exit ]
+  %load.6.i191 = phi i8 [ %load.311.i, %bb12.7.i ], [ 0, %bb8.8.i ], [ %load.6.i18, %spam.exit ]
+  %load.5.i175 = phi i8 [ 0, %bb12.7.i ], [ %load.6.i183, %bb8.8.i ], [ %load.6.i18, %spam.exit ]
+  %load.418.i156 = phi i8 [ 0, %bb12.7.i ], [ %load.626.i200, %bb8.8.i ], [ %load.6.i18, %spam.exit ]
+  %load.4.i137 = phi i8 [ 0, %bb12.7.i ], [ %load.418.i148, %bb8.8.i ], [ %load.6.i18, %spam.exit ]
+  %load.3.i119 = phi i8 [ 0, %bb12.7.i ], [ 0, %bb8.8.i ], [ %load.6.i18, %spam.exit ]
+  br label %bb2
+}


        


More information about the llvm-commits mailing list