[llvm] e7f370f - [SLP] Check all copyable children for non-schedulable parent nodes

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 20 15:54:29 PDT 2025


Author: Alexey Bataev
Date: 2025-10-20T15:52:28-07:00
New Revision: e7f370f910701b6c67d41dab80e645227692c58b

URL: https://github.com/llvm/llvm-project/commit/e7f370f910701b6c67d41dab80e645227692c58b
DIFF: https://github.com/llvm/llvm-project/commit/e7f370f910701b6c67d41dab80e645227692c58b.diff

LOG: [SLP] Check all copyable children for non-schedulable parent nodes

If the parent node is non-schedulable and it includes several copies of
the same instruction, its operands might be replaced by copyable nodes
in multiple child nodes, and if the instruction is commutative, these
copyable nodes can be used in different operands. The compiler must
account for this possibility, taking into account that non-copyable
children are scheduled only once for the same parent instruction.

Fixes #164242

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3f18bd70539a0..106cde352e0b5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5578,7 +5578,7 @@ class BoUpSLP {
           // Decrement the unscheduled counter and insert to ready list if
           // ready.
           auto DecrUnschedForInst = [&](Instruction *I, TreeEntry *UserTE,
-                                        unsigned OpIdx) {
+                                        unsigned OpIdx, bool FirstRun = false) {
             if (!ScheduleCopyableDataMap.empty()) {
               const EdgeInfo EI = {UserTE, OpIdx};
               if (ScheduleCopyableData *CD = getScheduleCopyableData(EI, I)) {
@@ -5586,6 +5586,8 @@ class BoUpSLP {
                 return;
               }
             }
+            if (!FirstRun)
+              return;
             auto It = OperandsUses.find(I);
             assert(It != OperandsUses.end() && "Operand not found");
             if (It->second > 0) {
@@ -5602,37 +5604,48 @@ class BoUpSLP {
               break;
             // Need to search for the lane since the tree entry can be
             // reordered.
-            int Lane = std::distance(Bundle->getTreeEntry()->Scalars.begin(),
-                                     find(Bundle->getTreeEntry()->Scalars, In));
-            assert(Lane >= 0 && "Lane not set");
-            if (isa<StoreInst>(In) &&
-                !Bundle->getTreeEntry()->ReorderIndices.empty())
-              Lane = Bundle->getTreeEntry()->ReorderIndices[Lane];
-            assert(Lane < static_cast<int>(
-                              Bundle->getTreeEntry()->Scalars.size()) &&
-                   "Couldn't find extract lane");
-
-            // Since vectorization tree is being built recursively this
-            // assertion ensures that the tree entry has all operands set before
-            // reaching this code. Couple of exceptions known at the moment are
-            // extracts where their second (immediate) operand is not added.
-            // Since immediates do not affect scheduler behavior this is
-            // considered okay.
-            assert(In &&
-                   (isa<ExtractValueInst, ExtractElementInst, CallBase>(In) ||
-                    In->getNumOperands() ==
-                        Bundle->getTreeEntry()->getNumOperands() ||
-                    Bundle->getTreeEntry()->isCopyableElement(In)) &&
-                   "Missed TreeEntry operands?");
-
-            for (unsigned OpIdx :
-                 seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
-              if (auto *I = dyn_cast<Instruction>(
-                      Bundle->getTreeEntry()->getOperand(OpIdx)[Lane])) {
-                LLVM_DEBUG(dbgs() << "SLP:   check for readiness (def): " << *I
-                                  << "\n");
-                DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx);
-              }
+            auto *It = find(Bundle->getTreeEntry()->Scalars, In);
+            bool FirstRun = true;
+            do {
+              int Lane =
+                  std::distance(Bundle->getTreeEntry()->Scalars.begin(), It);
+              assert(Lane >= 0 && "Lane not set");
+              if (isa<StoreInst>(In) &&
+                  !Bundle->getTreeEntry()->ReorderIndices.empty())
+                Lane = Bundle->getTreeEntry()->ReorderIndices[Lane];
+              assert(Lane < static_cast<int>(
+                                Bundle->getTreeEntry()->Scalars.size()) &&
+                     "Couldn't find extract lane");
+
+              // Since vectorization tree is being built recursively this
+              // assertion ensures that the tree entry has all operands set
+              // before reaching this code. Couple of exceptions known at the
+              // moment are extracts where their second (immediate) operand is
+              // not added. Since immediates do not affect scheduler behavior
+              // this is considered okay.
+              assert(In &&
+                     (isa<ExtractValueInst, ExtractElementInst, CallBase>(In) ||
+                      In->getNumOperands() ==
+                          Bundle->getTreeEntry()->getNumOperands() ||
+                      Bundle->getTreeEntry()->isCopyableElement(In)) &&
+                     "Missed TreeEntry operands?");
+
+              for (unsigned OpIdx :
+                   seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
+                if (auto *I = dyn_cast<Instruction>(
+                        Bundle->getTreeEntry()->getOperand(OpIdx)[Lane])) {
+                  LLVM_DEBUG(dbgs() << "SLP:   check for readiness (def): "
+                                    << *I << "\n");
+                  DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx,
+                                     FirstRun);
+                }
+              // If parent node is schedulable, it will be handle correctly.
+              if (!Bundle->getTreeEntry()->doesNotNeedToSchedule())
+                break;
+              It = std::find(std::next(It),
+                             Bundle->getTreeEntry()->Scalars.end(), In);
+              FirstRun = false;
+            } while (It != Bundle->getTreeEntry()->Scalars.end());
           }
         } else {
           // If BundleMember is a stand-alone instruction, no operand reordering

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll
new file mode 100644
index 0000000000000..7accca311af3c
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s -slp-threshold=-99999 | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: define void @test() {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[BB1:.*]], label %[[BB6:.*]]
+; CHECK:       [[BB1]]:
+; CHECK-NEXT:    br label %[[BB6]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ <i32 0, i32 0, i32 poison, i32 0>, %[[BB]] ], [ <i32 0, i32 0, i32 -1, i32 -1>, %[[BB1]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    ret void
+;
+bb:
+  br i1 false, label %bb1, label %bb6
+
+bb1:
+  %add = add i32 0, 0
+  %shl = shl i32 %add, 0
+  %sub = sub i32 0, 1
+  %add2 = add i32 %sub, %shl
+  %add3 = add i32 0, 0
+  %shl4 = shl i32 %add3, 0
+  %ashr = ashr i32 %shl4, 1
+  %add5 = add i32 0, 0
+  br label %bb6
+
+bb6:
+  %phi = phi i32 [ poison, %bb ], [ %add2, %bb1 ]
+  %phi7 = phi i32 [ 0, %bb ], [ %ashr, %bb1 ]
+  %phi8 = phi i32 [ 0, %bb ], [ %add2, %bb1 ]
+  %phi9 = phi i32 [ 0, %bb ], [ %add5, %bb1 ]
+  %or = or i32 %phi8, 0
+  ret void
+}


        


More information about the llvm-commits mailing list