[llvm] e7f370f - [SLP] Check all copyable children for non-schedulable parent nodes
    Alexey Bataev via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Mon Oct 20 15:54:29 PDT 2025
    
    
  
Author: Alexey Bataev
Date: 2025-10-20T15:52:28-07:00
New Revision: e7f370f910701b6c67d41dab80e645227692c58b
URL: https://github.com/llvm/llvm-project/commit/e7f370f910701b6c67d41dab80e645227692c58b
DIFF: https://github.com/llvm/llvm-project/commit/e7f370f910701b6c67d41dab80e645227692c58b.diff
LOG: [SLP] Check all copyable children for non-schedulable parent nodes
If the parent node is non-schedulable and it includes several copies of
the same instruction, its operands might be replaced by the copyable
nodes in multiple child nodes, and if the instruction is commutative,
they can be used in different operands. The compiler must account for
this case, taking into account that non-copyable children are
scheduled only once for the same parent instruction.
Fixes #164242
Added: 
    llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll
Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed: 
    
################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3f18bd70539a0..106cde352e0b5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5578,7 +5578,7 @@ class BoUpSLP {
           // Decrement the unscheduled counter and insert to ready list if
           // ready.
           auto DecrUnschedForInst = [&](Instruction *I, TreeEntry *UserTE,
-                                        unsigned OpIdx) {
+                                        unsigned OpIdx, bool FirstRun = false) {
             if (!ScheduleCopyableDataMap.empty()) {
               const EdgeInfo EI = {UserTE, OpIdx};
               if (ScheduleCopyableData *CD = getScheduleCopyableData(EI, I)) {
@@ -5586,6 +5586,8 @@ class BoUpSLP {
                 return;
               }
             }
+            if (!FirstRun)
+              return;
             auto It = OperandsUses.find(I);
             assert(It != OperandsUses.end() && "Operand not found");
             if (It->second > 0) {
@@ -5602,37 +5604,48 @@ class BoUpSLP {
               break;
             // Need to search for the lane since the tree entry can be
             // reordered.
-            int Lane = std::distance(Bundle->getTreeEntry()->Scalars.begin(),
-                                     find(Bundle->getTreeEntry()->Scalars, In));
-            assert(Lane >= 0 && "Lane not set");
-            if (isa<StoreInst>(In) &&
-                !Bundle->getTreeEntry()->ReorderIndices.empty())
-              Lane = Bundle->getTreeEntry()->ReorderIndices[Lane];
-            assert(Lane < static_cast<int>(
-                              Bundle->getTreeEntry()->Scalars.size()) &&
-                   "Couldn't find extract lane");
-
-            // Since vectorization tree is being built recursively this
-            // assertion ensures that the tree entry has all operands set before
-            // reaching this code. Couple of exceptions known at the moment are
-            // extracts where their second (immediate) operand is not added.
-            // Since immediates do not affect scheduler behavior this is
-            // considered okay.
-            assert(In &&
-                   (isa<ExtractValueInst, ExtractElementInst, CallBase>(In) ||
-                    In->getNumOperands() ==
-                        Bundle->getTreeEntry()->getNumOperands() ||
-                    Bundle->getTreeEntry()->isCopyableElement(In)) &&
-                   "Missed TreeEntry operands?");
-
-            for (unsigned OpIdx :
-                 seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
-              if (auto *I = dyn_cast<Instruction>(
-                      Bundle->getTreeEntry()->getOperand(OpIdx)[Lane])) {
-                LLVM_DEBUG(dbgs() << "SLP:   check for readiness (def): " << *I
-                                  << "\n");
-                DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx);
-              }
+            auto *It = find(Bundle->getTreeEntry()->Scalars, In);
+            bool FirstRun = true;
+            do {
+              int Lane =
+                  std::distance(Bundle->getTreeEntry()->Scalars.begin(), It);
+              assert(Lane >= 0 && "Lane not set");
+              if (isa<StoreInst>(In) &&
+                  !Bundle->getTreeEntry()->ReorderIndices.empty())
+                Lane = Bundle->getTreeEntry()->ReorderIndices[Lane];
+              assert(Lane < static_cast<int>(
+                                Bundle->getTreeEntry()->Scalars.size()) &&
+                     "Couldn't find extract lane");
+
+              // Since vectorization tree is being built recursively this
+              // assertion ensures that the tree entry has all operands set
+              // before reaching this code. Couple of exceptions known at the
+              // moment are extracts where their second (immediate) operand is
+              // not added. Since immediates do not affect scheduler behavior
+              // this is considered okay.
+              assert(In &&
+                     (isa<ExtractValueInst, ExtractElementInst, CallBase>(In) ||
+                      In->getNumOperands() ==
+                          Bundle->getTreeEntry()->getNumOperands() ||
+                      Bundle->getTreeEntry()->isCopyableElement(In)) &&
+                     "Missed TreeEntry operands?");
+
+              for (unsigned OpIdx :
+                   seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
+                if (auto *I = dyn_cast<Instruction>(
+                        Bundle->getTreeEntry()->getOperand(OpIdx)[Lane])) {
+                  LLVM_DEBUG(dbgs() << "SLP:   check for readiness (def): "
+                                    << *I << "\n");
+                  DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx,
+                                     FirstRun);
+                }
+              // If parent node is schedulable, it will be handled correctly.
+              if (!Bundle->getTreeEntry()->doesNotNeedToSchedule())
+                break;
+              It = std::find(std::next(It),
+                             Bundle->getTreeEntry()->Scalars.end(), In);
+              FirstRun = false;
+            } while (It != Bundle->getTreeEntry()->Scalars.end());
           }
         } else {
           // If BundleMember is a stand-alone instruction, no operand reordering
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll
new file mode 100644
index 0000000000000..7accca311af3c
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s -slp-threshold=-99999 | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: define void @test() {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[BB1:.*]], label %[[BB6:.*]]
+; CHECK:       [[BB1]]:
+; CHECK-NEXT:    br label %[[BB6]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ <i32 0, i32 0, i32 poison, i32 0>, %[[BB]] ], [ <i32 0, i32 0, i32 -1, i32 -1>, %[[BB1]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    ret void
+;
+bb:
+  br i1 false, label %bb1, label %bb6
+
+bb1:
+  %add = add i32 0, 0
+  %shl = shl i32 %add, 0
+  %sub = sub i32 0, 1
+  %add2 = add i32 %sub, %shl
+  %add3 = add i32 0, 0
+  %shl4 = shl i32 %add3, 0
+  %ashr = ashr i32 %shl4, 1
+  %add5 = add i32 0, 0
+  br label %bb6
+
+bb6:
+  %phi = phi i32 [ poison, %bb ], [ %add2, %bb1 ]
+  %phi7 = phi i32 [ 0, %bb ], [ %ashr, %bb1 ]
+  %phi8 = phi i32 [ 0, %bb ], [ %add2, %bb1 ]
+  %phi9 = phi i32 [ 0, %bb ], [ %add5, %bb1 ]
+  %or = or i32 %phi8, 0
+  ret void
+}
        
    
    
More information about the llvm-commits
mailing list