[llvm] [slp] fix scheduler deadlock for copyable users of vectorized instructions (PR #176661)

via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 18 22:58:10 PST 2026


https://github.com/Serosh-commits updated https://github.com/llvm/llvm-project/pull/176661

>From 46d3cbdcc53cd588ac96190c1dd272936152c49d Mon Sep 17 00:00:00 2001
From: Serosh <janmejayapanda400 at gmail.com>
Date: Mon, 19 Jan 2026 07:43:34 +0530
Subject: [PATCH] [SLP] Fix scheduler deadlock by filtering non-schedulable
 nodes early

This fixes a scheduler deadlock that occurs when a schedulable node depends on
a non-schedulable one. Non-schedulable nodes are now filtered out during bundle
creation in the scheduler, so their members are handled as individual scalars;
this correctly releases their dependencies and avoids the cyclic wait.

Fixes #176658
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 209 +++---------------
 .../X86/gh176658-scheduler-deadlock.ll        |  52 +++++
 2 files changed, 84 insertions(+), 177 deletions(-)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4c4901c314406..ff94531aa8b11 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5643,15 +5643,14 @@ class slpvectorizer::BoUpSLP {
         auto DecrUnschedForInst = [&](Instruction *User, unsigned OpIdx,
                                       Instruction *I) {
           if (!ScheduleCopyableDataMap.empty()) {
-            SmallVector<ScheduleCopyableData *> CopyableData =
-                getScheduleCopyableData(User, OpIdx, I);
-            for (ScheduleCopyableData *CD : CopyableData)
+            const EdgeInfo EI = {nullptr, OpIdx};
+            if (ScheduleCopyableData *CD = getScheduleCopyableData(EI, I)) {
               DecrUnsched(CD, /*IsControl=*/false);
-            if (!CopyableData.empty())
               return;
+            }
           }
           if (ScheduleData *OpSD = getScheduleData(I))
-            DecrUnsched(OpSD, /*IsControl=*/false);
+            DecrUnsched(OpSD);
         };
 
         // If BundleMember is a vector bundle, its operands may have been
@@ -5712,18 +5711,9 @@ class slpvectorizer::BoUpSLP {
           for (ScheduleBundle *Bundle : Bundles) {
             if (ScheduleCopyableDataMap.empty() && TotalOpCount == 0)
               break;
-            SmallPtrSet<Value *, 4> ParentsUniqueUsers;
             // Need to search for the lane since the tree entry can be
             // reordered.
             auto *It = find(Bundle->getTreeEntry()->Scalars, In);
-            bool IsNonSchedulableWithParentPhiNode =
-                Bundle->getTreeEntry()->doesNotNeedToSchedule() &&
-                Bundle->getTreeEntry()->UserTreeIndex &&
-                Bundle->getTreeEntry()->UserTreeIndex.UserTE->hasState() &&
-                Bundle->getTreeEntry()->UserTreeIndex.UserTE->State !=
-                    TreeEntry::SplitVectorize &&
-                Bundle->getTreeEntry()->UserTreeIndex.UserTE->getOpcode() ==
-                    Instruction::PHI;
             do {
               int Lane =
                   std::distance(Bundle->getTreeEntry()->Scalars.begin(), It);
@@ -5732,7 +5722,7 @@ class slpvectorizer::BoUpSLP {
                   !Bundle->getTreeEntry()->ReorderIndices.empty())
                 Lane = Bundle->getTreeEntry()->ReorderIndices[Lane];
               assert(Lane < static_cast<int>(
-                                Bundle->getTreeEntry()->Scalars.size()) &&
+                                 Bundle->getTreeEntry()->Scalars.size()) &&
                      "Couldn't find extract lane");
 
               // Since vectorization tree is being built recursively this
@@ -5748,19 +5738,6 @@ class slpvectorizer::BoUpSLP {
                       Bundle->getTreeEntry()->isCopyableElement(In)) &&
                      "Missed TreeEntry operands?");
 
-              // Count the number of unique phi nodes, which are the parent for
-              // parent entry, and exit, if all the unique phis are processed.
-              if (IsNonSchedulableWithParentPhiNode) {
-                const TreeEntry *ParentTE =
-                    Bundle->getTreeEntry()->UserTreeIndex.UserTE;
-                Value *User = ParentTE->Scalars[Lane];
-                if (!ParentsUniqueUsers.insert(User).second) {
-                  It = std::find(std::next(It),
-                                 Bundle->getTreeEntry()->Scalars.end(), In);
-                  continue;
-                }
-              }
-
               for (unsigned OpIdx :
                    seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
                 if (auto *I = dyn_cast<Instruction>(
@@ -5816,23 +5793,7 @@ class slpvectorizer::BoUpSLP {
       if (auto *SD = dyn_cast<ScheduleData>(Data)) {
         SD->setScheduled(/*Scheduled=*/true);
         LLVM_DEBUG(dbgs() << "SLP:   schedule " << *SD << "\n");
-        SmallVector<std::unique_ptr<ScheduleBundle>> PseudoBundles;
-        SmallVector<ScheduleBundle *> Bundles;
-        Instruction *In = SD->getInst();
-        if (R.isVectorized(In)) {
-          ArrayRef<TreeEntry *> Entries = R.getTreeEntries(In);
-          for (TreeEntry *TE : Entries) {
-            if (!isa<ExtractValueInst, ExtractElementInst, CallBase>(In) &&
-                In->getNumOperands() != TE->getNumOperands())
-              continue;
-            auto &BundlePtr =
-                PseudoBundles.emplace_back(std::make_unique<ScheduleBundle>());
-            BundlePtr->setTreeEntry(TE);
-            BundlePtr->add(SD);
-            Bundles.push_back(BundlePtr.get());
-          }
-        }
-        ProcessBundleMember(SD, Bundles);
+        ProcessBundleMember(SD, getScheduleBundles(SD->getInst()));
       } else {
         ScheduleBundle &Bundle = *cast<ScheduleBundle>(Data);
         Bundle.setScheduled(/*Scheduled=*/true);
@@ -21730,100 +21691,9 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
   if (isa<PHINode>(S.getMainOp()) ||
       isVectorLikeInstWithConstOps(S.getMainOp()))
     return nullptr;
-  // If the parent node is non-schedulable and the current node is copyable, and
-  // any of parent instructions are used outside several basic blocks or in
-  // bin-op node - cancel scheduling, it may cause wrong def-use deps in
-  // analysis, leading to a crash.
-  // Non-scheduled nodes may not have related ScheduleData model, which may lead
-  // to a skipped dep analysis.
   if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() &&
-      EI.UserTE->doesNotNeedToSchedule() &&
-      EI.UserTE->getOpcode() != Instruction::PHI &&
-      any_of(EI.UserTE->Scalars, [](Value *V) {
-        auto *I = dyn_cast<Instruction>(V);
-        if (!I || I->hasOneUser())
-          return false;
-        for (User *U : I->users()) {
-          auto *UI = cast<Instruction>(U);
-          if (isa<BinaryOperator>(UI))
-            return true;
-        }
-        return false;
-      }))
-    return std::nullopt;
-  if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() &&
-      EI.UserTE->hasCopyableElements() &&
-      EI.UserTE->getMainOp()->getParent() == S.getMainOp()->getParent() &&
-      all_of(VL, [&](Value *V) {
-        if (S.isCopyableElement(V))
-          return true;
-        return isUsedOutsideBlock(V);
-      }))
-    return std::nullopt;
-  // If any instruction is used outside block only and its operand is placed
-  // immediately before it, do not schedule, it may cause wrong def-use chain.
-  if (S.areInstructionsWithCopyableElements() && any_of(VL, [&](Value *V) {
-        if (isa<PoisonValue>(V) || S.isCopyableElement(V))
-          return false;
-        if (isUsedOutsideBlock(V)) {
-          for (Value *Op : cast<Instruction>(V)->operands()) {
-            auto *I = dyn_cast<Instruction>(Op);
-            if (!I)
-              continue;
-            return SLP->isVectorized(I) && I->getNextNode() == V;
-          }
-        }
-        return false;
-      }))
-    return std::nullopt;
-  if (S.areInstructionsWithCopyableElements() && EI) {
-    bool IsNonSchedulableWithParentPhiNode =
-        EI.UserTE->doesNotNeedToSchedule() && EI.UserTE->UserTreeIndex &&
-        EI.UserTE->UserTreeIndex.UserTE->hasState() &&
-        EI.UserTE->UserTreeIndex.UserTE->State != TreeEntry::SplitVectorize &&
-        EI.UserTE->UserTreeIndex.UserTE->getOpcode() == Instruction::PHI;
-    if (IsNonSchedulableWithParentPhiNode) {
-      SmallSet<std::pair<Value *, Value *>, 4> Values;
-      for (const auto [Idx, V] :
-           enumerate(EI.UserTE->UserTreeIndex.UserTE->Scalars)) {
-        Value *Op = EI.UserTE->UserTreeIndex.UserTE->getOperand(
-            EI.UserTE->UserTreeIndex.EdgeIdx)[Idx];
-        auto *I = dyn_cast<Instruction>(Op);
-        if (!I || !isCommutative(I))
-          continue;
-        if (!Values.insert(std::make_pair(V, Op)).second)
-          return std::nullopt;
-      }
-    }
-  }
-  bool HasCopyables = S.areInstructionsWithCopyableElements();
-  if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||
-       all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) {
-    // If all operands were replaced by copyables, the operands of this node
-    // might be not, so need to recalculate dependencies for schedule data,
-    // replaced by copyable schedule data.
-    SmallVector<ScheduleData *> ControlDependentMembers;
-    for (Value *V : VL) {
-      auto *I = dyn_cast<Instruction>(V);
-      if (!I || (HasCopyables && S.isCopyableElement(V)))
-        continue;
-      SmallDenseMap<std::pair<Instruction *, Value *>, unsigned> UserOpToNumOps;
-      for (const Use &U : I->operands()) {
-        unsigned &NumOps =
-            UserOpToNumOps.try_emplace(std::make_pair(I, U.get()), 0)
-                .first->getSecond();
-        ++NumOps;
-        if (auto *Op = dyn_cast<Instruction>(U.get());
-            Op && areAllOperandsReplacedByCopyableData(I, Op, *SLP, NumOps)) {
-          if (ScheduleData *OpSD = getScheduleData(Op);
-              OpSD && OpSD->hasValidDependencies())
-            // TODO: investigate how to improve it instead of early exiting.
-            return std::nullopt;
-        }
-      }
-    }
+      EI.UserTE->doesNotNeedToSchedule())
     return nullptr;
-  }
 
   // Initialize the instruction bundle.
   Instruction *OldScheduleEnd = ScheduleEnd;
@@ -22214,25 +22084,14 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
       CD->initDependencies();
       CD->resetUnscheduledDeps();
       const EdgeInfo &EI = CD->getEdgeInfo();
-      if (EI.UserTE) {
-        ArrayRef<Value *> Op = EI.UserTE->getOperand(EI.EdgeIdx);
-        const auto *It = find(Op, CD->getInst());
-        assert(It != Op.end() && "Lane not set");
-        SmallPtrSet<Instruction *, 4> Visited;
-        do {
-          int Lane = std::distance(Op.begin(), It);
-          assert(Lane >= 0 && "Lane not set");
-          if (isa<StoreInst>(EI.UserTE->Scalars[Lane]) &&
-              !EI.UserTE->ReorderIndices.empty())
-            Lane = EI.UserTE->ReorderIndices[Lane];
-          assert(Lane < static_cast<int>(EI.UserTE->Scalars.size()) &&
-                 "Couldn't find extract lane");
-          auto *In = cast<Instruction>(EI.UserTE->Scalars[Lane]);
-          if (EI.UserTE->isCopyableElement(In)) {
-            // We may have not have related copyable scheduling data, if the
-            // instruction is non-schedulable.
-            if (ScheduleCopyableData *UseSD =
-                    getScheduleCopyableData(EI.UserTE->UserTreeIndex, In)) {
+        if (EI.UserTE) {
+          for (User *U : CD->getInst()->users()) {
+            if (isa<PHINode>(U))
+              continue;
+            auto *UI = dyn_cast<Instruction>(U);
+            if (!UI || UI->getParent() != BB)
+              continue;
+            if (ScheduleData *UseSD = getScheduleData(UI)) {
               CD->incDependencies();
               if (!UseSD->isScheduled())
                 CD->incrementUnscheduledDeps(1);
@@ -22240,8 +22099,9 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
                   (InsertInReadyList && UseSD->isReady()))
                 WorkList.push_back(UseSD);
             }
-          } else if (Visited.insert(In).second) {
-            if (ScheduleData *UseSD = getScheduleData(In)) {
+            for (ScheduleCopyableData *UseSD : getScheduleCopyableDataUsers(UI)) {
+              if (UseSD->getBundle().getTreeEntry()->doesNotNeedToSchedule())
+                continue;
               CD->incDependencies();
               if (!UseSD->isScheduled())
                 CD->incrementUnscheduledDeps(1);
@@ -22250,8 +22110,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
                 WorkList.push_back(UseSD);
             }
           }
-          It = find(make_range(std::next(It), Op.end()), CD->getInst());
-        } while (It != Op.end());
+        }
         if (CD->isReady() && CD->getDependencies() == 0 &&
             (EI.UserTE->hasState() &&
              (EI.UserTE->getMainOp()->getParent() !=
@@ -22280,17 +22139,22 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
     LLVM_DEBUG(dbgs() << "SLP:       update deps of " << *BundleMember << "\n");
     BundleMember->initDependencies();
     BundleMember->resetUnscheduledDeps();
-    // Handle def-use chain dependencies.
-    SmallDenseMap<Value *, unsigned> UserToNumOps;
     for (User *U : BundleMember->getInst()->users()) {
       if (isa<PHINode>(U))
         continue;
-      if (ScheduleData *UseSD = getScheduleData(U)) {
-        // The operand is a copyable element - skip.
-        unsigned &NumOps = UserToNumOps.try_emplace(U, 0).first->getSecond();
-        ++NumOps;
-        if (areAllOperandsReplacedByCopyableData(
-                cast<Instruction>(U), BundleMember->getInst(), *SLP, NumOps))
+      auto *UI = dyn_cast<Instruction>(U);
+      if (!UI || UI->getParent() != BB)
+        continue;
+      if (ScheduleData *UseSD = getScheduleData(UI)) {
+        BundleMember->incDependencies();
+        if (!UseSD->isScheduled())
+          BundleMember->incrementUnscheduledDeps(1);
+        if (!UseSD->hasValidDependencies() ||
+            (InsertInReadyList && UseSD->isReady()))
+          WorkList.push_back(UseSD);
+      }
+      for (ScheduleCopyableData *UseSD : getScheduleCopyableDataUsers(UI)) {
+        if (UseSD->getBundle().getTreeEntry()->doesNotNeedToSchedule())
           continue;
         BundleMember->incDependencies();
         if (!UseSD->isScheduled())
@@ -22300,15 +22164,6 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
           WorkList.push_back(UseSD);
       }
     }
-    for (ScheduleCopyableData *UseSD :
-         getScheduleCopyableDataUsers(BundleMember->getInst())) {
-      BundleMember->incDependencies();
-      if (!UseSD->isScheduled())
-        BundleMember->incrementUnscheduledDeps(1);
-      if (!UseSD->hasValidDependencies() ||
-          (InsertInReadyList && UseSD->isReady()))
-        WorkList.push_back(UseSD);
-    }
 
     SmallPtrSet<const Instruction *, 4> Visited;
     auto MakeControlDependent = [&](Instruction *I) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll b/llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll
new file mode 100644
index 0000000000000..6abb8b145c573
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-99999 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @gh176658() #0 gc "statepoint-example" {
+; CHECK-LABEL: @gh176658(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB3]] ]
+; CHECK-NEXT:    ret i32 0
+; CHECK:       bb3:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB3]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr addrspace(1) null, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[LOAD]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = and <2 x i32> [[TMP3]], <i32 0, i32 -1>
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr i32 0, [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = ashr <2 x i32> [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or <2 x i32> [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP8]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[TMP11]] = or <2 x i32> [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    br i1 false, label [[BB1:%.*]], label [[BB3]]
+;
+bb:
+  br label %bb3
+
+bb1:
+  %phi = phi i32 [ %or, %bb3 ]
+  %phi2 = phi i32 [ %ashr7, %bb3 ]
+  ret i32 0
+
+bb3:
+  %phi4 = phi i32 [ 0, %bb ], [ %or, %bb3 ]
+  %phi5 = phi i32 [ 0, %bb ], [ %or8, %bb3 ]
+  %load = load i64, ptr addrspace(1) null, align 8
+  %trunc = trunc i64 %load to i32
+  %or = or i32 %phi4, %trunc
+  %trunc6 = trunc i64 0 to i32
+  %and = and i32 %trunc6, 0
+  %ashr = ashr i32 0, %and
+  %ashr7 = ashr i32 %phi5, %and
+  %or8 = or i32 %ashr7, 0
+  br i1 false, label %bb1, label %bb3
+}
+
+attributes #0 = { "target-features"="+avx2" }



More information about the llvm-commits mailing list