[llvm] [slp] fix scheduler deadlock for copyable users of vectorized instructions (PR #176661)

via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 18 17:14:19 PST 2026


https://github.com/Serosh-commits updated https://github.com/llvm/llvm-project/pull/176661

>From d099f428252d1207884898af9c7d8986f74e7809 Mon Sep 17 00:00:00 2001
From: Serosh <janmejayapanda400 at gmail.com>
Date: Sun, 18 Jan 2026 23:22:08 +0530
Subject: [PATCH 1/2] [slp] fix scheduler deadlock by correctly tracking
 copyable dependencies

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 23 +++++++++++--------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4c4901c314406..06ed3502cfd89 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -22576,11 +22576,20 @@ void BoUpSLP::scheduleBlock(const BoUpSLP &R, BlockScheduling *BS) {
         if (!Bundle.hasValidDependencies())
           BS->calculateDependencies(Bundle, /*InsertInReadyList=*/false, this);
       }
-      continue;
     }
-    SmallVector<ScheduleCopyableData *> CopyableData =
+    SmallVector<ScheduleCopyableData *> SelfCopyableData =
+        BS->getScheduleCopyableData(I);
+    for (ScheduleCopyableData *SD : reverse(SelfCopyableData)) {
+      ScheduleBundle &Bundle = SD->getBundle();
+      Bundle.setSchedulingPriority(Idx++);
+      if (!Bundle.hasValidDependencies())
+        BS->calculateDependencies(Bundle, /*InsertInReadyList=*/false, this);
+    }
+    SmallVector<ScheduleCopyableData *> UserCopyableData =
         BS->getScheduleCopyableDataUsers(I);
-    if (ScheduleData *SD = BS->getScheduleData(I)) {
+    if (Bundles.empty() && SelfCopyableData.empty()) {
+      ScheduleData *SD = BS->getScheduleData(I);
+      assert(SD && "Expected schedule data");
       [[maybe_unused]] ArrayRef<TreeEntry *> SDTEs = getTreeEntries(I);
       assert((isVectorLikeInstWithConstOps(SD->getInst()) || SDTEs.empty() ||
               SDTEs.front()->doesNotNeedToSchedule() ||
@@ -22588,22 +22597,18 @@ void BoUpSLP::scheduleBlock(const BoUpSLP &R, BlockScheduling *BS) {
              "scheduler and vectorizer bundle mismatch");
       SD->setSchedulingPriority(Idx++);
       if (!SD->hasValidDependencies() &&
-          (!CopyableData.empty() ||
+          (!UserCopyableData.empty() ||
            any_of(R.ValueToGatherNodes.lookup(I), [&](const TreeEntry *TE) {
              assert(TE->isGather() && "expected gather node");
              return TE->hasState() && TE->hasCopyableElements() &&
                     TE->isCopyableElement(I);
            }))) {
-        // Need to calculate deps for these nodes to correctly handle copyable
-        // dependencies, even if they were cancelled.
-        // If copyables bundle was cancelled, the deps are cleared and need to
-        // recalculate them.
         ScheduleBundle Bundle;
         Bundle.add(SD);
         BS->calculateDependencies(Bundle, /*InsertInReadyList=*/false, this);
       }
     }
-    for (ScheduleCopyableData *SD : reverse(CopyableData)) {
+    for (ScheduleCopyableData *SD : reverse(UserCopyableData)) {
       ScheduleBundle &Bundle = SD->getBundle();
       Bundle.setSchedulingPriority(Idx++);
       if (!Bundle.hasValidDependencies())

>From cf153df9bca14c03cccf8d3497168012640b39d1 Mon Sep 17 00:00:00 2001
From: Serosh <janmejayapanda400 at gmail.com>
Date: Sun, 18 Jan 2026 23:23:00 +0530
Subject: [PATCH 2/2] [slp] add regression test for scheduler deadlock

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 165 ++++++++++--------
 .../X86/gh176658-scheduler-deadlock.ll        |  52 ++++++
 2 files changed, 146 insertions(+), 71 deletions(-)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 06ed3502cfd89..372c0b1b12136 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5642,16 +5642,33 @@ class slpvectorizer::BoUpSLP {
 
         auto DecrUnschedForInst = [&](Instruction *User, unsigned OpIdx,
                                       Instruction *I) {
-          if (!ScheduleCopyableDataMap.empty()) {
-            SmallVector<ScheduleCopyableData *> CopyableData =
-                getScheduleCopyableData(User, OpIdx, I);
-            for (ScheduleCopyableData *CD : CopyableData)
-              DecrUnsched(CD, /*IsControl=*/false);
-            if (!CopyableData.empty())
-              return;
+          SmallVector<Instruction *, 8> Worklist;
+          Worklist.push_back(I);
+          SmallPtrSet<Instruction *, 8> Visited;
+          while (!Worklist.empty()) {
+            Instruction *CurrI = Worklist.pop_back_val();
+            if (!Visited.insert(CurrI).second)
+              continue;
+            bool Found = false;
+            if (!ScheduleCopyableDataMap.empty()) {
+              SmallVector<ScheduleCopyableData *> CopyableData =
+                  getScheduleCopyableData(CurrI);
+              for (ScheduleCopyableData *CD : CopyableData) {
+                DecrUnsched(CD, /*IsControl=*/false);
+                Found = true;
+              }
+            }
+            if (ScheduleData *OpSD = getScheduleData(CurrI)) {
+              DecrUnsched(OpSD, /*IsControl=*/false);
+              Found = true;
+            }
+            if (!Found && CurrI->getParent() == BB && !isa<PHINode>(CurrI)) {
+              for (Value *Op : CurrI->operands())
+                if (auto *OpI = dyn_cast<Instruction>(Op))
+                  if (OpI->getParent() == BB && !isa<PHINode>(OpI))
+                    Worklist.push_back(OpI);
+            }
           }
-          if (ScheduleData *OpSD = getScheduleData(I))
-            DecrUnsched(OpSD, /*IsControl=*/false);
         };
 
         // If BundleMember is a vector bundle, its operands may have been
@@ -22215,43 +22232,45 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
       CD->resetUnscheduledDeps();
       const EdgeInfo &EI = CD->getEdgeInfo();
       if (EI.UserTE) {
-        ArrayRef<Value *> Op = EI.UserTE->getOperand(EI.EdgeIdx);
-        const auto *It = find(Op, CD->getInst());
-        assert(It != Op.end() && "Lane not set");
-        SmallPtrSet<Instruction *, 4> Visited;
-        do {
-          int Lane = std::distance(Op.begin(), It);
-          assert(Lane >= 0 && "Lane not set");
-          if (isa<StoreInst>(EI.UserTE->Scalars[Lane]) &&
-              !EI.UserTE->ReorderIndices.empty())
-            Lane = EI.UserTE->ReorderIndices[Lane];
-          assert(Lane < static_cast<int>(EI.UserTE->Scalars.size()) &&
-                 "Couldn't find extract lane");
-          auto *In = cast<Instruction>(EI.UserTE->Scalars[Lane]);
-          if (EI.UserTE->isCopyableElement(In)) {
-            // We may have not have related copyable scheduling data, if the
-            // instruction is non-schedulable.
-            if (ScheduleCopyableData *UseSD =
-                    getScheduleCopyableData(EI.UserTE->UserTreeIndex, In)) {
+        SmallVector<Instruction *, 8> Users;
+        Users.push_back(CD->getInst());
+        SmallPtrSet<Instruction *, 8> Visited;
+        while (!Users.empty()) {
+          Instruction *I = Users.pop_back_val();
+          for (User *U : I->users()) {
+            if (isa<PHINode>(U))
+              continue;
+            auto *UI = dyn_cast<Instruction>(U);
+            if (!UI || UI->getParent() != BB)
+              continue;
+            if (!Visited.insert(UI).second)
+              continue;
+            bool Found = false;
+            if (ScheduleData *UseSD = getScheduleData(UI)) {
               CD->incDependencies();
               if (!UseSD->isScheduled())
                 CD->incrementUnscheduledDeps(1);
               if (!UseSD->hasValidDependencies() ||
                   (InsertInReadyList && UseSD->isReady()))
                 WorkList.push_back(UseSD);
+              Found = true;
             }
-          } else if (Visited.insert(In).second) {
-            if (ScheduleData *UseSD = getScheduleData(In)) {
+            for (ScheduleCopyableData *UseSD : getScheduleCopyableDataUsers(UI)) {
+              if (UseSD->getBundle().getTreeEntry()->doesNotNeedToSchedule())
+                continue;
               CD->incDependencies();
               if (!UseSD->isScheduled())
                 CD->incrementUnscheduledDeps(1);
               if (!UseSD->hasValidDependencies() ||
                   (InsertInReadyList && UseSD->isReady()))
                 WorkList.push_back(UseSD);
+              Found = true;
             }
+            if (!Found)
+              Users.push_back(UI);
           }
-          It = find(make_range(std::next(It), Op.end()), CD->getInst());
-        } while (It != Op.end());
+        }
+      }
         if (CD->isReady() && CD->getDependencies() == 0 &&
             (EI.UserTE->hasState() &&
              (EI.UserTE->getMainOp()->getParent() !=
@@ -22280,35 +22299,44 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
     LLVM_DEBUG(dbgs() << "SLP:       update deps of " << *BundleMember << "\n");
     BundleMember->initDependencies();
     BundleMember->resetUnscheduledDeps();
-    // Handle def-use chain dependencies.
-    SmallDenseMap<Value *, unsigned> UserToNumOps;
-    for (User *U : BundleMember->getInst()->users()) {
-      if (isa<PHINode>(U))
-        continue;
-      if (ScheduleData *UseSD = getScheduleData(U)) {
-        // The operand is a copyable element - skip.
-        unsigned &NumOps = UserToNumOps.try_emplace(U, 0).first->getSecond();
-        ++NumOps;
-        if (areAllOperandsReplacedByCopyableData(
-                cast<Instruction>(U), BundleMember->getInst(), *SLP, NumOps))
+    SmallVector<Instruction *, 8> Users;
+    Users.push_back(BundleMember->getInst());
+    SmallPtrSet<Instruction *, 8> Visited;
+    while (!Users.empty()) {
+      Instruction *I = Users.pop_back_val();
+      for (User *U : I->users()) {
+        if (isa<PHINode>(U))
           continue;
-        BundleMember->incDependencies();
-        if (!UseSD->isScheduled())
-          BundleMember->incrementUnscheduledDeps(1);
-        if (!UseSD->hasValidDependencies() ||
-            (InsertInReadyList && UseSD->isReady()))
-          WorkList.push_back(UseSD);
+        auto *UI = dyn_cast<Instruction>(U);
+        if (!UI || UI->getParent() != BB)
+          continue;
+        if (!Visited.insert(UI).second)
+          continue;
+        bool Found = false;
+        if (ScheduleData *UseSD = getScheduleData(UI)) {
+          BundleMember->incDependencies();
+          if (!UseSD->isScheduled())
+            BundleMember->incrementUnscheduledDeps(1);
+          if (!UseSD->hasValidDependencies() ||
+              (InsertInReadyList && UseSD->isReady()))
+            WorkList.push_back(UseSD);
+          Found = true;
+        }
+        for (ScheduleCopyableData *UseSD : getScheduleCopyableDataUsers(UI)) {
+          if (UseSD->getBundle().getTreeEntry()->doesNotNeedToSchedule())
+            continue;
+          BundleMember->incDependencies();
+          if (!UseSD->isScheduled())
+            BundleMember->incrementUnscheduledDeps(1);
+          if (!UseSD->hasValidDependencies() ||
+              (InsertInReadyList && UseSD->isReady()))
+            WorkList.push_back(UseSD);
+          Found = true;
+        }
+        if (!Found)
+          Users.push_back(UI);
       }
     }
-    for (ScheduleCopyableData *UseSD :
-         getScheduleCopyableDataUsers(BundleMember->getInst())) {
-      BundleMember->incDependencies();
-      if (!UseSD->isScheduled())
-        BundleMember->incrementUnscheduledDeps(1);
-      if (!UseSD->hasValidDependencies() ||
-          (InsertInReadyList && UseSD->isReady()))
-        WorkList.push_back(UseSD);
-    }
 
     SmallPtrSet<const Instruction *, 4> Visited;
     auto MakeControlDependent = [&](Instruction *I) {
@@ -22576,20 +22604,11 @@ void BoUpSLP::scheduleBlock(const BoUpSLP &R, BlockScheduling *BS) {
         if (!Bundle.hasValidDependencies())
           BS->calculateDependencies(Bundle, /*InsertInReadyList=*/false, this);
       }
+      continue;
     }
-    SmallVector<ScheduleCopyableData *> SelfCopyableData =
-        BS->getScheduleCopyableData(I);
-    for (ScheduleCopyableData *SD : reverse(SelfCopyableData)) {
-      ScheduleBundle &Bundle = SD->getBundle();
-      Bundle.setSchedulingPriority(Idx++);
-      if (!Bundle.hasValidDependencies())
-        BS->calculateDependencies(Bundle, /*InsertInReadyList=*/false, this);
-    }
-    SmallVector<ScheduleCopyableData *> UserCopyableData =
+    SmallVector<ScheduleCopyableData *> CopyableData =
         BS->getScheduleCopyableDataUsers(I);
-    if (Bundles.empty() && SelfCopyableData.empty()) {
-      ScheduleData *SD = BS->getScheduleData(I);
-      assert(SD && "Expected schedule data");
+    if (ScheduleData *SD = BS->getScheduleData(I)) {
       [[maybe_unused]] ArrayRef<TreeEntry *> SDTEs = getTreeEntries(I);
       assert((isVectorLikeInstWithConstOps(SD->getInst()) || SDTEs.empty() ||
               SDTEs.front()->doesNotNeedToSchedule() ||
@@ -22597,18 +22616,22 @@ void BoUpSLP::scheduleBlock(const BoUpSLP &R, BlockScheduling *BS) {
              "scheduler and vectorizer bundle mismatch");
       SD->setSchedulingPriority(Idx++);
       if (!SD->hasValidDependencies() &&
-          (!UserCopyableData.empty() ||
+          (!CopyableData.empty() ||
            any_of(R.ValueToGatherNodes.lookup(I), [&](const TreeEntry *TE) {
              assert(TE->isGather() && "expected gather node");
              return TE->hasState() && TE->hasCopyableElements() &&
                     TE->isCopyableElement(I);
            }))) {
+        // Need to calculate deps for these nodes to correctly handle copyable
+        // dependencies, even if they were cancelled.
+        // If copyables bundle was cancelled, the deps are cleared and need to
+        // recalculate them.
         ScheduleBundle Bundle;
         Bundle.add(SD);
         BS->calculateDependencies(Bundle, /*InsertInReadyList=*/false, this);
       }
     }
-    for (ScheduleCopyableData *SD : reverse(UserCopyableData)) {
+    for (ScheduleCopyableData *SD : reverse(CopyableData)) {
       ScheduleBundle &Bundle = SD->getBundle();
       Bundle.setSchedulingPriority(Idx++);
       if (!Bundle.hasValidDependencies())
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll b/llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll
new file mode 100644
index 0000000000000..6abb8b145c573
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-99999 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @gh176658() #0 gc "statepoint-example" {
+; CHECK-LABEL: @gh176658(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB3]] ]
+; CHECK-NEXT:    ret i32 0
+; CHECK:       bb3:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB3]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr addrspace(1) null, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[LOAD]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = and <2 x i32> [[TMP3]], <i32 0, i32 -1>
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr i32 0, [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = ashr <2 x i32> [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or <2 x i32> [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP8]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[TMP11]] = or <2 x i32> [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    br i1 false, label [[BB1:%.*]], label [[BB3]]
+;
+bb:
+  br label %bb3
+
+bb1:
+  %phi = phi i32 [ %or, %bb3 ]
+  %phi2 = phi i32 [ %ashr7, %bb3 ]
+  ret i32 0
+
+bb3:
+  %phi4 = phi i32 [ 0, %bb ], [ %or, %bb3 ]
+  %phi5 = phi i32 [ 0, %bb ], [ %or8, %bb3 ]
+  %load = load i64, ptr addrspace(1) null, align 8
+  %trunc = trunc i64 %load to i32
+  %or = or i32 %phi4, %trunc
+  %trunc6 = trunc i64 0 to i32
+  %and = and i32 %trunc6, 0
+  %ashr = ashr i32 0, %and
+  %ashr7 = ashr i32 %phi5, %and
+  %or8 = or i32 %ashr7, 0
+  br i1 false, label %bb1, label %bb3
+}
+
+attributes #0 = { "target-features"="+avx2" }



More information about the llvm-commits mailing list