[llvm] [slp] fix scheduler deadlock for copyable users of vectorized instructions (PR #176661)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 18 22:58:10 PST 2026
https://github.com/Serosh-commits updated https://github.com/llvm/llvm-project/pull/176661
>From 46d3cbdcc53cd588ac96190c1dd272936152c49d Mon Sep 17 00:00:00 2001
From: Serosh <janmejayapanda400 at gmail.com>
Date: Mon, 19 Jan 2026 07:43:34 +0530
Subject: [PATCH] [SLP] Fix scheduler deadlock by filtering non-schedulable
nodes early
This fixes a scheduler deadlock that occurs when a schedulable node depends on
a non-schedulable one. Non-schedulable nodes are now filtered out during bundle
creation in the scheduler, so their members are handled as individual scalars;
this correctly releases their dependencies and avoids the cyclic wait.
Fixes #176658
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 209 +++---------------
.../X86/gh176658-scheduler-deadlock.ll | 52 +++++
2 files changed, 84 insertions(+), 177 deletions(-)
create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4c4901c314406..ff94531aa8b11 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5643,15 +5643,14 @@ class slpvectorizer::BoUpSLP {
auto DecrUnschedForInst = [&](Instruction *User, unsigned OpIdx,
Instruction *I) {
if (!ScheduleCopyableDataMap.empty()) {
- SmallVector<ScheduleCopyableData *> CopyableData =
- getScheduleCopyableData(User, OpIdx, I);
- for (ScheduleCopyableData *CD : CopyableData)
+ const EdgeInfo EI = {nullptr, OpIdx};
+ if (ScheduleCopyableData *CD = getScheduleCopyableData(EI, I)) {
DecrUnsched(CD, /*IsControl=*/false);
- if (!CopyableData.empty())
return;
+ }
}
if (ScheduleData *OpSD = getScheduleData(I))
- DecrUnsched(OpSD, /*IsControl=*/false);
+ DecrUnsched(OpSD);
};
// If BundleMember is a vector bundle, its operands may have been
@@ -5712,18 +5711,9 @@ class slpvectorizer::BoUpSLP {
for (ScheduleBundle *Bundle : Bundles) {
if (ScheduleCopyableDataMap.empty() && TotalOpCount == 0)
break;
- SmallPtrSet<Value *, 4> ParentsUniqueUsers;
// Need to search for the lane since the tree entry can be
// reordered.
auto *It = find(Bundle->getTreeEntry()->Scalars, In);
- bool IsNonSchedulableWithParentPhiNode =
- Bundle->getTreeEntry()->doesNotNeedToSchedule() &&
- Bundle->getTreeEntry()->UserTreeIndex &&
- Bundle->getTreeEntry()->UserTreeIndex.UserTE->hasState() &&
- Bundle->getTreeEntry()->UserTreeIndex.UserTE->State !=
- TreeEntry::SplitVectorize &&
- Bundle->getTreeEntry()->UserTreeIndex.UserTE->getOpcode() ==
- Instruction::PHI;
do {
int Lane =
std::distance(Bundle->getTreeEntry()->Scalars.begin(), It);
@@ -5732,7 +5722,7 @@ class slpvectorizer::BoUpSLP {
!Bundle->getTreeEntry()->ReorderIndices.empty())
Lane = Bundle->getTreeEntry()->ReorderIndices[Lane];
assert(Lane < static_cast<int>(
- Bundle->getTreeEntry()->Scalars.size()) &&
+ Bundle->getTreeEntry()->Scalars.size()) &&
"Couldn't find extract lane");
// Since vectorization tree is being built recursively this
@@ -5748,19 +5738,6 @@ class slpvectorizer::BoUpSLP {
Bundle->getTreeEntry()->isCopyableElement(In)) &&
"Missed TreeEntry operands?");
- // Count the number of unique phi nodes, which are the parent for
- // parent entry, and exit, if all the unique phis are processed.
- if (IsNonSchedulableWithParentPhiNode) {
- const TreeEntry *ParentTE =
- Bundle->getTreeEntry()->UserTreeIndex.UserTE;
- Value *User = ParentTE->Scalars[Lane];
- if (!ParentsUniqueUsers.insert(User).second) {
- It = std::find(std::next(It),
- Bundle->getTreeEntry()->Scalars.end(), In);
- continue;
- }
- }
-
for (unsigned OpIdx :
seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
if (auto *I = dyn_cast<Instruction>(
@@ -5816,23 +5793,7 @@ class slpvectorizer::BoUpSLP {
if (auto *SD = dyn_cast<ScheduleData>(Data)) {
SD->setScheduled(/*Scheduled=*/true);
LLVM_DEBUG(dbgs() << "SLP: schedule " << *SD << "\n");
- SmallVector<std::unique_ptr<ScheduleBundle>> PseudoBundles;
- SmallVector<ScheduleBundle *> Bundles;
- Instruction *In = SD->getInst();
- if (R.isVectorized(In)) {
- ArrayRef<TreeEntry *> Entries = R.getTreeEntries(In);
- for (TreeEntry *TE : Entries) {
- if (!isa<ExtractValueInst, ExtractElementInst, CallBase>(In) &&
- In->getNumOperands() != TE->getNumOperands())
- continue;
- auto &BundlePtr =
- PseudoBundles.emplace_back(std::make_unique<ScheduleBundle>());
- BundlePtr->setTreeEntry(TE);
- BundlePtr->add(SD);
- Bundles.push_back(BundlePtr.get());
- }
- }
- ProcessBundleMember(SD, Bundles);
+ ProcessBundleMember(SD, getScheduleBundles(SD->getInst()));
} else {
ScheduleBundle &Bundle = *cast<ScheduleBundle>(Data);
Bundle.setScheduled(/*Scheduled=*/true);
@@ -21730,100 +21691,9 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
if (isa<PHINode>(S.getMainOp()) ||
isVectorLikeInstWithConstOps(S.getMainOp()))
return nullptr;
- // If the parent node is non-schedulable and the current node is copyable, and
- // any of parent instructions are used outside several basic blocks or in
- // bin-op node - cancel scheduling, it may cause wrong def-use deps in
- // analysis, leading to a crash.
- // Non-scheduled nodes may not have related ScheduleData model, which may lead
- // to a skipped dep analysis.
if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() &&
- EI.UserTE->doesNotNeedToSchedule() &&
- EI.UserTE->getOpcode() != Instruction::PHI &&
- any_of(EI.UserTE->Scalars, [](Value *V) {
- auto *I = dyn_cast<Instruction>(V);
- if (!I || I->hasOneUser())
- return false;
- for (User *U : I->users()) {
- auto *UI = cast<Instruction>(U);
- if (isa<BinaryOperator>(UI))
- return true;
- }
- return false;
- }))
- return std::nullopt;
- if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() &&
- EI.UserTE->hasCopyableElements() &&
- EI.UserTE->getMainOp()->getParent() == S.getMainOp()->getParent() &&
- all_of(VL, [&](Value *V) {
- if (S.isCopyableElement(V))
- return true;
- return isUsedOutsideBlock(V);
- }))
- return std::nullopt;
- // If any instruction is used outside block only and its operand is placed
- // immediately before it, do not schedule, it may cause wrong def-use chain.
- if (S.areInstructionsWithCopyableElements() && any_of(VL, [&](Value *V) {
- if (isa<PoisonValue>(V) || S.isCopyableElement(V))
- return false;
- if (isUsedOutsideBlock(V)) {
- for (Value *Op : cast<Instruction>(V)->operands()) {
- auto *I = dyn_cast<Instruction>(Op);
- if (!I)
- continue;
- return SLP->isVectorized(I) && I->getNextNode() == V;
- }
- }
- return false;
- }))
- return std::nullopt;
- if (S.areInstructionsWithCopyableElements() && EI) {
- bool IsNonSchedulableWithParentPhiNode =
- EI.UserTE->doesNotNeedToSchedule() && EI.UserTE->UserTreeIndex &&
- EI.UserTE->UserTreeIndex.UserTE->hasState() &&
- EI.UserTE->UserTreeIndex.UserTE->State != TreeEntry::SplitVectorize &&
- EI.UserTE->UserTreeIndex.UserTE->getOpcode() == Instruction::PHI;
- if (IsNonSchedulableWithParentPhiNode) {
- SmallSet<std::pair<Value *, Value *>, 4> Values;
- for (const auto [Idx, V] :
- enumerate(EI.UserTE->UserTreeIndex.UserTE->Scalars)) {
- Value *Op = EI.UserTE->UserTreeIndex.UserTE->getOperand(
- EI.UserTE->UserTreeIndex.EdgeIdx)[Idx];
- auto *I = dyn_cast<Instruction>(Op);
- if (!I || !isCommutative(I))
- continue;
- if (!Values.insert(std::make_pair(V, Op)).second)
- return std::nullopt;
- }
- }
- }
- bool HasCopyables = S.areInstructionsWithCopyableElements();
- if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||
- all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) {
- // If all operands were replaced by copyables, the operands of this node
- // might be not, so need to recalculate dependencies for schedule data,
- // replaced by copyable schedule data.
- SmallVector<ScheduleData *> ControlDependentMembers;
- for (Value *V : VL) {
- auto *I = dyn_cast<Instruction>(V);
- if (!I || (HasCopyables && S.isCopyableElement(V)))
- continue;
- SmallDenseMap<std::pair<Instruction *, Value *>, unsigned> UserOpToNumOps;
- for (const Use &U : I->operands()) {
- unsigned &NumOps =
- UserOpToNumOps.try_emplace(std::make_pair(I, U.get()), 0)
- .first->getSecond();
- ++NumOps;
- if (auto *Op = dyn_cast<Instruction>(U.get());
- Op && areAllOperandsReplacedByCopyableData(I, Op, *SLP, NumOps)) {
- if (ScheduleData *OpSD = getScheduleData(Op);
- OpSD && OpSD->hasValidDependencies())
- // TODO: investigate how to improve it instead of early exiting.
- return std::nullopt;
- }
- }
- }
+ EI.UserTE->doesNotNeedToSchedule())
return nullptr;
- }
// Initialize the instruction bundle.
Instruction *OldScheduleEnd = ScheduleEnd;
@@ -22214,25 +22084,14 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
CD->initDependencies();
CD->resetUnscheduledDeps();
const EdgeInfo &EI = CD->getEdgeInfo();
- if (EI.UserTE) {
- ArrayRef<Value *> Op = EI.UserTE->getOperand(EI.EdgeIdx);
- const auto *It = find(Op, CD->getInst());
- assert(It != Op.end() && "Lane not set");
- SmallPtrSet<Instruction *, 4> Visited;
- do {
- int Lane = std::distance(Op.begin(), It);
- assert(Lane >= 0 && "Lane not set");
- if (isa<StoreInst>(EI.UserTE->Scalars[Lane]) &&
- !EI.UserTE->ReorderIndices.empty())
- Lane = EI.UserTE->ReorderIndices[Lane];
- assert(Lane < static_cast<int>(EI.UserTE->Scalars.size()) &&
- "Couldn't find extract lane");
- auto *In = cast<Instruction>(EI.UserTE->Scalars[Lane]);
- if (EI.UserTE->isCopyableElement(In)) {
- // We may have not have related copyable scheduling data, if the
- // instruction is non-schedulable.
- if (ScheduleCopyableData *UseSD =
- getScheduleCopyableData(EI.UserTE->UserTreeIndex, In)) {
+ if (EI.UserTE) {
+ for (User *U : CD->getInst()->users()) {
+ if (isa<PHINode>(U))
+ continue;
+ auto *UI = dyn_cast<Instruction>(U);
+ if (!UI || UI->getParent() != BB)
+ continue;
+ if (ScheduleData *UseSD = getScheduleData(UI)) {
CD->incDependencies();
if (!UseSD->isScheduled())
CD->incrementUnscheduledDeps(1);
@@ -22240,8 +22099,9 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
(InsertInReadyList && UseSD->isReady()))
WorkList.push_back(UseSD);
}
- } else if (Visited.insert(In).second) {
- if (ScheduleData *UseSD = getScheduleData(In)) {
+ for (ScheduleCopyableData *UseSD : getScheduleCopyableDataUsers(UI)) {
+ if (UseSD->getBundle().getTreeEntry()->doesNotNeedToSchedule())
+ continue;
CD->incDependencies();
if (!UseSD->isScheduled())
CD->incrementUnscheduledDeps(1);
@@ -22250,8 +22110,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
WorkList.push_back(UseSD);
}
}
- It = find(make_range(std::next(It), Op.end()), CD->getInst());
- } while (It != Op.end());
+ }
if (CD->isReady() && CD->getDependencies() == 0 &&
(EI.UserTE->hasState() &&
(EI.UserTE->getMainOp()->getParent() !=
@@ -22280,17 +22139,22 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
LLVM_DEBUG(dbgs() << "SLP: update deps of " << *BundleMember << "\n");
BundleMember->initDependencies();
BundleMember->resetUnscheduledDeps();
- // Handle def-use chain dependencies.
- SmallDenseMap<Value *, unsigned> UserToNumOps;
for (User *U : BundleMember->getInst()->users()) {
if (isa<PHINode>(U))
continue;
- if (ScheduleData *UseSD = getScheduleData(U)) {
- // The operand is a copyable element - skip.
- unsigned &NumOps = UserToNumOps.try_emplace(U, 0).first->getSecond();
- ++NumOps;
- if (areAllOperandsReplacedByCopyableData(
- cast<Instruction>(U), BundleMember->getInst(), *SLP, NumOps))
+ auto *UI = dyn_cast<Instruction>(U);
+ if (!UI || UI->getParent() != BB)
+ continue;
+ if (ScheduleData *UseSD = getScheduleData(UI)) {
+ BundleMember->incDependencies();
+ if (!UseSD->isScheduled())
+ BundleMember->incrementUnscheduledDeps(1);
+ if (!UseSD->hasValidDependencies() ||
+ (InsertInReadyList && UseSD->isReady()))
+ WorkList.push_back(UseSD);
+ }
+ for (ScheduleCopyableData *UseSD : getScheduleCopyableDataUsers(UI)) {
+ if (UseSD->getBundle().getTreeEntry()->doesNotNeedToSchedule())
continue;
BundleMember->incDependencies();
if (!UseSD->isScheduled())
@@ -22300,15 +22164,6 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
WorkList.push_back(UseSD);
}
}
- for (ScheduleCopyableData *UseSD :
- getScheduleCopyableDataUsers(BundleMember->getInst())) {
- BundleMember->incDependencies();
- if (!UseSD->isScheduled())
- BundleMember->incrementUnscheduledDeps(1);
- if (!UseSD->hasValidDependencies() ||
- (InsertInReadyList && UseSD->isReady()))
- WorkList.push_back(UseSD);
- }
SmallPtrSet<const Instruction *, 4> Visited;
auto MakeControlDependent = [&](Instruction *I) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll b/llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll
new file mode 100644
index 0000000000000..6abb8b145c573
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-99999 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @gh176658() #0 gc "statepoint-example" {
+; CHECK-LABEL: @gh176658(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB3]] ]
+; CHECK-NEXT: ret i32 0
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB3]] ]
+; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr addrspace(1) null, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[LOAD]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP3]], <i32 0, i32 -1>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 0, [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = ashr <2 x i32> [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i32> [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP8]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT: [[TMP11]] = or <2 x i32> [[TMP9]], [[TMP10]]
+; CHECK-NEXT: br i1 false, label [[BB1:%.*]], label [[BB3]]
+;
+bb:
+ br label %bb3
+
+bb1:
+ %phi = phi i32 [ %or, %bb3 ]
+ %phi2 = phi i32 [ %ashr7, %bb3 ]
+ ret i32 0
+
+bb3:
+ %phi4 = phi i32 [ 0, %bb ], [ %or, %bb3 ]
+ %phi5 = phi i32 [ 0, %bb ], [ %or8, %bb3 ]
+ %load = load i64, ptr addrspace(1) null, align 8
+ %trunc = trunc i64 %load to i32
+ %or = or i32 %phi4, %trunc
+ %trunc6 = trunc i64 0 to i32
+ %and = and i32 %trunc6, 0
+ %ashr = ashr i32 0, %and
+ %ashr7 = ashr i32 %phi5, %and
+ %or8 = or i32 %ashr7, 0
+ br i1 false, label %bb1, label %bb3
+}
+
+attributes #0 = { "target-features"="+avx2" }
More information about the llvm-commits
mailing list