[llvm] [slp] fix scheduler deadlock for copyable users of vectorized instructions (PR #176661)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 18 17:14:19 PST 2026
https://github.com/Serosh-commits updated https://github.com/llvm/llvm-project/pull/176661
>From d099f428252d1207884898af9c7d8986f74e7809 Mon Sep 17 00:00:00 2001
From: Serosh <janmejayapanda400 at gmail.com>
Date: Sun, 18 Jan 2026 23:22:08 +0530
Subject: [PATCH 1/2] [slp] fix scheduler deadlock by correctly tracking
copyable dependencies
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 23 +++++++++++--------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4c4901c314406..06ed3502cfd89 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -22576,11 +22576,20 @@ void BoUpSLP::scheduleBlock(const BoUpSLP &R, BlockScheduling *BS) {
if (!Bundle.hasValidDependencies())
BS->calculateDependencies(Bundle, /*InsertInReadyList=*/false, this);
}
- continue;
}
- SmallVector<ScheduleCopyableData *> CopyableData =
+ SmallVector<ScheduleCopyableData *> SelfCopyableData =
+ BS->getScheduleCopyableData(I);
+ for (ScheduleCopyableData *SD : reverse(SelfCopyableData)) {
+ ScheduleBundle &Bundle = SD->getBundle();
+ Bundle.setSchedulingPriority(Idx++);
+ if (!Bundle.hasValidDependencies())
+ BS->calculateDependencies(Bundle, /*InsertInReadyList=*/false, this);
+ }
+ SmallVector<ScheduleCopyableData *> UserCopyableData =
BS->getScheduleCopyableDataUsers(I);
- if (ScheduleData *SD = BS->getScheduleData(I)) {
+ if (Bundles.empty() && SelfCopyableData.empty()) {
+ ScheduleData *SD = BS->getScheduleData(I);
+ assert(SD && "Expected schedule data");
[[maybe_unused]] ArrayRef<TreeEntry *> SDTEs = getTreeEntries(I);
assert((isVectorLikeInstWithConstOps(SD->getInst()) || SDTEs.empty() ||
SDTEs.front()->doesNotNeedToSchedule() ||
@@ -22588,22 +22597,18 @@ void BoUpSLP::scheduleBlock(const BoUpSLP &R, BlockScheduling *BS) {
"scheduler and vectorizer bundle mismatch");
SD->setSchedulingPriority(Idx++);
if (!SD->hasValidDependencies() &&
- (!CopyableData.empty() ||
+ (!UserCopyableData.empty() ||
any_of(R.ValueToGatherNodes.lookup(I), [&](const TreeEntry *TE) {
assert(TE->isGather() && "expected gather node");
return TE->hasState() && TE->hasCopyableElements() &&
TE->isCopyableElement(I);
}))) {
- // Need to calculate deps for these nodes to correctly handle copyable
- // dependencies, even if they were cancelled.
- // If copyables bundle was cancelled, the deps are cleared and need to
- // recalculate them.
ScheduleBundle Bundle;
Bundle.add(SD);
BS->calculateDependencies(Bundle, /*InsertInReadyList=*/false, this);
}
}
- for (ScheduleCopyableData *SD : reverse(CopyableData)) {
+ for (ScheduleCopyableData *SD : reverse(UserCopyableData)) {
ScheduleBundle &Bundle = SD->getBundle();
Bundle.setSchedulingPriority(Idx++);
if (!Bundle.hasValidDependencies())
>From cf153df9bca14c03cccf8d3497168012640b39d1 Mon Sep 17 00:00:00 2001
From: Serosh <janmejayapanda400 at gmail.com>
Date: Sun, 18 Jan 2026 23:23:00 +0530
Subject: [PATCH 2/2] [slp] add regression test for scheduler deadlock
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 165 ++++++++++--------
.../X86/gh176658-scheduler-deadlock.ll | 52 ++++++
2 files changed, 146 insertions(+), 71 deletions(-)
create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 06ed3502cfd89..372c0b1b12136 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5642,16 +5642,33 @@ class slpvectorizer::BoUpSLP {
auto DecrUnschedForInst = [&](Instruction *User, unsigned OpIdx,
Instruction *I) {
- if (!ScheduleCopyableDataMap.empty()) {
- SmallVector<ScheduleCopyableData *> CopyableData =
- getScheduleCopyableData(User, OpIdx, I);
- for (ScheduleCopyableData *CD : CopyableData)
- DecrUnsched(CD, /*IsControl=*/false);
- if (!CopyableData.empty())
- return;
+ SmallVector<Instruction *, 8> Worklist;
+ Worklist.push_back(I);
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ Instruction *CurrI = Worklist.pop_back_val();
+ if (!Visited.insert(CurrI).second)
+ continue;
+ bool Found = false;
+ if (!ScheduleCopyableDataMap.empty()) {
+ SmallVector<ScheduleCopyableData *> CopyableData =
+ getScheduleCopyableData(CurrI);
+ for (ScheduleCopyableData *CD : CopyableData) {
+ DecrUnsched(CD, /*IsControl=*/false);
+ Found = true;
+ }
+ }
+ if (ScheduleData *OpSD = getScheduleData(CurrI)) {
+ DecrUnsched(OpSD, /*IsControl=*/false);
+ Found = true;
+ }
+ if (!Found && CurrI->getParent() == BB && !isa<PHINode>(CurrI)) {
+ for (Value *Op : CurrI->operands())
+ if (auto *OpI = dyn_cast<Instruction>(Op))
+ if (OpI->getParent() == BB && !isa<PHINode>(OpI))
+ Worklist.push_back(OpI);
+ }
}
- if (ScheduleData *OpSD = getScheduleData(I))
- DecrUnsched(OpSD, /*IsControl=*/false);
};
// If BundleMember is a vector bundle, its operands may have been
@@ -22215,43 +22232,45 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
CD->resetUnscheduledDeps();
const EdgeInfo &EI = CD->getEdgeInfo();
if (EI.UserTE) {
- ArrayRef<Value *> Op = EI.UserTE->getOperand(EI.EdgeIdx);
- const auto *It = find(Op, CD->getInst());
- assert(It != Op.end() && "Lane not set");
- SmallPtrSet<Instruction *, 4> Visited;
- do {
- int Lane = std::distance(Op.begin(), It);
- assert(Lane >= 0 && "Lane not set");
- if (isa<StoreInst>(EI.UserTE->Scalars[Lane]) &&
- !EI.UserTE->ReorderIndices.empty())
- Lane = EI.UserTE->ReorderIndices[Lane];
- assert(Lane < static_cast<int>(EI.UserTE->Scalars.size()) &&
- "Couldn't find extract lane");
- auto *In = cast<Instruction>(EI.UserTE->Scalars[Lane]);
- if (EI.UserTE->isCopyableElement(In)) {
- // We may have not have related copyable scheduling data, if the
- // instruction is non-schedulable.
- if (ScheduleCopyableData *UseSD =
- getScheduleCopyableData(EI.UserTE->UserTreeIndex, In)) {
+ SmallVector<Instruction *, 8> Users;
+ Users.push_back(CD->getInst());
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Users.empty()) {
+ Instruction *I = Users.pop_back_val();
+ for (User *U : I->users()) {
+ if (isa<PHINode>(U))
+ continue;
+ auto *UI = dyn_cast<Instruction>(U);
+ if (!UI || UI->getParent() != BB)
+ continue;
+ if (!Visited.insert(UI).second)
+ continue;
+ bool Found = false;
+ if (ScheduleData *UseSD = getScheduleData(UI)) {
CD->incDependencies();
if (!UseSD->isScheduled())
CD->incrementUnscheduledDeps(1);
if (!UseSD->hasValidDependencies() ||
(InsertInReadyList && UseSD->isReady()))
WorkList.push_back(UseSD);
+ Found = true;
}
- } else if (Visited.insert(In).second) {
- if (ScheduleData *UseSD = getScheduleData(In)) {
+ for (ScheduleCopyableData *UseSD : getScheduleCopyableDataUsers(UI)) {
+ if (UseSD->getBundle().getTreeEntry()->doesNotNeedToSchedule())
+ continue;
CD->incDependencies();
if (!UseSD->isScheduled())
CD->incrementUnscheduledDeps(1);
if (!UseSD->hasValidDependencies() ||
(InsertInReadyList && UseSD->isReady()))
WorkList.push_back(UseSD);
+ Found = true;
}
+ if (!Found)
+ Users.push_back(UI);
}
- It = find(make_range(std::next(It), Op.end()), CD->getInst());
- } while (It != Op.end());
+ }
+ }
if (CD->isReady() && CD->getDependencies() == 0 &&
(EI.UserTE->hasState() &&
(EI.UserTE->getMainOp()->getParent() !=
@@ -22280,35 +22299,44 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
LLVM_DEBUG(dbgs() << "SLP: update deps of " << *BundleMember << "\n");
BundleMember->initDependencies();
BundleMember->resetUnscheduledDeps();
- // Handle def-use chain dependencies.
- SmallDenseMap<Value *, unsigned> UserToNumOps;
- for (User *U : BundleMember->getInst()->users()) {
- if (isa<PHINode>(U))
- continue;
- if (ScheduleData *UseSD = getScheduleData(U)) {
- // The operand is a copyable element - skip.
- unsigned &NumOps = UserToNumOps.try_emplace(U, 0).first->getSecond();
- ++NumOps;
- if (areAllOperandsReplacedByCopyableData(
- cast<Instruction>(U), BundleMember->getInst(), *SLP, NumOps))
+ SmallVector<Instruction *, 8> Users;
+ Users.push_back(BundleMember->getInst());
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Users.empty()) {
+ Instruction *I = Users.pop_back_val();
+ for (User *U : I->users()) {
+ if (isa<PHINode>(U))
continue;
- BundleMember->incDependencies();
- if (!UseSD->isScheduled())
- BundleMember->incrementUnscheduledDeps(1);
- if (!UseSD->hasValidDependencies() ||
- (InsertInReadyList && UseSD->isReady()))
- WorkList.push_back(UseSD);
+ auto *UI = dyn_cast<Instruction>(U);
+ if (!UI || UI->getParent() != BB)
+ continue;
+ if (!Visited.insert(UI).second)
+ continue;
+ bool Found = false;
+ if (ScheduleData *UseSD = getScheduleData(UI)) {
+ BundleMember->incDependencies();
+ if (!UseSD->isScheduled())
+ BundleMember->incrementUnscheduledDeps(1);
+ if (!UseSD->hasValidDependencies() ||
+ (InsertInReadyList && UseSD->isReady()))
+ WorkList.push_back(UseSD);
+ Found = true;
+ }
+ for (ScheduleCopyableData *UseSD : getScheduleCopyableDataUsers(UI)) {
+ if (UseSD->getBundle().getTreeEntry()->doesNotNeedToSchedule())
+ continue;
+ BundleMember->incDependencies();
+ if (!UseSD->isScheduled())
+ BundleMember->incrementUnscheduledDeps(1);
+ if (!UseSD->hasValidDependencies() ||
+ (InsertInReadyList && UseSD->isReady()))
+ WorkList.push_back(UseSD);
+ Found = true;
+ }
+ if (!Found)
+ Users.push_back(UI);
}
}
- for (ScheduleCopyableData *UseSD :
- getScheduleCopyableDataUsers(BundleMember->getInst())) {
- BundleMember->incDependencies();
- if (!UseSD->isScheduled())
- BundleMember->incrementUnscheduledDeps(1);
- if (!UseSD->hasValidDependencies() ||
- (InsertInReadyList && UseSD->isReady()))
- WorkList.push_back(UseSD);
- }
SmallPtrSet<const Instruction *, 4> Visited;
auto MakeControlDependent = [&](Instruction *I) {
@@ -22576,20 +22604,11 @@ void BoUpSLP::scheduleBlock(const BoUpSLP &R, BlockScheduling *BS) {
if (!Bundle.hasValidDependencies())
BS->calculateDependencies(Bundle, /*InsertInReadyList=*/false, this);
}
+ continue;
}
- SmallVector<ScheduleCopyableData *> SelfCopyableData =
- BS->getScheduleCopyableData(I);
- for (ScheduleCopyableData *SD : reverse(SelfCopyableData)) {
- ScheduleBundle &Bundle = SD->getBundle();
- Bundle.setSchedulingPriority(Idx++);
- if (!Bundle.hasValidDependencies())
- BS->calculateDependencies(Bundle, /*InsertInReadyList=*/false, this);
- }
- SmallVector<ScheduleCopyableData *> UserCopyableData =
+ SmallVector<ScheduleCopyableData *> CopyableData =
BS->getScheduleCopyableDataUsers(I);
- if (Bundles.empty() && SelfCopyableData.empty()) {
- ScheduleData *SD = BS->getScheduleData(I);
- assert(SD && "Expected schedule data");
+ if (ScheduleData *SD = BS->getScheduleData(I)) {
[[maybe_unused]] ArrayRef<TreeEntry *> SDTEs = getTreeEntries(I);
assert((isVectorLikeInstWithConstOps(SD->getInst()) || SDTEs.empty() ||
SDTEs.front()->doesNotNeedToSchedule() ||
@@ -22597,18 +22616,22 @@ void BoUpSLP::scheduleBlock(const BoUpSLP &R, BlockScheduling *BS) {
"scheduler and vectorizer bundle mismatch");
SD->setSchedulingPriority(Idx++);
if (!SD->hasValidDependencies() &&
- (!UserCopyableData.empty() ||
+ (!CopyableData.empty() ||
any_of(R.ValueToGatherNodes.lookup(I), [&](const TreeEntry *TE) {
assert(TE->isGather() && "expected gather node");
return TE->hasState() && TE->hasCopyableElements() &&
TE->isCopyableElement(I);
}))) {
+ // Need to calculate deps for these nodes to correctly handle copyable
+ // dependencies, even if they were cancelled.
+ // If copyables bundle was cancelled, the deps are cleared and need to
+ // recalculate them.
ScheduleBundle Bundle;
Bundle.add(SD);
BS->calculateDependencies(Bundle, /*InsertInReadyList=*/false, this);
}
}
- for (ScheduleCopyableData *SD : reverse(UserCopyableData)) {
+ for (ScheduleCopyableData *SD : reverse(CopyableData)) {
ScheduleBundle &Bundle = SD->getBundle();
Bundle.setSchedulingPriority(Idx++);
if (!Bundle.hasValidDependencies())
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll b/llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll
new file mode 100644
index 0000000000000..6abb8b145c573
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gh176658-scheduler-deadlock.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-99999 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @gh176658() #0 gc "statepoint-example" {
+; CHECK-LABEL: @gh176658(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB3]] ]
+; CHECK-NEXT: ret i32 0
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB3]] ]
+; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr addrspace(1) null, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[LOAD]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP3]], <i32 0, i32 -1>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 0, [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = ashr <2 x i32> [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i32> [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP8]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT: [[TMP11]] = or <2 x i32> [[TMP9]], [[TMP10]]
+; CHECK-NEXT: br i1 false, label [[BB1:%.*]], label [[BB3]]
+;
+bb:
+ br label %bb3
+
+bb1:
+ %phi = phi i32 [ %or, %bb3 ]
+ %phi2 = phi i32 [ %ashr7, %bb3 ]
+ ret i32 0
+
+bb3:
+ %phi4 = phi i32 [ 0, %bb ], [ %or, %bb3 ]
+ %phi5 = phi i32 [ 0, %bb ], [ %or8, %bb3 ]
+ %load = load i64, ptr addrspace(1) null, align 8
+ %trunc = trunc i64 %load to i32
+ %or = or i32 %phi4, %trunc
+ %trunc6 = trunc i64 0 to i32
+ %and = and i32 %trunc6, 0
+ %ashr = ashr i32 0, %and
+ %ashr7 = ashr i32 %phi5, %and
+ %or8 = or i32 %ashr7, 0
+ br i1 false, label %bb1, label %bb3
+}
+
+attributes #0 = { "target-features"="+avx2" }
More information about the llvm-commits mailing list