[llvm] 913837e - [ScheduleDAG] Fix removing edges with weak deps
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 25 10:06:19 PST 2023
Author: Austin Kerbow
Date: 2023-01-25T10:05:50-08:00
New Revision: 913837eaa3d14a5fb68179f8210dc097383a048c
URL: https://github.com/llvm/llvm-project/commit/913837eaa3d14a5fb68179f8210dc097383a048c
DIFF: https://github.com/llvm/llvm-project/commit/913837eaa3d14a5fb68179f8210dc097383a048c.diff
LOG: [ScheduleDAG] Fix removing edges with weak deps
In SUnit::removePred edges are removed from the Preds and Succs lists before
updating the bookkeeping. This could result in incorrect values for
NumPreds/SuccsLeft and cause WeakPreds/SuccsLeft to underflow, since the
incorrect SDep will be used to update these values.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D142325
Added:
llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir
Modified:
llvm/lib/CodeGen/ScheduleDAG.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp
index 696b29018ae69..ef886ad14649a 100644
--- a/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -183,8 +183,6 @@ void SUnit::removePred(const SDep &D) {
SUnit *N = D.getSUnit();
SmallVectorImpl<SDep>::iterator Succ = llvm::find(N->Succs, P);
assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
- N->Succs.erase(Succ);
- Preds.erase(I);
// Update the bookkeeping.
if (P.getKind() == SDep::Data) {
assert(NumPreds > 0 && "NumPreds will underflow!");
@@ -193,21 +191,25 @@ void SUnit::removePred(const SDep &D) {
--N->NumSuccs;
}
if (!N->isScheduled) {
- if (D.isWeak())
+ if (D.isWeak()) {
+ assert(WeakPredsLeft > 0 && "WeakPredsLeft will underflow!");
--WeakPredsLeft;
- else {
+ } else {
assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
--NumPredsLeft;
}
}
if (!isScheduled) {
- if (D.isWeak())
+ if (D.isWeak()) {
+ assert(WeakSuccsLeft > 0 && "WeakSuccsLeft will underflow!");
--N->WeakSuccsLeft;
- else {
+ } else {
assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
--N->NumSuccsLeft;
}
}
+ N->Succs.erase(Succ);
+ Preds.erase(I);
if (P.getLatency() != 0) {
this->setDepthDirty();
N->setHeightDirty();
diff --git a/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir b/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir
new file mode 100644
index 0000000000000..9ef4cf5708113
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir
@@ -0,0 +1,42 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s
+
+# This would hang after removing edges from the SCHED_BARRIER since the number
+# of Preds/Succs would be left in an inconsistent state.
+
+---
+name: sched_barrier_cluster_dep_hang
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: sched_barrier_cluster_dep_hang
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: SCHED_BARRIER 128
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32))
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32))
+ ; CHECK-NEXT: SCHED_BARRIER 128
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 512, 0, implicit $exec :: (store (s32))
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 0, 0, implicit $exec :: (store (s32))
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %0:sreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = IMPLICIT_DEF
+
+ bb.1:
+ SCHED_BARRIER 128
+ %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32))
+ %5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32))
+ SCHED_BARRIER 128
+ %6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec
+ %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
+ GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32))
+ GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32))
+ S_ENDPGM 0
+...
More information about the llvm-commits
mailing list