[llvm] 557ea98 - [MISched] Dump the execution trace of the schedule.
Francesco Petrogalli via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 26 08:55:08 PST 2023
Author: Francesco Petrogalli
Date: 2023-01-26T17:54:55+01:00
New Revision: 557ea9867f289469b40debb0f2c0a3baf9709c6d
URL: https://github.com/llvm/llvm-project/commit/557ea9867f289469b40debb0f2c0a3baf9709c6d
DIFF: https://github.com/llvm/llvm-project/commit/557ea9867f289469b40debb0f2c0a3baf9709c6d.diff
LOG: [MISched] Dump the execution trace of the schedule.
The traces are printed only for bottom-up and top-down scheduling
because the values of TopReadyCycle and BottomReadyCycle are
inconsistent when obtained via bidirectional scheduling (see
`BIDIRECTIONAL` checks in the test).
Differential Revision: https://reviews.llvm.org/D142529
Added:
llvm/test/CodeGen/AArch64/dump-schedule-trace.mir
Modified:
llvm/include/llvm/CodeGen/MachineScheduler.h
llvm/lib/CodeGen/MachineScheduler.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index 997c3a4f74a1..05f56543e69e 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -374,6 +374,9 @@ class ScheduleDAGMI : public ScheduleDAGInstrs {
/// dump the scheduled Sequence.
void dumpSchedule() const;
+ /// Print the execution trace of the schedule as a table on dbgs(), with one
+ /// column per cycle. The top-down variant takes each SUnit's issue cycle
+ /// from TopReadyCycle and lists the cycles in increasing order.
+ /// The definition is only compiled when NDEBUG is not defined or
+ /// LLVM_ENABLE_DUMP is defined.
+ void dumpScheduleTraceTopDown() const;
+ /// Bottom-up counterpart of dumpScheduleTraceTopDown(): issue cycles come
+ /// from BotReadyCycle and the cycle columns are listed in decreasing order.
+ void dumpScheduleTraceBottomUp() const;
// Lesser helpers...
bool checkSchedLimit();
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 5ab5a40e7574..48c5ab0c7f32 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -147,6 +147,21 @@ static cl::opt<unsigned>
cl::desc("The threshold for fast cluster"),
cl::init(1000));
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+// Command-line switches for the schedule trace tables. They only exist in
+// builds where the dump methods themselves are compiled.
+//
+// -misched-dump-schedule-trace: off by default; when set, dumpSchedule()
+// prints the trace table before the per-node listing.
+static cl::opt<bool> MISchedDumpScheduleTrace(
+ "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
+ cl::desc("Dump resource usage at schedule boundary."));
+// Width (in characters, default 19) of the left-most column, which holds the
+// "Cycle" label, the SU(n) names and the processor resource names.
+static cl::opt<unsigned>
+ HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
+ cl::desc("Set width of the columns with "
+ "the resources and schedule units"),
+ cl::init(19));
+// Width (in characters, default 5) of every per-cycle column of the table.
+static cl::opt<unsigned>
+ ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
+ cl::desc("Set width of the columns showing resource booking."),
+ cl::init(5));
+#endif
+
// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;
@@ -930,8 +945,153 @@ void ScheduleDAGMI::placeDebugValues() {
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+// Legend printed right after the title of each schedule table; it explains
+// the two cell markers used in the rows ('i' and 'x').
+static const char *scheduleTableLegend = " i: issue\n x: resource booked";
+
+/// Print a cycle-by-cycle table of the top-down schedule to dbgs().
+///
+/// The header row lists the cycles in increasing order. Every instruction of
+/// the region that has an SUnit contributes one row marking its issue cycle
+/// ('i', taken from TopReadyCycle), followed by one row per processor
+/// resource of its scheduling class marking the cycles in which that resource
+/// is booked ('x'). Instructions without an SUnit are reported as
+/// "Missing SUnit". Column widths come from HeaderColWidth and ColWidth.
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
+  // The resource rows are built from the scheduling model's per-resource
+  // write entries. Bail out when there is no instruction scheduling model,
+  // as there is then no scheduling class to query.
+  if (!SchedModel.hasInstrSchedModel())
+    return;
+
+  // Nothing to show if there is no or just one instruction.
+  if (BB->size() < 2)
+    return;
+
+  // getSUnit() may return null for an instruction of the region (the loops
+  // below already cope with that), so take the cycle bounds from the first
+  // and last instructions that do have an SUnit rather than dereferencing
+  // the lookup on the region boundaries unchecked.
+  const SUnit *FirstSU = nullptr;
+  const SUnit *LastSU = nullptr;
+  for (MachineInstr &MI : *this) {
+    if (const SUnit *SU = getSUnit(&MI)) {
+      if (!FirstSU)
+        FirstSU = SU;
+      LastSU = SU;
+    }
+  }
+  if (!FirstSU)
+    return;
+
+  dbgs() << " * Schedule table (TopDown):\n";
+  dbgs() << scheduleTableLegend << "\n";
+  const unsigned FirstCycle = FirstSU->TopReadyCycle;
+  unsigned LastCycle = LastSU->TopReadyCycle;
+  // Extend the table so that it also covers the last cycle in which any
+  // resource is still booked.
+  for (MachineInstr &MI : *this) {
+    SUnit *SU = getSUnit(&MI);
+    if (!SU)
+      continue;
+    const MCSchedClassDesc *SC = getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+                                       PE = SchedModel.getWriteProcResEnd(SC);
+         PI != PE; ++PI) {
+      // Compare one-past-the-end cycles: "TopReadyCycle + Cycles - 1" is
+      // unsigned and would wrap around for a zero-cycle booking at cycle 0.
+      const unsigned BookedEnd = SU->TopReadyCycle + PI->Cycles;
+      if (BookedEnd > LastCycle + 1)
+        LastCycle = BookedEnd - 1;
+    }
+  }
+  // Print the header with the cycles
+  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
+  for (unsigned C = FirstCycle; C <= LastCycle; ++C)
+    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
+  dbgs() << "|\n";
+
+  for (MachineInstr &MI : *this) {
+    SUnit *SU = getSUnit(&MI);
+    if (!SU) {
+      dbgs() << "Missing SUnit\n";
+      continue;
+    }
+    // Issue row: a single 'i' in the column of the issue cycle.
+    std::string NodeName("SU(");
+    NodeName += std::to_string(SU->NodeNum) + ")";
+    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
+    unsigned C = FirstCycle;
+    for (; C <= LastCycle; ++C) {
+      if (C == SU->TopReadyCycle)
+        dbgs() << llvm::left_justify("| i", ColWidth);
+      else
+        dbgs() << llvm::left_justify("|", ColWidth);
+    }
+    dbgs() << "|\n";
+    // Resource rows: one per write resource of the scheduling class.
+    const MCSchedClassDesc *SC = getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+                                       PE = SchedModel.getWriteProcResEnd(SC);
+         PI != PE; ++PI) {
+      C = FirstCycle;
+      const std::string ResName =
+          SchedModel.getResourceName(PI->ProcResourceIdx);
+      dbgs() << llvm::left_justify(ResName, HeaderColWidth);
+      // Empty cells up to the issue cycle.
+      for (; C < SU->TopReadyCycle; ++C) {
+        dbgs() << llvm::left_justify("|", ColWidth);
+      }
+      // Booked cells, starting at the issue cycle.
+      for (unsigned i = 0; i < PI->Cycles; ++i, ++C)
+        dbgs() << llvm::left_justify("| x", ColWidth);
+      // Empty cells for the remaining cycles of the table.
+      while (C++ <= LastCycle)
+        dbgs() << llvm::left_justify("|", ColWidth);
+      // Place end char
+      dbgs() << "| \n";
+    }
+  }
+}
+
+/// Print a cycle-by-cycle table of the bottom-up schedule to dbgs().
+///
+/// The header row lists the cycles in decreasing order. Every instruction of
+/// the region that has an SUnit contributes one row marking its issue cycle
+/// ('i', taken from BotReadyCycle), followed by one row per processor
+/// resource of its scheduling class marking the cycles in which that resource
+/// is booked ('x'). Instructions without an SUnit are reported as
+/// "Missing SUnit". Column widths come from HeaderColWidth and ColWidth.
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
+  // The resource rows are built from the scheduling model's per-resource
+  // write entries. Bail out when there is no instruction scheduling model,
+  // as there is then no scheduling class to query.
+  if (!SchedModel.hasInstrSchedModel())
+    return;
+
+  // Nothing to show if there is no or just one instruction.
+  if (BB->size() < 2)
+    return;
+
+  // getSUnit() may return null for an instruction of the region (the loops
+  // below already cope with that), so take the cycle bounds from the first
+  // and last instructions that do have an SUnit rather than dereferencing
+  // the lookup on the region boundaries unchecked.
+  const SUnit *FirstSU = nullptr;
+  const SUnit *LastSU = nullptr;
+  for (MachineInstr &MI : *this) {
+    if (const SUnit *SU = getSUnit(&MI)) {
+      if (!FirstSU)
+        FirstSU = SU;
+      LastSU = SU;
+    }
+  }
+  if (!FirstSU)
+    return;
+
+  dbgs() << " * Schedule table (BottomUp):\n";
+  dbgs() << scheduleTableLegend << "\n";
+
+  // Cycles are handled as signed values: a resource booked for several
+  // cycles close to cycle 0 pushes the last column below zero.
+  const int FirstCycle = FirstSU->BotReadyCycle;
+  int LastCycle = LastSU->BotReadyCycle;
+  // Extend the table so that it also covers the lowest cycle in which any
+  // resource is still booked.
+  for (MachineInstr &MI : *this) {
+    SUnit *SU = getSUnit(&MI);
+    if (!SU)
+      continue;
+    const MCSchedClassDesc *SC = getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+                                       PE = SchedModel.getWriteProcResEnd(SC);
+         PI != PE; ++PI) {
+      const int BookedLast =
+          static_cast<int>(SU->BotReadyCycle) - PI->Cycles + 1;
+      if (BookedLast < LastCycle)
+        LastCycle = BookedLast;
+    }
+  }
+  // Print the header with the cycles
+  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
+  for (int C = FirstCycle; C >= LastCycle; --C)
+    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
+  dbgs() << "|\n";
+
+  for (MachineInstr &MI : *this) {
+    SUnit *SU = getSUnit(&MI);
+    if (!SU) {
+      dbgs() << "Missing SUnit\n";
+      continue;
+    }
+    // Issue row: a single 'i' in the column of the issue cycle.
+    std::string NodeName("SU(");
+    NodeName += std::to_string(SU->NodeNum) + ")";
+    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
+    int C = FirstCycle;
+    for (; C >= LastCycle; --C) {
+      if (C == static_cast<int>(SU->BotReadyCycle))
+        dbgs() << llvm::left_justify("| i", ColWidth);
+      else
+        dbgs() << llvm::left_justify("|", ColWidth);
+    }
+    dbgs() << "|\n";
+    // Resource rows: one per write resource of the scheduling class.
+    const MCSchedClassDesc *SC = getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+                                       PE = SchedModel.getWriteProcResEnd(SC);
+         PI != PE; ++PI) {
+      C = FirstCycle;
+      const std::string ResName =
+          SchedModel.getResourceName(PI->ProcResourceIdx);
+      dbgs() << llvm::left_justify(ResName, HeaderColWidth);
+      // Empty cells down to the issue cycle.
+      for (; C > static_cast<int>(SU->BotReadyCycle); --C) {
+        dbgs() << llvm::left_justify("|", ColWidth);
+      }
+      // Booked cells, starting at the issue cycle and counting downwards.
+      for (unsigned i = 0; i < PI->Cycles; ++i, --C)
+        dbgs() << llvm::left_justify("| x", ColWidth);
+      // Empty cells for the remaining cycles of the table.
+      while (C-- >= LastCycle)
+        dbgs() << llvm::left_justify("|", ColWidth);
+      // Place end char
+      dbgs() << "| \n";
+    }
+  }
+}
+#endif
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
+ if (MISchedDumpScheduleTrace) {
+ if (ForceTopDown)
+ dumpScheduleTraceTopDown();
+ else if (ForceBottomUp)
+ dumpScheduleTraceBottomUp();
+ else {
+ dbgs() << "* Schedule table (Bidirectional): not implemented\n";
+ }
+ }
+
for (MachineInstr &MI : *this) {
if (SUnit *SU = getSUnit(&MI))
dumpNode(*SU);
diff --git a/llvm/test/CodeGen/AArch64/dump-schedule-trace.mir b/llvm/test/CodeGen/AArch64/dump-schedule-trace.mir
new file mode 100644
index 000000000000..c05bdfb13370
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/dump-schedule-trace.mir
@@ -0,0 +1,83 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 \
+# RUN: -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s \
+# RUN: -misched-topdown=true -sched-print-cycles=true \
+# RUN: -misched-dump-schedule-trace=true -misched-dump-schedule-trace-col-header-width=21 \
+# RUN: 2>&1 | FileCheck %s --check-prefix=TOP --strict-whitespace
+
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 \
+# RUN: -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s \
+# RUN: -misched-bottomup=true -sched-print-cycles=true \
+# RUN: -misched-dump-schedule-trace=true -misched-dump-schedule-trace-col-width=4 \
+# RUN: 2>&1 | FileCheck %s --check-prefix=BOTTOM --strict-whitespace
+
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 \
+# RUN: -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s \
+# RUN: -sched-print-cycles=true -misched-dump-schedule-trace=true \
+# RUN: 2>&1 | FileCheck %s --check-prefix=BIDIRECTIONAL
+
+# REQUIRES: asserts, aarch64-registered-target
+---
+name: f
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2, $x6, $q0
+ %14:fpr128 = EXTv16i8 $q0, $q0, 8
+ $x3 = ADDXrr $x0, $x0
+ $x4 = ADDXrr $x1, $x1
+ $x5 = ADDXrr $x2, $x2
+ $x7 = ADDXrr $x6, $x6
+
+# TOP-LABEL: *** Final schedule for %bb.0 ***
+# TOP-NEXT: * Schedule table (TopDown):
+# TOP-NEXT: i: issue
+# TOP-NEXT: x: resource booked
+# TOP-NEXT: Cycle | 0 | 1 | 2 |
+# TOP-NEXT: SU(0) | i | | |
+# TOP-NEXT: CortexA55UnitFPALU | x | x | |
+# TOP-NEXT: SU(1) | i | | |
+# TOP-NEXT: CortexA55UnitALU | x | | |
+# TOP-NEXT: SU(2) | | i | |
+# TOP-NEXT: CortexA55UnitALU | | x | |
+# TOP-NEXT: SU(3) | | i | |
+# TOP-NEXT: CortexA55UnitALU | | x | |
+# TOP-NEXT: SU(4) | | | i |
+# TOP-NEXT: CortexA55UnitALU | | | x |
+# TOP-NEXT: SU(0) [TopReadyCycle = 0, BottomReadyCycle = 3]: dead %0:fpr128 = EXTv16i8 $q0, $q0, 8
+# TOP-NEXT: SU(1) [TopReadyCycle = 0, BottomReadyCycle = 0]: $x3 = ADDXrr $x0, $x0
+# TOP-NEXT: SU(2) [TopReadyCycle = 1, BottomReadyCycle = 0]: $x4 = ADDXrr $x1, $x1
+# TOP-NEXT: SU(3) [TopReadyCycle = 1, BottomReadyCycle = 0]: $x5 = ADDXrr $x2, $x2
+# TOP-NEXT: SU(4) [TopReadyCycle = 2, BottomReadyCycle = 0]: $x7 = ADDXrr $x6, $x6
+
+# BOTTOM-LABEL: *** Final schedule for %bb.0 ***
+# BOTTOM-NEXT: * Schedule table (BottomUp):
+# BOTTOM-NEXT: i: issue
+# BOTTOM-NEXT: x: resource booked
+# BOTTOM-NEXT: Cycle | 3 | 2 | 1 | 0 |
+# BOTTOM-NEXT: SU(0) | i | | | |
+# BOTTOM-NEXT: CortexA55UnitFPALU | x | x | | |
+# BOTTOM-NEXT: SU(1) | | | i | |
+# BOTTOM-NEXT: CortexA55UnitALU | | | x | |
+# BOTTOM-NEXT: SU(2) | | | i | |
+# BOTTOM-NEXT: CortexA55UnitALU | | | x | |
+# BOTTOM-NEXT: SU(3) | | | | i |
+# BOTTOM-NEXT: CortexA55UnitALU | | | | x |
+# BOTTOM-NEXT: SU(4) | | | | i |
+# BOTTOM-NEXT: CortexA55UnitALU | | | | x |
+# BOTTOM-NEXT: SU(0) [TopReadyCycle = 0, BottomReadyCycle = 3]: dead %0:fpr128 = EXTv16i8 $q0, $q0, 8
+# BOTTOM-NEXT: SU(1) [TopReadyCycle = 0, BottomReadyCycle = 1]: $x3 = ADDXrr $x0, $x0
+# BOTTOM-NEXT: SU(2) [TopReadyCycle = 0, BottomReadyCycle = 1]: $x4 = ADDXrr $x1, $x1
+# BOTTOM-NEXT: SU(3) [TopReadyCycle = 0, BottomReadyCycle = 0]: $x5 = ADDXrr $x2, $x2
+# BOTTOM-NEXT: SU(4) [TopReadyCycle = 0, BottomReadyCycle = 0]: $x7 = ADDXrr $x6, $x6
+
+# This test shows that at the moment we cannot generate the trace of
+# bidirectional scheduling as the values of TopReadyCycle and
+# BottomReadyCycle are inconsistent.
+
+# BIDIRECTIONAL-LABEL: *** Final schedule for %bb.0 ***
+# BIDIRECTIONAL-NEXT: * Schedule table (Bidirectional): not implemented
+# BIDIRECTIONAL-NEXT: SU(0) [TopReadyCycle = 0, BottomReadyCycle = 3]: dead %0:fpr128 = EXTv16i8 $q0, $q0, 8
+# BIDIRECTIONAL-NEXT: SU(1) [TopReadyCycle = 0, BottomReadyCycle = 1]: $x3 = ADDXrr $x0, $x0
+# BIDIRECTIONAL-NEXT: SU(2) [TopReadyCycle = 0, BottomReadyCycle = 1]: $x4 = ADDXrr $x1, $x1
+# BIDIRECTIONAL-NEXT: SU(3) [TopReadyCycle = 0, BottomReadyCycle = 0]: $x5 = ADDXrr $x2, $x2
+# BIDIRECTIONAL-NEXT: SU(4) [TopReadyCycle = 0, BottomReadyCycle = 0]: $x7 = ADDXrr $x6, $x6
More information about the llvm-commits
mailing list