[llvm] [llvm][CodeGen] Fixed a bug in stall cycle calculation for window scheduler (PR #99451)
Kai Yan via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 21 20:29:50 PDT 2024
https://github.com/kaiyan96 updated https://github.com/llvm/llvm-project/pull/99451
>From 504d488a749c955aed846baffbf17610e047f6f3 Mon Sep 17 00:00:00 2001
From: aklkaiyan <aklkaiyan at tencent.com>
Date: Thu, 18 Jul 2024 16:19:02 +0800
Subject: [PATCH 1/2] [llvm][CodeGen] Fixed a bug in stall cycle calculation
for window scheduler
---
llvm/lib/CodeGen/WindowScheduler.cpp | 8 ++-
.../CodeGen/Hexagon/swp-ws-stall-cycle.mir | 59 +++++++++++++++++++
2 files changed, 64 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir
diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp
index 0777480499e55..fcadc18ac8ef8 100644
--- a/llvm/lib/CodeGen/WindowScheduler.cpp
+++ b/llvm/lib/CodeGen/WindowScheduler.cpp
@@ -485,6 +485,7 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
// ========================================
int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
int MaxStallCycle = 0;
+ int CurrentII = MaxCycle + 1;
auto Range = getScheduleRange(Offset, SchedInstrNum);
for (auto &MI : Range) {
auto *SU = TripleDAG->getSUnit(&MI);
@@ -492,8 +493,9 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
for (auto &Succ : SU->Succs) {
if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)
continue;
- // If the expected cycle does not exceed MaxCycle, no check is needed.
- if (DefCycle + (int)Succ.getLatency() <= MaxCycle)
+ // If the expected cycle does not exceed loop initiation interval, no
+ // check is needed.
+ if (DefCycle + (int)Succ.getLatency() <= CurrentII)
continue;
// If the cycle of the scheduled MI A is less than that of the scheduled
// MI B, the scheduling will fail because the lifetime of the
@@ -503,7 +505,7 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
if (DefCycle < UseCycle)
return WindowIILimit;
// Get the stall cycle introduced by the register between two trips.
- int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle;
+ int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle;
MaxStallCycle = std::max(MaxStallCycle, StallCycle);
}
}
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir b/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir
new file mode 100644
index 0000000000000..ddba67d78eb58
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir
@@ -0,0 +1,59 @@
+# REQUIRES: asserts
+# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
+# RUN: -window-sched=force -filetype=null -verify-machineinstrs \
+# RUN: -window-region-limit=1 -window-search-ratio=100 -window-diff-limit=0 \
+# RUN: 2>&1 | FileCheck %s
+
+# CHECK-LABEL: Start analyzing II
+# CHECK: MaxStallCycle is 0
+# CHECK-LABEL: Start analyzing II
+# CHECK: MaxStallCycle is 0
+# CHECK-LABEL: Start analyzing II
+# CHECK: MaxStallCycle is 0
+
+---
+name: test_window_stall_cycle
+tracksRegLiveness: true
+body: |
+ bb.0:
+ successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ liveins: $r0, $r1
+
+ %0:intregs = COPY $r1
+ %1:intregs = COPY $r0
+ %2:intregs = nsw A2_add %0, %1
+ %3:intregs = S2_lsr_i_r_acc %2, %2, 31
+ %4:intregs = S2_asr_i_r killed %3, 1
+ %5:predregs = C2_cmpgt %1, %4
+ %6:intregs = A2_tfrsi 0
+ J2_jumpt killed %5, %bb.3, implicit-def dead $pc
+ J2_jump %bb.1, implicit-def dead $pc
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ %7:intregs = A2_addi %4, 2
+ %8:intregs = A2_tfrsi 0
+ %9:intregs = A2_sub %4, %1
+ %10:intregs = A2_addi %9, 1
+ %11:intregs = COPY %10
+ J2_loop0r %bb.2, %11, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+
+ bb.2 (machine-block-address-taken):
+ successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+
+ %12:intregs = PHI %7, %bb.1, %13, %bb.2
+ %14:intregs = PHI %8, %bb.1, %15, %bb.2
+ %16:intregs = PHI %8, %bb.1, %17, %bb.2
+ %18:intregs, %13:intregs = L2_loadri_pi %12, -4
+ %17:intregs = nsw A2_add killed %18, %16
+ %15:intregs = A2_max %17, %14
+ ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+ J2_jump %bb.3, implicit-def dead $pc
+
+ bb.3:
+ %19:intregs = PHI %6, %bb.0, %15, %bb.2
+ $r0 = COPY %19
+ PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+
+...
>From 85594e837bec5375e1969e8e29ceee4326b8aabe Mon Sep 17 00:00:00 2001
From: aklkaiyan <aklkaiyan at tencent.com>
Date: Mon, 22 Jul 2024 11:29:27 +0800
Subject: [PATCH 2/2] [llvm][CodeGen] Modifications made based on review
comments for bugfix_ws_stall_cycle
---
llvm/lib/CodeGen/WindowScheduler.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp
index fcadc18ac8ef8..aece214f7ddbb 100644
--- a/llvm/lib/CodeGen/WindowScheduler.cpp
+++ b/llvm/lib/CodeGen/WindowScheduler.cpp
@@ -493,8 +493,7 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
for (auto &Succ : SU->Succs) {
if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)
continue;
- // If the expected cycle does not exceed loop initiation interval, no
- // check is needed.
+ // If the expected cycle does not exceed CurrentII, no check is needed.
if (DefCycle + (int)Succ.getLatency() <= CurrentII)
continue;
// If the cycle of the scheduled MI A is less than that of the scheduled
More information about the llvm-commits
mailing list