[llvm] [llvm][CodeGen] Fixed a bug in stall cycle calculation for window scheduler (PR #99451)

Kai Yan via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 21 20:29:50 PDT 2024


https://github.com/kaiyan96 updated https://github.com/llvm/llvm-project/pull/99451

>From 504d488a749c955aed846baffbf17610e047f6f3 Mon Sep 17 00:00:00 2001
From: aklkaiyan <aklkaiyan at tencent.com>
Date: Thu, 18 Jul 2024 16:19:02 +0800
Subject: [PATCH 1/2] [llvm][CodeGen] Fixed a bug in stall cycle calculation
 for window scheduler

---
 llvm/lib/CodeGen/WindowScheduler.cpp          |  8 ++-
 .../CodeGen/Hexagon/swp-ws-stall-cycle.mir    | 59 +++++++++++++++++++
 2 files changed, 64 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir

diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp
index 0777480499e55..fcadc18ac8ef8 100644
--- a/llvm/lib/CodeGen/WindowScheduler.cpp
+++ b/llvm/lib/CodeGen/WindowScheduler.cpp
@@ -485,6 +485,7 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
 // ========================================
 int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
   int MaxStallCycle = 0;
+  int CurrentII = MaxCycle + 1;
   auto Range = getScheduleRange(Offset, SchedInstrNum);
   for (auto &MI : Range) {
     auto *SU = TripleDAG->getSUnit(&MI);
@@ -492,8 +493,9 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
     for (auto &Succ : SU->Succs) {
       if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)
         continue;
-      // If the expected cycle does not exceed MaxCycle, no check is needed.
-      if (DefCycle + (int)Succ.getLatency() <= MaxCycle)
+      // If the expected cycle does not exceed loop initiation interval, no
+      // check is needed.
+      if (DefCycle + (int)Succ.getLatency() <= CurrentII)
         continue;
       // If the cycle of the scheduled MI A is less than that of the scheduled
       // MI B, the scheduling will fail because the lifetime of the
@@ -503,7 +505,7 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
       if (DefCycle < UseCycle)
         return WindowIILimit;
       // Get the stall cycle introduced by the register between two trips.
-      int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle;
+      int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle;
       MaxStallCycle = std::max(MaxStallCycle, StallCycle);
     }
   }
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir b/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir
new file mode 100644
index 0000000000000..ddba67d78eb58
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir
@@ -0,0 +1,59 @@
+# REQUIRES: asserts
+# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
+# RUN: -window-sched=force -filetype=null -verify-machineinstrs \
+# RUN: -window-region-limit=1 -window-search-ratio=100 -window-diff-limit=0 \
+# RUN: 2>&1 | FileCheck %s
+
+# CHECK-LABEL: Start analyzing II
+# CHECK: MaxStallCycle is 0
+# CHECK-LABEL: Start analyzing II
+# CHECK: MaxStallCycle is 0
+# CHECK-LABEL: Start analyzing II
+# CHECK: MaxStallCycle is 0
+
+---
+name:            test_window_stall_cycle
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.3(0x40000000), %bb.1(0x40000000)
+    liveins: $r0, $r1
+  
+    %0:intregs = COPY $r1
+    %1:intregs = COPY $r0
+    %2:intregs = nsw A2_add %0, %1
+    %3:intregs = S2_lsr_i_r_acc %2, %2, 31
+    %4:intregs = S2_asr_i_r killed %3, 1
+    %5:predregs = C2_cmpgt %1, %4
+    %6:intregs = A2_tfrsi 0
+    J2_jumpt killed %5, %bb.3, implicit-def dead $pc
+    J2_jump %bb.1, implicit-def dead $pc
+  
+  bb.1:
+    successors: %bb.2(0x80000000)
+  
+    %7:intregs = A2_addi %4, 2
+    %8:intregs = A2_tfrsi 0
+    %9:intregs = A2_sub %4, %1
+    %10:intregs = A2_addi %9, 1
+    %11:intregs = COPY %10
+    J2_loop0r %bb.2, %11, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+  
+  bb.2 (machine-block-address-taken):
+    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+  
+    %12:intregs = PHI %7, %bb.1, %13, %bb.2
+    %14:intregs = PHI %8, %bb.1, %15, %bb.2
+    %16:intregs = PHI %8, %bb.1, %17, %bb.2
+    %18:intregs, %13:intregs = L2_loadri_pi %12, -4
+    %17:intregs = nsw A2_add killed %18, %16
+    %15:intregs = A2_max %17, %14
+    ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+    J2_jump %bb.3, implicit-def dead $pc
+  
+  bb.3:
+    %19:intregs = PHI %6, %bb.0, %15, %bb.2
+    $r0 = COPY %19
+    PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+
+...

>From 85594e837bec5375e1969e8e29ceee4326b8aabe Mon Sep 17 00:00:00 2001
From: aklkaiyan <aklkaiyan at tencent.com>
Date: Mon, 22 Jul 2024 11:29:27 +0800
Subject: [PATCH 2/2] [llvm][CodeGen] Modifications made based on review
 comments for bugfix_ws_stall_cycle

---
 llvm/lib/CodeGen/WindowScheduler.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp
index fcadc18ac8ef8..aece214f7ddbb 100644
--- a/llvm/lib/CodeGen/WindowScheduler.cpp
+++ b/llvm/lib/CodeGen/WindowScheduler.cpp
@@ -493,8 +493,7 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
     for (auto &Succ : SU->Succs) {
       if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)
         continue;
-      // If the expected cycle does not exceed loop initiation interval, no
-      // check is needed.
+      // If the expected cycle does not exceed CurrentII, no check is needed.
       if (DefCycle + (int)Succ.getLatency() <= CurrentII)
         continue;
       // If the cycle of the scheduled MI A is less than that of the scheduled



More information about the llvm-commits mailing list