[llvm] [llvm][CodeGen] Some bugfix for window scheduler (PR #99429)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 17 21:18:48 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-hexagon

Author: Kai Yan (kaiyan96)

<details>
<summary>Changes</summary>

1. Fixed max cycle calculation for zero-cost instructions.
2. Added a new restriction for II by pragma; the window scheduler will not support loops if the Swing Scheduler's II pragma is set.
3. Fixed a bug in stall cycle calculation. This bugfix will not affecting the window scheduler's result.
4. Added missing initialization failure information.

---
Full diff: https://github.com/llvm/llvm-project/pull/99429.diff


5 Files Affected:

- (modified) llvm/lib/CodeGen/MachinePipeliner.cpp (+6-2) 
- (modified) llvm/lib/CodeGen/WindowScheduler.cpp (+20-11) 
- (modified) llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir (+1) 
- (added) llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir (+85) 
- (added) llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir (+47) 


``````````diff
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 497e282bb9768..f97f55d2d3faa 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -528,8 +528,12 @@ bool MachinePipeliner::useSwingModuloScheduler() {
 }
 
 bool MachinePipeliner::useWindowScheduler(bool Changed) {
-  // WindowScheduler does not work when it is off or when SwingModuloScheduler
-  // is successfully scheduled.
+  // WindowScheduler does not work for following cases:
+  // 1. when it is off.
+  // 2. when SwingModuloScheduler is successfully scheduled.
+  // 3. when pragma II is enabled.
+  if (II_setByPragma)
+    return false;
   return WindowSchedulingOption == WindowSchedulingFlag::WS_Force ||
          (WindowSchedulingOption == WindowSchedulingFlag::WS_On && !Changed);
 }
diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp
index 0777480499e55..7801803f82c45 100644
--- a/llvm/lib/CodeGen/WindowScheduler.cpp
+++ b/llvm/lib/CodeGen/WindowScheduler.cpp
@@ -232,8 +232,11 @@ bool WindowScheduler::initialize() {
       return false;
     }
     for (auto &Def : MI.all_defs())
-      if (Def.isReg() && Def.getReg().isPhysical())
+      if (Def.isReg() && Def.getReg().isPhysical()) {
+        LLVM_DEBUG(dbgs() << "Physical registers are not supported in "
+                             "window scheduling!\n");
         return false;
+      }
   }
   if (SchedInstrNum <= WindowRegionLimit) {
     LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n");
@@ -437,14 +440,17 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
       int PredCycle = getOriCycle(PredMI);
       ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency());
     }
-    // ResourceManager can be used to detect resource conflicts between the
-    // current MI and the previously inserted MIs.
-    while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {
-      ++CurCycle;
-      if (CurCycle == (int)WindowIILimit)
-        return CurCycle;
+    // Zero cost instructions do not need to check resource.
+    if (!TII->isZeroCost(MI.getOpcode())) {
+      // ResourceManager can be used to detect resource conflicts between the
+      // current MI and the previously inserted MIs.
+      while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {
+        ++CurCycle;
+        if (CurCycle == (int)WindowIILimit)
+          return CurCycle;
+      }
+      RM.reserveResources(*SU, CurCycle);
     }
-    RM.reserveResources(*SU, CurCycle);
     OriToCycle[getOriMI(&MI)] = CurCycle;
     LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S."
                       << getOriStage(getOriMI(&MI), Offset) << "]: " << MI);
@@ -485,6 +491,7 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
 // ========================================
 int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
   int MaxStallCycle = 0;
+  int CurrentII = MaxCycle + 1;
   auto Range = getScheduleRange(Offset, SchedInstrNum);
   for (auto &MI : Range) {
     auto *SU = TripleDAG->getSUnit(&MI);
@@ -492,8 +499,9 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
     for (auto &Succ : SU->Succs) {
       if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)
         continue;
-      // If the expected cycle does not exceed MaxCycle, no check is needed.
-      if (DefCycle + (int)Succ.getLatency() <= MaxCycle)
+      // If the expected cycle does not exceed loop initiation interval, no
+      // check is needed.
+      if (DefCycle + (int)Succ.getLatency() <= CurrentII)
         continue;
       // If the cycle of the scheduled MI A is less than that of the scheduled
       // MI B, the scheduling will fail because the lifetime of the
@@ -503,7 +511,8 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
       if (DefCycle < UseCycle)
         return WindowIILimit;
       // Get the stall cycle introduced by the register between two trips.
-      int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle;
+      int StallCycle =
+          DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle;
       MaxStallCycle = std::max(MaxStallCycle, StallCycle);
     }
   }
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir b/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir
index 601b98dca8e20..be75301b016ed 100644
--- a/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir
@@ -3,6 +3,7 @@
 # RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \
 # RUN: | FileCheck %s
 
+# CHECK: Physical registers are not supported in window scheduling!
 # CHECK: The WindowScheduler failed to initialize!
 
 ---
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir b/llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir
new file mode 100644
index 0000000000000..71b653da940e8
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir
@@ -0,0 +1,85 @@
+# RUN: llc  --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
+# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \
+# RUN: | FileCheck %s
+# REQUIRES: asserts
+
+# Test that checks no window scheduler is performed if the II set by pragma was
+# enabled
+
+# CHECK-NOT: Start analyzing II
+# CHECK-NOT: Start scheduling Phis
+# CHECK-NOT: Current window Offset is {{[0-9]+}} and II is {{[0-9]+}}
+
+--- |
+  define void @test_pragma_ii_fail(ptr %a0, i32 %a1) {
+  b0:
+    %v0 = icmp sgt i32 %a1, 1
+    br i1 %v0, label %b1, label %b4
+  
+  b1:                                               ; preds = %b0
+    %v1 = load i32, ptr %a0, align 4
+    %v2 = add i32 %v1, 10
+    %v4 = add i32 %a1, -1
+    %cgep = getelementptr i32, ptr %a0, i32 1
+    br label %b2
+  
+  b2:                                               ; preds = %b2, %b1
+    %v5 = phi i32 [ %v12, %b2 ], [ %v4, %b1 ]
+    %v6 = phi ptr [ %cgep2, %b2 ], [ %cgep, %b1 ]
+    %v7 = phi i32 [ %v10, %b2 ], [ %v2, %b1 ]
+    store i32 %v7, ptr %v6, align 4
+    %v8 = add i32 %v7, 10
+    %cgep1 = getelementptr i32, ptr %v6, i32 -1
+    store i32 %v8, ptr %cgep1, align 4
+    %v10 = add i32 %v7, 10
+    %v12 = add i32 %v5, -1
+    %v13 = icmp eq i32 %v12, 0
+    %cgep2 = getelementptr i32, ptr %v6, i32 1
+    br i1 %v13, label %b4, label %b2, !llvm.loop !0
+  
+  b4:                                               ; preds = %b2, %b0
+    ret void
+  }
+  
+  !0 = distinct !{!0, !1}
+  !1 = !{!"llvm.loop.pipeline.initiationinterval", i32 2}
+...
+---
+name:            test_pragma_ii_fail
+tracksRegLiveness: true
+body:             |
+  bb.0.b0:
+    successors: %bb.1(0x40000000), %bb.3(0x40000000)
+    liveins: $r0, $r1
+
+    %10:intregs = COPY $r1
+    %9:intregs = COPY $r0
+    %11:predregs = C2_cmpgti %10, 1
+    J2_jumpf %11, %bb.3, implicit-def dead $pc
+    J2_jump %bb.1, implicit-def dead $pc
+  
+  bb.1.b1:
+    successors: %bb.2(0x80000000)
+  
+    %13:intregs, %2:intregs = L2_loadri_pi %9, 4
+    %0:intregs = A2_addi killed %13, 10
+    %1:intregs = A2_addi %10, -1
+    %16:intregs = COPY %1
+    J2_loop0r %bb.2, %16, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+  
+  bb.2.b2 (machine-block-address-taken):
+    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+  
+    %4:intregs = PHI %2, %bb.1, %8, %bb.2
+    %5:intregs = PHI %0, %bb.1, %6, %bb.2
+    S2_storeri_io %4, 0, %5
+    %6:intregs = A2_addi %5, 10
+    S2_storeri_io %4, -4, %6
+    %8:intregs = A2_addi %4, 4
+    ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+    J2_jump %bb.3, implicit-def dead $pc
+  
+  bb.3.b4:
+    PS_jmpret $r31, implicit-def dead $pc
+
+...
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir b/llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir
new file mode 100644
index 0000000000000..a93a03bd07dee
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir
@@ -0,0 +1,47 @@
+# REQUIRES: asserts
+# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
+# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \
+# RUN: | FileCheck %s
+
+# CHECK-NOT: Can't find a valid II. Keep searching...
+
+---
+name:            relu
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.2(0x30000000), %bb.1(0x50000000)
+    liveins: $r0, $r1, $r2
+
+    %0:intregs = COPY $r2
+    %1:intregs = COPY $r1
+    %2:intregs = COPY $r0
+    %3:predregs = C2_cmpeqi %2, 0
+    J2_jumpt killed %3, %bb.2, implicit-def dead $pc
+    J2_jump %bb.1, implicit-def dead $pc
+
+  bb.1:
+    successors: %bb.3(0x80000000)
+
+    %4:hvxvr = V6_vd0
+    %5:intregs = A2_addi %2, 31
+    %6:intregs = S2_lsr_i_r %5, 5
+    %7:intregs = COPY %6
+    J2_loop0r %bb.3, %7, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+    J2_jump %bb.3, implicit-def dead $pc
+
+  bb.2:
+    PS_jmpret $r31, implicit-def dead $pc
+
+  bb.3 (machine-block-address-taken):
+    successors: %bb.3(0x7c000000), %bb.2(0x04000000)
+
+    %8:intregs = PHI %1, %bb.1, %9, %bb.3
+    %10:intregs = PHI %0, %bb.1, %14, %bb.3
+    %11:hvxvr, %9:intregs = V6_vL32b_pi %8, 128
+    %12:intregs = COPY %10
+    %13:hvxvr = V6_vmaxw killed %11, %4
+    %14:intregs = V6_vS32b_pi %12, 128, killed %13
+    ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+    J2_jump %bb.2, implicit-def dead $pc
+...

``````````

</details>


https://github.com/llvm/llvm-project/pull/99429


More information about the llvm-commits mailing list