[llvm] [MachineScheduler][ScheduleDAG] Add ability to bias scheduling by longest paths (PR #93223)

Michael Maitland via llvm-commits llvm-commits at lists.llvm.org
Thu May 23 10:55:10 PDT 2024


https://github.com/michaelmaitland created https://github.com/llvm/llvm-project/pull/93223

Currently the scheduler only biases by critical path. However, it is possible that there are paths that are long (but not the longest). Bias paths by decreasing order of depth to bias the longest paths.

I saw a small improvement on spec2017/523.xalancbmk_r in dynamic instruction count and in runtime with this change on RISC-V.

I am disabling this feature by default to make minimal impact to targets tuned with the algorithm prior to this patch.

>From e085dc59f29779e1571051a884f9dee9345c89d6 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 20 May 2024 13:43:14 -0700
Subject: [PATCH] [MachineScheduler][ScheduleDAG] Add ability to bias
 scheduling by longest paths

Currently the scheduler only biases by critical path. However, it is
possible that there are paths that are long (but not the longest). Bias
paths by decreasing order of depth to bias the longest paths.

I saw a small improvement on spec2017/523.xalancbmk_r in dynamic
instruction count and in runtime with this change on RISC-V.

I am disabling this feature by default to make minimal impact to targets
tuned with the algorithm prior to this patch.
---
 llvm/include/llvm/CodeGen/ScheduleDAG.h       |  4 ++++
 llvm/lib/CodeGen/MachineScheduler.cpp         | 14 +++++++++--
 llvm/lib/CodeGen/ScheduleDAG.cpp              | 12 ++++++++++
 .../test/CodeGen/X86/misched-critical-path.ll | 23 +++++++++++++------
 4 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h
index c5172e8c542b7..0eef94dc21cec 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAG.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h
@@ -454,6 +454,10 @@ class TargetRegisterInfo;
     /// edge occurs first.
     void biasCriticalPath();
 
+    /// Orders this node's predecessor edges such that the edges are sorted by
+    /// decreasing depth.
+    void biasLongerPaths();
+
     void dumpAttributes() const;
 
   private:
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 03e892a5e0d22..331a644496c0a 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -193,6 +193,10 @@ static cl::opt<unsigned>
     MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
                      cl::desc("Number of intervals to track"), cl::init(10));
 
+cl::opt<bool> BiasLongerPaths(
+    "misched-bias-longer-paths", cl::Hidden, cl::init(false),
+    cl::desc("Bias longer paths, instead of just the critical path"));
+
 // DAG subtrees must have at least this many nodes.
 static const unsigned MinSubtreeSize = 8;
 
@@ -918,7 +922,10 @@ findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
     assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
 
     // Order predecessors so DFSResult follows the critical path.
-    SU.biasCriticalPath();
+    if (BiasLongerPaths)
+      SU.biasLongerPaths();
+    else
+      SU.biasCriticalPath();
 
     // A SUnit is ready to top schedule if it has no predecessors.
     if (!SU.NumPredsLeft)
@@ -927,7 +934,10 @@ findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
     if (!SU.NumSuccsLeft)
       BotRoots.push_back(&SU);
   }
-  ExitSU.biasCriticalPath();
+  if (BiasLongerPaths)
+    ExitSU.biasLongerPaths();
+  else
+    ExitSU.biasCriticalPath();
 }
 
 /// Identify DAG roots and setup scheduler queues.
diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp
index 8d9a5041fc2fe..d3012cd9fc7ed 100644
--- a/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -340,6 +340,18 @@ void SUnit::biasCriticalPath() {
     std::swap(*Preds.begin(), *BestI);
 }
 
+void SUnit::biasLongerPaths() {
+  llvm::stable_sort(Preds, [](SDep A, SDep B) {
+    // B should only be ordered before A if it is a data dependency and its
+    // depth is larger.
+    if (B.getKind() == SDep::Data &&
+        A.getSUnit()->getDepth() > B.getSUnit()->getDepth())
+      return true;
+    // Preserve order in all other instances
+    return false;
+  });
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void SUnit::dumpAttributes() const {
   dbgs() << "  # preds left       : " << NumPredsLeft << "\n";
diff --git a/llvm/test/CodeGen/X86/misched-critical-path.ll b/llvm/test/CodeGen/X86/misched-critical-path.ll
index 2a95aaa46d4a4..c9f13a4f3e426 100644
--- a/llvm/test/CodeGen/X86/misched-critical-path.ll
+++ b/llvm/test/CodeGen/X86/misched-critical-path.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -misched-print-dags -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -misched-print-dags -o - 2>&1 > \
+; RUN:   /dev/null | FileCheck %s --check-prefix=CRITICAL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -misched-bias-longer-paths \
+; RUN:   -misched-print-dags -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=LONGEST
 ; REQUIRES: asserts
 
 @sc = common global i8 0
@@ -7,12 +10,18 @@
 
 ; Regression Test for PR92368.
 ;
-; CHECK: SU(8):   CMP8rr %4:gr8, %3:gr8, implicit-def $eflags
-; CHECK:   Predecessors:
-; CHECK-NEXT:    SU(6): Data Latency=0 Reg=%4
-; CHECK-NEXT:    SU(7): Out  Latency=0
-; CHECK-NEXT:    SU(5): Out  Latency=0
-; CHECK-NEXT:    SU(3): Data Latency=4 Reg=%3
+; CRITICAL: SU(8):   CMP8rr %4:gr8, %3:gr8, implicit-def $eflags
+; CRITICAL:   Predecessors:
+; CRITICAL-NEXT:    SU(6): Data Latency=0 Reg=%4
+; CRITICAL-NEXT:    SU(7): Out  Latency=0
+; CRITICAL-NEXT:    SU(5): Out  Latency=0
+; CRITICAL-NEXT:    SU(3): Data Latency=4 Reg=%3
+; LONGEST: SU(8):   CMP8rr %4:gr8, %3:gr8, implicit-def $eflags
+; LONGEST:  Predecessors:
+; LONGEST-NEXT:    SU(7): Out  Latency=0
+; LONGEST-NEXT:    SU(6): Data Latency=0 Reg=%4
+; LONGEST-NEXT:    SU(5): Out  Latency=0
+; LONGEST-NEXT:    SU(3): Data Latency=4 Reg=%3
 define void @misched_bug() nounwind {
 entry:
   %v0 = load i8, ptr @sc, align 1



More information about the llvm-commits mailing list