<div dir="ltr"><span style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px">+/// Compute the max cyclic critical path through the DAG. For loops that span</span><br style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px">
<span style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px">+/// basic blocks, MachineTraceMetrics should be used for this instead.</span><br style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px">
<span style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px">+unsigned ScheduleDAGInstrs::</span><span style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px">computeCyclicCriticalPath() {</span><br>
<div><span style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px"><br></span></div><div><span style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px">This seems to suggest that MachineTraceMetrics provides a superset of the functionality that this routine provides (>1BB vs 1BB). What is the rationale for having the routine then?</span></div>
<div><span style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px"><br></span></div><div><span style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px">-- Sean Silva</span></div></div><div class="gmail_extra">
<br><br><div class="gmail_quote">On Fri, Aug 23, 2013 at 1:48 PM, Andrew Trick <span dir="ltr">&lt;<a href="mailto:atrick@apple.com" target="_blank">atrick@apple.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Author: atrick<br>
Date: Fri Aug 23 12:48:43 2013<br>
New Revision: 189120<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=189120&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=189120&view=rev</a><br>
Log:<br>
Adds cyclic critical path computation and heuristics, temporarily disabled.<br>
<br>
Estimate the cyclic critical path within a single block loop. If the<br>
acyclic critical path is longer, then the loop will exhaust OOO<br>
resources after some number of iterations. If the lag between the acyclic<br>
critical path and cyclic critical path is longer than the time it takes<br>
to issue those loop iterations, then aggressively schedule for<br>
latency.<br>
<br>
Modified:<br>
    llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h<br>
    llvm/trunk/lib/CodeGen/MachineScheduler.cpp<br>
    llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp<br>
<br>
Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h?rev=189120&r1=189119&r2=189120&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h?rev=189120&r1=189119&r2=189120&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h (original)<br>
+++ llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h Fri Aug 23 12:48:43 2013<br>
@@ -197,6 +197,9 @@ namespace llvm {<br>
     /// input.<br>
     void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker = 0);<br>
<br>
+    /// Compute the cyclic critical path through the DAG.<br>
+    unsigned computeCyclicCriticalPath();<br>
+<br>
     /// addSchedBarrierDeps - Add dependencies from instructions in the current<br>
     /// list of instructions being scheduled to scheduling barrier. We want to<br>
     /// make sure instructions which define registers that are either used by<br>
<br>
Modified: llvm/trunk/lib/CodeGen/MachineScheduler.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineScheduler.cpp?rev=189120&r1=189119&r2=189120&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineScheduler.cpp?rev=189120&r1=189119&r2=189120&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/lib/CodeGen/MachineScheduler.cpp (original)<br>
+++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp Fri Aug 23 12:48:43 2013<br>
@@ -53,6 +53,9 @@ static cl::opt&lt;unsigned&gt; MISchedCutoff("<br>
 static bool ViewMISchedDAGs = false;<br>
 #endif // NDEBUG<br>
<br>
+static cl::opt&lt;bool&gt; EnableCyclicPath("misched-cyclicpath", cl::Hidden,<br>
+  cl::desc("Enable cyclic critical path analysis."), cl::init(false));<br>
+<br>
 static cl::opt&lt;bool&gt; EnableLoadCluster("misched-cluster", cl::Hidden,<br>
   cl::desc("Enable load clustering."), cl::init(true));<br>
<br>
@@ -1207,16 +1210,21 @@ public:<br>
   struct SchedRemainder {<br>
     // Critical path through the DAG in expected latency.<br>
     unsigned CriticalPath;<br>
+    unsigned CyclicCritPath;<br>
<br>
     // Scaled count of micro-ops left to schedule.<br>
     unsigned RemIssueCount;<br>
<br>
+    bool IsAcyclicLatencyLimited;<br>
+<br>
     // Unscheduled resources<br>
     SmallVector&lt;unsigned, 16&gt; RemainingCounts;<br>
<br>
     void reset() {<br>
       CriticalPath = 0;<br>
+      CyclicCritPath = 0;<br>
       RemIssueCount = 0;<br>
+      IsAcyclicLatencyLimited = false;<br>
       RemainingCounts.clear();<br>
     }<br>
<br>
@@ -1434,6 +1442,8 @@ public:<br>
   virtual void registerRoots();<br>
<br>
 protected:<br>
+  void checkAcyclicLatency();<br>
+<br>
   void tryCandidate(SchedCandidate &Cand,<br>
                     SchedCandidate &TryCand,<br>
                     SchedBoundary &Zone,<br>
@@ -1547,8 +1557,32 @@ void ConvergingScheduler::releaseBottomN<br>
   Bot.releaseNode(SU, SU->BotReadyCycle);<br>
 }<br>
<br>
+void ConvergingScheduler::checkAcyclicLatency() {<br>
+  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)<br>
+    return;<br>
+<br>
+  unsigned BufferLimit =<br>
+    SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();<br>
+  unsigned LatencyLag = Rem.CriticalPath - Rem.CyclicCritPath;<br>
+  Rem.IsAcyclicLatencyLimited =<br>
+    (LatencyLag * SchedModel->getLatencyFactor()) > BufferLimit;<br>
+<br>
+  DEBUG(dbgs() << "BufferLimit " << BufferLimit << "u / "<br>
+        << Rem.RemIssueCount << "u = "<br>
+        << (BufferLimit + Rem.RemIssueCount) / Rem.RemIssueCount << " iters. "<br>
+        << "Latency = " << LatencyLag << "c = "<br>
+        << LatencyLag * SchedModel->getLatencyFactor() << "u\n";<br>
+        if (Rem.IsAcyclicLatencyLimited)<br>
+          dbgs() << "  ACYCLIC LATENCY LIMIT\n");<br>
+}<br>
+<br>
 void ConvergingScheduler::registerRoots() {<br>
   Rem.CriticalPath = DAG->ExitSU.getDepth();<br>
+<br>
+  if (EnableCyclicPath) {<br>
+    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();<br>
+    checkAcyclicLatency();<br>
+  }<br>
   // Some roots may not feed into ExitSU. Check all of them in case.<br>
   for (std::vector&lt;SUnit*&gt;::const_iterator<br>
          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {<br>
@@ -2096,6 +2130,32 @@ static int biasPhysRegCopy(const SUnit *<br>
   return 0;<br>
 }<br>
<br>
+static bool tryLatency(ConvergingScheduler::SchedCandidate &TryCand,<br>
+                       ConvergingScheduler::SchedCandidate &Cand,<br>
+                       ConvergingScheduler::SchedBoundary &Zone) {<br>
+  if (Zone.isTop()) {<br>
+    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {<br>
+      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),<br>
+                  TryCand, Cand, ConvergingScheduler::TopDepthReduce))<br>
+        return true;<br>
+    }<br>
+    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),<br>
+                   TryCand, Cand, ConvergingScheduler::TopPathReduce))<br>
+      return true;<br>
+  }<br>
+  else {<br>
+    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {<br>
+      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),<br>
+                  TryCand, Cand, ConvergingScheduler::BotHeightReduce))<br>
+        return true;<br>
+    }<br>
+    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),<br>
+                   TryCand, Cand, ConvergingScheduler::BotPathReduce))<br>
+      return true;<br>
+  }<br>
+  return false;<br>
+}<br>
+<br>
 /// Apply a set of heuristics to a new candidate. Heuristics are currently<br>
 /// hierarchical. This may be more efficient than a graduated cost model because<br>
 /// we don't need to evaluate all aspects of the model for each node in the<br>
@@ -2135,6 +2195,10 @@ void ConvergingScheduler::tryCandidate(S<br>
                   RegExcess))<br>
     return;<br>
<br>
+  // For loops that are acyclic path limited, aggressively schedule for latency.<br>
+  if (Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone))<br>
+    return;<br>
+<br>
   // Avoid increasing the max critical pressure in the scheduled region.<br>
   if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,<br>
                   TryCand, Cand, RegCritical))<br>
@@ -2174,27 +2238,10 @@ void ConvergingScheduler::tryCandidate(S<br>
     return;<br>
<br>
   // Avoid serializing long latency dependence chains.<br>
-  if (Cand.Policy.ReduceLatency) {<br>
-    if (Zone.isTop()) {<br>
-      if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {<br>
-        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),<br>
-                    TryCand, Cand, TopDepthReduce))<br>
-          return;<br>
-      }<br>
-      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),<br>
-                     TryCand, Cand, TopPathReduce))<br>
-        return;<br>
-    }<br>
-    else {<br>
-      if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {<br>
-        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),<br>
-                    TryCand, Cand, BotHeightReduce))<br>
-          return;<br>
-      }<br>
-      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),<br>
-                     TryCand, Cand, BotPathReduce))<br>
-        return;<br>
-    }<br>
+  // For acyclic path limited loops, latency was already checked above.<br>
+  if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited<br>
+      && tryLatency(TryCand, Cand, Zone)) {<br>
+    return;<br>
   }<br>
<br>
   // Prefer immediate defs/users of the last scheduled instruction. This is a<br>
<br>
Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=189120&r1=189119&r2=189120&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=189120&r1=189119&r2=189120&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original)<br>
+++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Fri Aug 23 12:48:43 2013<br>
@@ -36,6 +36,8 @@<br>
 #include "llvm/Target/TargetMachine.h"<br>
 #include "llvm/Target/TargetRegisterInfo.h"<br>
 #include "llvm/Target/TargetSubtargetInfo.h"<br>
+#include &lt;queue&gt;<br>
+<br>
 using namespace llvm;<br>
<br>
 static cl::opt&lt;bool&gt; EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,<br>
@@ -979,6 +981,65 @@ void ScheduleDAGInstrs::buildSchedGraph(<br>
   PendingLoads.clear();<br>
 }<br>
<br>
+/// Compute the max cyclic critical path through the DAG. For loops that span<br>
+/// basic blocks, MachineTraceMetrics should be used for this instead.<br>
+unsigned ScheduleDAGInstrs::computeCyclicCriticalPath() {<br>
+  // This only applies to single block loop.<br>
+  if (!BB->isSuccessor(BB))<br>
+    return 0;<br>
+<br>
+  unsigned MaxCyclicLatency = 0;<br>
+  // Visit each live out vreg def to find def/use pairs that cross iterations.<br>
+  for (SUnit::const_pred_iterator<br>
+         PI = ExitSU.Preds.begin(), PE = ExitSU.Preds.end(); PI != PE; ++PI) {<br>
+    MachineInstr *MI = PI->getSUnit()->getInstr();<br>
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {<br>
+      const MachineOperand &MO = MI->getOperand(i);<br>
+      if (!MO.isReg() || !MO.isDef())<br>
+        break;<br>
+      unsigned Reg = MO.getReg();<br>
+      if (!Reg || TRI->isPhysicalRegister(Reg))<br>
+        continue;<br>
+<br>
+      const LiveInterval &LI = LIS->getInterval(Reg);<br>
+      unsigned LiveOutHeight = PI->getSUnit()->getHeight();<br>
+      unsigned LiveOutDepth = PI->getSUnit()->getDepth() + PI->getLatency();<br>
+      // Visit all local users of the vreg def.<br>
+      for (VReg2UseMap::iterator<br>
+             UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {<br>
+        if (UI->SU == &ExitSU)<br>
+          continue;<br>
+<br>
+        // Only consider uses of the phi.<br>
+        LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(UI->SU->getInstr()));<br>
+        if (!LRQ.valueIn()->isPHIDef())<br>
+          continue;<br>
+<br>
+        // Cheat a bit and assume that a path spanning two iterations is a<br>
+        // cycle, which could overestimate in strange cases. This allows cyclic<br>
+        // latency to be estimated as the minimum height or depth slack.<br>
+        unsigned CyclicLatency = 0;<br>
+        if (LiveOutDepth > UI->SU->getDepth())<br>
+          CyclicLatency = LiveOutDepth - UI->SU->getDepth();<br>
+        unsigned LiveInHeight = UI->SU->getHeight() + PI->getLatency();<br>
+        if (LiveInHeight > LiveOutHeight) {<br>
+          if (LiveInHeight - LiveOutHeight < CyclicLatency)<br>
+            CyclicLatency = LiveInHeight - LiveOutHeight;<br>
+        }<br>
+        else<br>
+          CyclicLatency = 0;<br>
+        DEBUG(dbgs() << "Cyclic Path: SU(" << PI->getSUnit()->NodeNum<br>
+              << ") -> SU(" << UI->SU->NodeNum << ") = "<br>
+              << CyclicLatency << "\n");<br>
+        if (CyclicLatency > MaxCyclicLatency)<br>
+          MaxCyclicLatency = CyclicLatency;<br>
+      }<br>
+    }<br>
+  }<br>
+  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "\n");<br>
+  return MaxCyclicLatency;<br>
+}<br>
+<br>
 void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {<br>
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)<br>
   SU->getInstr()->dump();<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div>