<div dir="ltr"><span style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px">+/// The cyclic path estimation identifies a def-use pair that crosses the back</span><br style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px">
<span style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px">+/// end and considers the depth and height of the nodes. For example, consider</span><br><div><span style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px"><br>
</span></div><div><span style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px">Do you mean "back edge" instead of "back end"?</span></div><div><span style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px"><br>
</span></div><div><span style="color:rgb(0,0,0);font-family:arial,sans-serif;font-size:13px">-- Sean Silva</span></div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Thu, Aug 29, 2013 at 2:04 PM, Andrew Trick <span dir="ltr">&lt;<a href="mailto:atrick@apple.com" target="_blank">atrick@apple.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: atrick<br>
Date: Thu Aug 29 13:04:49 2013<br>
New Revision: 189597<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=189597&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=189597&view=rev</a><br>
Log:<br>
Comment and revise the cyclic critical path code.<br>
<br>
This should be much more clear now. It's still disabled pending testing.<br>
<br>
Modified:<br>
    llvm/trunk/include/llvm/CodeGen/MachineScheduler.h<br>
    llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h<br>
    llvm/trunk/lib/CodeGen/MachineScheduler.cpp<br>
    llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp<br>
<br>
Modified: llvm/trunk/include/llvm/CodeGen/MachineScheduler.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineScheduler.h?rev=189597&r1=189596&r2=189597&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineScheduler.h?rev=189597&r1=189596&r2=189597&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/include/llvm/CodeGen/MachineScheduler.h (original)<br>
+++ llvm/trunk/include/llvm/CodeGen/MachineScheduler.h Thu Aug 29 13:04:49 2013<br>
@@ -331,6 +331,9 @@ public:<br>
<br>
   BitVector &getScheduledTrees() { return ScheduledTrees; }<br>
<br>
+  /// Compute the cyclic critical path through the DAG.<br>
+  unsigned computeCyclicCriticalPath();<br>
+<br>
   void viewGraph(const Twine &Name, const Twine &Title) LLVM_OVERRIDE;<br>
   void viewGraph() LLVM_OVERRIDE;<br>
<br>
<br>
Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h?rev=189597&r1=189596&r2=189597&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h?rev=189597&r1=189596&r2=189597&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h (original)<br>
+++ llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h Thu Aug 29 13:04:49 2013<br>
@@ -197,9 +197,6 @@ namespace llvm {<br>
     /// input.<br>
     void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker = 0);<br>
<br>
-    /// Compute the cyclic critical path through the DAG.<br>
-    unsigned computeCyclicCriticalPath();<br>
-<br>
     /// addSchedBarrierDeps - Add dependencies from instructions in the current<br>
     /// list of instructions being scheduled to scheduling barrier. We want to<br>
     /// make sure instructions which define registers that are either used by<br>
<br>
Modified: llvm/trunk/lib/CodeGen/MachineScheduler.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineScheduler.cpp?rev=189597&r1=189596&r2=189597&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineScheduler.cpp?rev=189597&r1=189596&r2=189597&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/lib/CodeGen/MachineScheduler.cpp (original)<br>
+++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp Thu Aug 29 13:04:49 2013<br>
@@ -642,6 +642,90 @@ void ScheduleDAGMI::findRootsAndBiasEdge<br>
   ExitSU.biasCriticalPath();<br>
 }<br>
<br>
+/// Compute the max cyclic critical path through the DAG. The scheduling DAG<br>
+/// only provides the critical path for single block loops. To handle loops that<br>
+/// span blocks, we could use the vreg path latencies provided by<br>
+/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently<br>
+/// available for use in the scheduler.<br>
+///<br>
+/// The cyclic path estimation identifies a def-use pair that crosses the back<br>
+/// end and considers the depth and height of the nodes. For example, consider<br>
+/// the following instruction sequence where each instruction has unit latency<br>
+/// and defines an eponymous virtual register:<br>
+///<br>
+/// a->b(a,c)->c(b)->d(c)->exit<br>
+///<br>
+/// The cyclic critical path is two cycles: b->c->b<br>
+/// The acyclic critical path is four cycles: a->b->c->d->exit<br>
+/// LiveOutHeight = height(c) = len(c->d->exit) = 2<br>
+/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3<br>
+/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4<br>
+/// LiveInDepth = depth(b) = len(a->b) = 1<br>
+///<br>
+/// LiveOutDepth - LiveInDepth = 3 - 1 = 2<br>
+/// LiveInHeight - LiveOutHeight = 4 - 2 = 2<br>
+/// CyclicCriticalPath = min(2, 2) = 2<br>
+unsigned ScheduleDAGMI::computeCyclicCriticalPath() {<br>
+  // This only applies to single block loop.<br>
+  if (!BB->isSuccessor(BB))<br>
+    return 0;<br>
+<br>
+  unsigned MaxCyclicLatency = 0;<br>
+  // Visit each live out vreg def to find def/use pairs that cross iterations.<br>
+  ArrayRef&lt;unsigned&gt; LiveOuts = RPTracker.getPressure().LiveOutRegs;<br>
+  for (ArrayRef&lt;unsigned&gt;::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();<br>
+       RI != RE; ++RI) {<br>
+    unsigned Reg = *RI;<br>
+    if (!TRI->isVirtualRegister(Reg))<br>
+        continue;<br>
+    const LiveInterval &LI = LIS->getInterval(Reg);<br>
+    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));<br>
+    if (!DefVNI)<br>
+      continue;<br>
+<br>
+    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);<br>
+    const SUnit *DefSU = getSUnit(DefMI);<br>
+    if (!DefSU)<br>
+      continue;<br>
+<br>
+    unsigned LiveOutHeight = DefSU->getHeight();<br>
+    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;<br>
+    // Visit all local users of the vreg def.<br>
+    for (VReg2UseMap::iterator<br>
+           UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {<br>
+      if (UI->SU == &ExitSU)<br>
+        continue;<br>
+<br>
+      // Only consider uses of the phi.<br>
+      LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(UI->SU->getInstr()));<br>
+      if (!LRQ.valueIn()->isPHIDef())<br>
+        continue;<br>
+<br>
+      // Assume that a path spanning two iterations is a cycle, which could<br>
+      // overestimate in strange cases. This allows cyclic latency to be<br>
+      // estimated as the minimum slack of the vreg's depth or height.<br>
+      unsigned CyclicLatency = 0;<br>
+      if (LiveOutDepth > UI->SU->getDepth())<br>
+        CyclicLatency = LiveOutDepth - UI->SU->getDepth();<br>
+<br>
+      unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency;<br>
+      if (LiveInHeight > LiveOutHeight) {<br>
+        if (LiveInHeight - LiveOutHeight < CyclicLatency)<br>
+          CyclicLatency = LiveInHeight - LiveOutHeight;<br>
+      }<br>
+      else<br>
+        CyclicLatency = 0;<br>
+<br>
+      DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("<br>
+            << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n");<br>
+      if (CyclicLatency > MaxCyclicLatency)<br>
+        MaxCyclicLatency = CyclicLatency;<br>
+    }<br>
+  }<br>
+  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");<br>
+  return MaxCyclicLatency;<br>
+}<br>
+<br>
 /// Identify DAG roots and setup scheduler queues.<br>
 void ScheduleDAGMI::initQueues(ArrayRef&lt;SUnit*&gt; TopRoots,<br>
                                ArrayRef&lt;SUnit*&gt; BotRoots) {<br>
@@ -1557,21 +1641,39 @@ void ConvergingScheduler::releaseBottomN<br>
   Bot.releaseNode(SU, SU->BotReadyCycle);<br>
 }<br>
<br>
+/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic<br>
+/// critical path by more cycles than it takes to drain the instruction buffer.<br>
+/// We estimate an upper bound on in-flight instructions as:<br>
+///<br>
+/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )<br>
+/// InFlightIterations = AcyclicPath / CyclesPerIteration<br>
+/// InFlightResources = InFlightIterations * LoopResources<br>
+///<br>
+/// TODO: Check execution resources in addition to IssueCount.<br>
 void ConvergingScheduler::checkAcyclicLatency() {<br>
   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)<br>
     return;<br>
<br>
+  // Scaled number of cycles per loop iteration.<br>
+  unsigned IterCount =<br>
+    std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),<br>
+             Rem.RemIssueCount);<br>
+  // Scaled acyclic critical path.<br>
+  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();<br>
+  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop<br>
+  unsigned InFlightCount =<br>
+    (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;<br>
   unsigned BufferLimit =<br>
     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();<br>
-  unsigned LatencyLag = Rem.CriticalPath - Rem.CyclicCritPath;<br>
-  Rem.IsAcyclicLatencyLimited =<br>
-    (LatencyLag * SchedModel->getLatencyFactor()) > BufferLimit;<br>
-<br>
-  DEBUG(dbgs() << "BufferLimit " << BufferLimit << "u / "<br>
-        << Rem.RemIssueCount << "u = "<br>
-        << (BufferLimit + Rem.RemIssueCount) / Rem.RemIssueCount << " iters. "<br>
-        << "Latency = " << LatencyLag << "c = "<br>
-        << LatencyLag * SchedModel->getLatencyFactor() << "u\n";<br>
+<br>
+  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;<br>
+<br>
+  DEBUG(dbgs() << "IssueCycles="<br>
+        << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "<br>
+        << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()<br>
+        << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount<br>
+        << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()<br>
+        << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";<br>
         if (Rem.IsAcyclicLatencyLimited)<br>
           dbgs() << "  ACYCLIC LATENCY LIMIT\n");<br>
 }<br>
@@ -1579,10 +1681,6 @@ void ConvergingScheduler::checkAcyclicLa<br>
 void ConvergingScheduler::registerRoots() {<br>
   Rem.CriticalPath = DAG->ExitSU.getDepth();<br>
<br>
-  if (EnableCyclicPath) {<br>
-    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();<br>
-    checkAcyclicLatency();<br>
-  }<br>
   // Some roots may not feed into ExitSU. Check all of them in case.<br>
   for (std::vector&lt;SUnit*&gt;::const_iterator<br>
          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {<br>
@@ -1590,6 +1688,11 @@ void ConvergingScheduler::registerRoots(<br>
       Rem.CriticalPath = (*I)->getDepth();<br>
   }<br>
   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');<br>
+<br>
+  if (EnableCyclicPath) {<br>
+    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();<br>
+    checkAcyclicLatency();<br>
+  }<br>
 }<br>
<br>
 /// Does this SU have a hazard within the current instruction group.<br>
<br>
Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=189597&r1=189596&r2=189597&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=189597&r1=189596&r2=189597&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original)<br>
+++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Thu Aug 29 13:04:49 2013<br>
@@ -987,65 +987,6 @@ void ScheduleDAGInstrs::buildSchedGraph(<br>
   PendingLoads.clear();<br>
 }<br>
<br>
-/// Compute the max cyclic critical path through the DAG. For loops that span<br>
-/// basic blocks, MachineTraceMetrics should be used for this instead.<br>
-unsigned ScheduleDAGInstrs::computeCyclicCriticalPath() {<br>
-  // This only applies to single block loop.<br>
-  if (!BB->isSuccessor(BB))<br>
-    return 0;<br>
-<br>
-  unsigned MaxCyclicLatency = 0;<br>
-  // Visit each live out vreg def to find def/use pairs that cross iterations.<br>
-  for (SUnit::const_pred_iterator<br>
-         PI = ExitSU.Preds.begin(), PE = ExitSU.Preds.end(); PI != PE; ++PI) {<br>
-    MachineInstr *MI = PI->getSUnit()->getInstr();<br>
-    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {<br>
-      const MachineOperand &MO = MI->getOperand(i);<br>
-      if (!MO.isReg() || !MO.isDef())<br>
-        break;<br>
-      unsigned Reg = MO.getReg();<br>
-      if (!Reg || TRI->isPhysicalRegister(Reg))<br>
-        continue;<br>
-<br>
-      const LiveInterval &LI = LIS->getInterval(Reg);<br>
-      unsigned LiveOutHeight = PI->getSUnit()->getHeight();<br>
-      unsigned LiveOutDepth = PI->getSUnit()->getDepth() + PI->getLatency();<br>
-      // Visit all local users of the vreg def.<br>
-      for (VReg2UseMap::iterator<br>
-             UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {<br>
-        if (UI->SU == &ExitSU)<br>
-          continue;<br>
-<br>
-        // Only consider uses of the phi.<br>
-        LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(UI->SU->getInstr()));<br>
-        if (!LRQ.valueIn()->isPHIDef())<br>
-          continue;<br>
-<br>
-        // Cheat a bit and assume that a path spanning two iterations is a<br>
-        // cycle, which could overestimate in strange cases. This allows cyclic<br>
-        // latency to be estimated as the minimum height or depth slack.<br>
-        unsigned CyclicLatency = 0;<br>
-        if (LiveOutDepth > UI->SU->getDepth())<br>
-          CyclicLatency = LiveOutDepth - UI->SU->getDepth();<br>
-        unsigned LiveInHeight = UI->SU->getHeight() + PI->getLatency();<br>
-        if (LiveInHeight > LiveOutHeight) {<br>
-          if (LiveInHeight - LiveOutHeight < CyclicLatency)<br>
-            CyclicLatency = LiveInHeight - LiveOutHeight;<br>
-        }<br>
-        else<br>
-          CyclicLatency = 0;<br>
-        DEBUG(dbgs() << "Cyclic Path: SU(" << PI->getSUnit()->NodeNum<br>
-              << ") -> SU(" << UI->SU->NodeNum << ") = "<br>
-              << CyclicLatency << "\n");<br>
-        if (CyclicLatency > MaxCyclicLatency)<br>
-          MaxCyclicLatency = CyclicLatency;<br>
-      }<br>
-    }<br>
-  }<br>
-  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "\n");<br>
-  return MaxCyclicLatency;<br>
-}<br>
-<br>
 void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {<br>
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)<br>
   SU->getInstr()->dump();<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div>