[llvm] r189597 - Comment and revise the cyclic critical path code.

Sean Silva silvas at purdue.edu
Thu Aug 29 16:15:48 PDT 2013


+/// The cyclic path estimation identifies a def-use pair that crosses the back
+/// end and considers the depth and height of the nodes. For example, consider

Do you mean "back edge" instead of "back end"?

-- Sean Silva


On Thu, Aug 29, 2013 at 2:04 PM, Andrew Trick <atrick at apple.com> wrote:

> Author: atrick
> Date: Thu Aug 29 13:04:49 2013
> New Revision: 189597
>
> URL: http://llvm.org/viewvc/llvm-project?rev=189597&view=rev
> Log:
> Comment and revise the cyclic critical path code.
>
> This should be much more clear now. It's still disabled pending testing.
>
> Modified:
>     llvm/trunk/include/llvm/CodeGen/MachineScheduler.h
>     llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h
>     llvm/trunk/lib/CodeGen/MachineScheduler.cpp
>     llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
>
> Modified: llvm/trunk/include/llvm/CodeGen/MachineScheduler.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineScheduler.h?rev=189597&r1=189596&r2=189597&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/CodeGen/MachineScheduler.h (original)
> +++ llvm/trunk/include/llvm/CodeGen/MachineScheduler.h Thu Aug 29 13:04:49
> 2013
> @@ -331,6 +331,9 @@ public:
>
>    BitVector &getScheduledTrees() { return ScheduledTrees; }
>
> +  /// Compute the cyclic critical path through the DAG.
> +  unsigned computeCyclicCriticalPath();
> +
>    void viewGraph(const Twine &Name, const Twine &Title) LLVM_OVERRIDE;
>    void viewGraph() LLVM_OVERRIDE;
>
>
> Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h?rev=189597&r1=189596&r2=189597&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h (original)
> +++ llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h Thu Aug 29
> 13:04:49 2013
> @@ -197,9 +197,6 @@ namespace llvm {
>      /// input.
>      void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker
> = 0);
>
> -    /// Compute the cyclic critical path through the DAG.
> -    unsigned computeCyclicCriticalPath();
> -
>      /// addSchedBarrierDeps - Add dependencies from instructions in the
> current
>      /// list of instructions being scheduled to scheduling barrier. We
> want to
>      /// make sure instructions which define registers that are either
> used by
>
> Modified: llvm/trunk/lib/CodeGen/MachineScheduler.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineScheduler.cpp?rev=189597&r1=189596&r2=189597&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MachineScheduler.cpp (original)
> +++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp Thu Aug 29 13:04:49 2013
> @@ -642,6 +642,90 @@ void ScheduleDAGMI::findRootsAndBiasEdge
>    ExitSU.biasCriticalPath();
>  }
>
> +/// Compute the max cyclic critical path through the DAG. The scheduling
> DAG
> +/// only provides the critical path for single block loops. To handle
> loops that
> +/// span blocks, we could use the vreg path latencies provided by
> +/// MachineTraceMetrics instead. However, MachineTraceMetrics is not
> currently
> +/// available for use in the scheduler.
> +///
> +/// The cyclic path estimation identifies a def-use pair that crosses the
> back
> +/// end and considers the depth and height of the nodes. For example,
> consider
> +/// the following instruction sequence where each instruction has unit
> latency
> +/// and defines an eponymous virtual register:
> +///
> +/// a->b(a,c)->c(b)->d(c)->exit
> +///
> +/// The cyclic critical path is two cycles: b->c->b
> +/// The acyclic critical path is four cycles: a->b->c->d->exit
> +/// LiveOutHeight = height(c) = len(c->d->exit) = 2
> +/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
> +/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
> +/// LiveInDepth = depth(b) = len(a->b) = 1
> +///
> +/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
> +/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
> +/// CyclicCriticalPath = min(2, 2) = 2
> +unsigned ScheduleDAGMI::computeCyclicCriticalPath() {
> +  // This only applies to single block loop.
> +  if (!BB->isSuccessor(BB))
> +    return 0;
> +
> +  unsigned MaxCyclicLatency = 0;
> +  // Visit each live out vreg def to find def/use pairs that cross
> iterations.
> +  ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs;
> +  for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE =
> LiveOuts.end();
> +       RI != RE; ++RI) {
> +    unsigned Reg = *RI;
> +    if (!TRI->isVirtualRegister(Reg))
> +        continue;
> +    const LiveInterval &LI = LIS->getInterval(Reg);
> +    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
> +    if (!DefVNI)
> +      continue;
> +
> +    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
> +    const SUnit *DefSU = getSUnit(DefMI);
> +    if (!DefSU)
> +      continue;
> +
> +    unsigned LiveOutHeight = DefSU->getHeight();
> +    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
> +    // Visit all local users of the vreg def.
> +    for (VReg2UseMap::iterator
> +           UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
> +      if (UI->SU == &ExitSU)
> +        continue;
> +
> +      // Only consider uses of the phi.
> +      LiveRangeQuery LRQ(LI,
> LIS->getInstructionIndex(UI->SU->getInstr()));
> +      if (!LRQ.valueIn()->isPHIDef())
> +        continue;
> +
> +      // Assume that a path spanning two iterations is a cycle, which
> could
> +      // overestimate in strange cases. This allows cyclic latency to be
> +      // estimated as the minimum slack of the vreg's depth or height.
> +      unsigned CyclicLatency = 0;
> +      if (LiveOutDepth > UI->SU->getDepth())
> +        CyclicLatency = LiveOutDepth - UI->SU->getDepth();
> +
> +      unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency;
> +      if (LiveInHeight > LiveOutHeight) {
> +        if (LiveInHeight - LiveOutHeight < CyclicLatency)
> +          CyclicLatency = LiveInHeight - LiveOutHeight;
> +      }
> +      else
> +        CyclicLatency = 0;
> +
> +      DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
> +            << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n");
> +      if (CyclicLatency > MaxCyclicLatency)
> +        MaxCyclicLatency = CyclicLatency;
> +    }
> +  }
> +  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
> +  return MaxCyclicLatency;
> +}
> +
>  /// Identify DAG roots and setup scheduler queues.
>  void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
>                                 ArrayRef<SUnit*> BotRoots) {
> @@ -1557,21 +1641,39 @@ void ConvergingScheduler::releaseBottomN
>    Bot.releaseNode(SU, SU->BotReadyCycle);
>  }
>
> +/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the
> cyclic
> +/// critical path by more cycles than it takes to drain the instruction
> buffer.
> +/// We estimate an upper bound on in-flight instructions as:
> +///
> +/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
> +/// InFlightIterations = AcyclicPath / CyclesPerIteration
> +/// InFlightResources = InFlightIterations * LoopResources
> +///
> +/// TODO: Check execution resources in addition to IssueCount.
>  void ConvergingScheduler::checkAcyclicLatency() {
>    if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
>      return;
>
> +  // Scaled number of cycles per loop iteration.
> +  unsigned IterCount =
> +    std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
> +             Rem.RemIssueCount);
> +  // Scaled acyclic critical path.
> +  unsigned AcyclicCount = Rem.CriticalPath *
> SchedModel->getLatencyFactor();
> +  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
> +  unsigned InFlightCount =
> +    (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
>    unsigned BufferLimit =
>      SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
> -  unsigned LatencyLag = Rem.CriticalPath - Rem.CyclicCritPath;
> -  Rem.IsAcyclicLatencyLimited =
> -    (LatencyLag * SchedModel->getLatencyFactor()) > BufferLimit;
> -
> -  DEBUG(dbgs() << "BufferLimit " << BufferLimit << "u / "
> -        << Rem.RemIssueCount << "u = "
> -        << (BufferLimit + Rem.RemIssueCount) / Rem.RemIssueCount << "
> iters. "
> -        << "Latency = " << LatencyLag << "c = "
> -        << LatencyLag * SchedModel->getLatencyFactor() << "u\n";
> +
> +  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
> +
> +  DEBUG(dbgs() << "IssueCycles="
> +        << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
> +        << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
> +        << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
> +        << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
> +        << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
>          if (Rem.IsAcyclicLatencyLimited)
>            dbgs() << "  ACYCLIC LATENCY LIMIT\n");
>  }
> @@ -1579,10 +1681,6 @@ void ConvergingScheduler::checkAcyclicLa
>  void ConvergingScheduler::registerRoots() {
>    Rem.CriticalPath = DAG->ExitSU.getDepth();
>
> -  if (EnableCyclicPath) {
> -    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
> -    checkAcyclicLatency();
> -  }
>    // Some roots may not feed into ExitSU. Check all of them in case.
>    for (std::vector<SUnit*>::const_iterator
>           I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I)
> {
> @@ -1590,6 +1688,11 @@ void ConvergingScheduler::registerRoots(
>        Rem.CriticalPath = (*I)->getDepth();
>    }
>    DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
> +
> +  if (EnableCyclicPath) {
> +    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
> +    checkAcyclicLatency();
> +  }
>  }
>
>  /// Does this SU have a hazard within the current instruction group.
>
> Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=189597&r1=189596&r2=189597&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original)
> +++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Thu Aug 29 13:04:49 2013
> @@ -987,65 +987,6 @@ void ScheduleDAGInstrs::buildSchedGraph(
>    PendingLoads.clear();
>  }
>
> -/// Compute the max cyclic critical path through the DAG. For loops that
> span
> -/// basic blocks, MachineTraceMetrics should be used for this instead.
> -unsigned ScheduleDAGInstrs::computeCyclicCriticalPath() {
> -  // This only applies to single block loop.
> -  if (!BB->isSuccessor(BB))
> -    return 0;
> -
> -  unsigned MaxCyclicLatency = 0;
> -  // Visit each live out vreg def to find def/use pairs that cross
> iterations.
> -  for (SUnit::const_pred_iterator
> -         PI = ExitSU.Preds.begin(), PE = ExitSU.Preds.end(); PI != PE;
> ++PI) {
> -    MachineInstr *MI = PI->getSUnit()->getInstr();
> -    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
> -      const MachineOperand &MO = MI->getOperand(i);
> -      if (!MO.isReg() || !MO.isDef())
> -        break;
> -      unsigned Reg = MO.getReg();
> -      if (!Reg || TRI->isPhysicalRegister(Reg))
> -        continue;
> -
> -      const LiveInterval &LI = LIS->getInterval(Reg);
> -      unsigned LiveOutHeight = PI->getSUnit()->getHeight();
> -      unsigned LiveOutDepth = PI->getSUnit()->getDepth() +
> PI->getLatency();
> -      // Visit all local users of the vreg def.
> -      for (VReg2UseMap::iterator
> -             UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
> -        if (UI->SU == &ExitSU)
> -          continue;
> -
> -        // Only consider uses of the phi.
> -        LiveRangeQuery LRQ(LI,
> LIS->getInstructionIndex(UI->SU->getInstr()));
> -        if (!LRQ.valueIn()->isPHIDef())
> -          continue;
> -
> -        // Cheat a bit and assume that a path spanning two iterations is a
> -        // cycle, which could overestimate in strange cases. This allows
> cyclic
> -        // latency to be estimated as the minimum height or depth slack.
> -        unsigned CyclicLatency = 0;
> -        if (LiveOutDepth > UI->SU->getDepth())
> -          CyclicLatency = LiveOutDepth - UI->SU->getDepth();
> -        unsigned LiveInHeight = UI->SU->getHeight() + PI->getLatency();
> -        if (LiveInHeight > LiveOutHeight) {
> -          if (LiveInHeight - LiveOutHeight < CyclicLatency)
> -            CyclicLatency = LiveInHeight - LiveOutHeight;
> -        }
> -        else
> -          CyclicLatency = 0;
> -        DEBUG(dbgs() << "Cyclic Path: SU(" << PI->getSUnit()->NodeNum
> -              << ") -> SU(" << UI->SU->NodeNum << ") = "
> -              << CyclicLatency << "\n");
> -        if (CyclicLatency > MaxCyclicLatency)
> -          MaxCyclicLatency = CyclicLatency;
> -      }
> -    }
> -  }
> -  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "\n");
> -  return MaxCyclicLatency;
> -}
> -
>  void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
>  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
>    SU->getInstr()->dump();
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130829/2bc839b9/attachment.html>


More information about the llvm-commits mailing list