[llvm] r189120 - Adds cyclic critical path computation and heuristics, temporarily disabled.

Sean Silva silvas at purdue.edu
Fri Aug 23 11:56:46 PDT 2013


+/// Compute the max cyclic critical path through the DAG. For loops that
span
+/// basic blocks, MachineTraceMetrics should be used for this instead.
+unsigned ScheduleDAGInstrs::computeCyclicCriticalPath() {

This seems to suggest that MachineTraceMetrics provides a superset of the
functionality that this routine provides (>1BB vs 1BB). What is the
rationale for having the routine then?

-- Sean Silva


On Fri, Aug 23, 2013 at 1:48 PM, Andrew Trick <atrick at apple.com> wrote:

> Author: atrick
> Date: Fri Aug 23 12:48:43 2013
> New Revision: 189120
>
> URL: http://llvm.org/viewvc/llvm-project?rev=189120&view=rev
> Log:
> Adds cyclic critical path computation and heuristics, temporarily disabled.
>
> Estimate the cyclic critical path within a single block loop. If the
> acyclic critical path is longer, then the loop will exhaust OOO
> resources after some number of iterations. If lag between the acyclic
> critical path and cyclic critical path is longer than the time it takes
> to issue those loop iterations, then aggressively schedule for
> latency.
>
> Modified:
>     llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h
>     llvm/trunk/lib/CodeGen/MachineScheduler.cpp
>     llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
>
> Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h?rev=189120&r1=189119&r2=189120&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h (original)
> +++ llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h Fri Aug 23
> 12:48:43 2013
> @@ -197,6 +197,9 @@ namespace llvm {
>      /// input.
>      void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker
> = 0);
>
> +    /// Compute the cyclic critical path through the DAG.
> +    unsigned computeCyclicCriticalPath();
> +
>      /// addSchedBarrierDeps - Add dependencies from instructions in the
> current
>      /// list of instructions being scheduled to scheduling barrier. We
> want to
>      /// make sure instructions which define registers that are either
> used by
>
> Modified: llvm/trunk/lib/CodeGen/MachineScheduler.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineScheduler.cpp?rev=189120&r1=189119&r2=189120&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MachineScheduler.cpp (original)
> +++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp Fri Aug 23 12:48:43 2013
> @@ -53,6 +53,9 @@ static cl::opt<unsigned> MISchedCutoff("
>  static bool ViewMISchedDAGs = false;
>  #endif // NDEBUG
>
> +static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
> +  cl::desc("Enable cyclic critical path analysis."), cl::init(false));
> +
>  static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
>    cl::desc("Enable load clustering."), cl::init(true));
>
> @@ -1207,16 +1210,21 @@ public:
>    struct SchedRemainder {
>      // Critical path through the DAG in expected latency.
>      unsigned CriticalPath;
> +    unsigned CyclicCritPath;
>
>      // Scaled count of micro-ops left to schedule.
>      unsigned RemIssueCount;
>
> +    bool IsAcyclicLatencyLimited;
> +
>      // Unscheduled resources
>      SmallVector<unsigned, 16> RemainingCounts;
>
>      void reset() {
>        CriticalPath = 0;
> +      CyclicCritPath = 0;
>        RemIssueCount = 0;
> +      IsAcyclicLatencyLimited = false;
>        RemainingCounts.clear();
>      }
>
> @@ -1434,6 +1442,8 @@ public:
>    virtual void registerRoots();
>
>  protected:
> +  void checkAcyclicLatency();
> +
>    void tryCandidate(SchedCandidate &Cand,
>                      SchedCandidate &TryCand,
>                      SchedBoundary &Zone,
> @@ -1547,8 +1557,32 @@ void ConvergingScheduler::releaseBottomN
>    Bot.releaseNode(SU, SU->BotReadyCycle);
>  }
>
> +void ConvergingScheduler::checkAcyclicLatency() {
> +  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
> +    return;
> +
> +  unsigned BufferLimit =
> +    SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
> +  unsigned LatencyLag = Rem.CriticalPath - Rem.CyclicCritPath;
> +  Rem.IsAcyclicLatencyLimited =
> +    (LatencyLag * SchedModel->getLatencyFactor()) > BufferLimit;
> +
> +  DEBUG(dbgs() << "BufferLimit " << BufferLimit << "u / "
> +        << Rem.RemIssueCount << "u = "
> +        << (BufferLimit + Rem.RemIssueCount) / Rem.RemIssueCount << "
> iters. "
> +        << "Latency = " << LatencyLag << "c = "
> +        << LatencyLag * SchedModel->getLatencyFactor() << "u\n";
> +        if (Rem.IsAcyclicLatencyLimited)
> +          dbgs() << "  ACYCLIC LATENCY LIMIT\n");
> +}
> +
>  void ConvergingScheduler::registerRoots() {
>    Rem.CriticalPath = DAG->ExitSU.getDepth();
> +
> +  if (EnableCyclicPath) {
> +    Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
> +    checkAcyclicLatency();
> +  }
>    // Some roots may not feed into ExitSU. Check all of them in case.
>    for (std::vector<SUnit*>::const_iterator
>           I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I)
> {
> @@ -2096,6 +2130,32 @@ static int biasPhysRegCopy(const SUnit *
>    return 0;
>  }
>
> +static bool tryLatency(ConvergingScheduler::SchedCandidate &TryCand,
> +                       ConvergingScheduler::SchedCandidate &Cand,
> +                       ConvergingScheduler::SchedBoundary &Zone) {
> +  if (Zone.isTop()) {
> +    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
> +      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
> +                  TryCand, Cand, ConvergingScheduler::TopDepthReduce))
> +        return true;
> +    }
> +    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
> +                   TryCand, Cand, ConvergingScheduler::TopPathReduce))
> +      return true;
> +  }
> +  else {
> +    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
> +      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
> +                  TryCand, Cand, ConvergingScheduler::BotHeightReduce))
> +        return true;
> +    }
> +    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
> +                   TryCand, Cand, ConvergingScheduler::BotPathReduce))
> +      return true;
> +  }
> +  return false;
> +}
> +
>  /// Apply a set of heuristics to a new candidate. Heuristics are currently
>  /// hierarchical. This may be more efficient than a graduated cost model
> because
>  /// we don't need to evaluate all aspects of the model for each node in
> the
> @@ -2135,6 +2195,10 @@ void ConvergingScheduler::tryCandidate(S
>                    RegExcess))
>      return;
>
> +  // For loops that are acyclic path limited, aggressively schedule for
> latency.
> +  if (Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone))
> +    return;
> +
>    // Avoid increasing the max critical pressure in the scheduled region.
>    if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
>                    TryCand, Cand, RegCritical))
> @@ -2174,27 +2238,10 @@ void ConvergingScheduler::tryCandidate(S
>      return;
>
>    // Avoid serializing long latency dependence chains.
> -  if (Cand.Policy.ReduceLatency) {
> -    if (Zone.isTop()) {
> -      if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
> -        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
> -                    TryCand, Cand, TopDepthReduce))
> -          return;
> -      }
> -      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
> -                     TryCand, Cand, TopPathReduce))
> -        return;
> -    }
> -    else {
> -      if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
> -        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
> -                    TryCand, Cand, BotHeightReduce))
> -          return;
> -      }
> -      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
> -                     TryCand, Cand, BotPathReduce))
> -        return;
> -    }
> +  // For acyclic path limited loops, latency was already checked above.
> +  if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
> +      && tryLatency(TryCand, Cand, Zone)) {
> +    return;
>    }
>
>    // Prefer immediate defs/users of the last scheduled instruction. This
> is a
>
> Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=189120&r1=189119&r2=189120&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original)
> +++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Fri Aug 23 12:48:43 2013
> @@ -36,6 +36,8 @@
>  #include "llvm/Target/TargetMachine.h"
>  #include "llvm/Target/TargetRegisterInfo.h"
>  #include "llvm/Target/TargetSubtargetInfo.h"
> +#include <queue>
> +
>  using namespace llvm;
>
>  static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
> @@ -979,6 +981,65 @@ void ScheduleDAGInstrs::buildSchedGraph(
>    PendingLoads.clear();
>  }
>
> +/// Compute the max cyclic critical path through the DAG. For loops that
> span
> +/// basic blocks, MachineTraceMetrics should be used for this instead.
> +unsigned ScheduleDAGInstrs::computeCyclicCriticalPath() {
> +  // This only applies to single block loop.
> +  if (!BB->isSuccessor(BB))
> +    return 0;
> +
> +  unsigned MaxCyclicLatency = 0;
> +  // Visit each live out vreg def to find def/use pairs that cross
> iterations.
> +  for (SUnit::const_pred_iterator
> +         PI = ExitSU.Preds.begin(), PE = ExitSU.Preds.end(); PI != PE;
> ++PI) {
> +    MachineInstr *MI = PI->getSUnit()->getInstr();
> +    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
> +      const MachineOperand &MO = MI->getOperand(i);
> +      if (!MO.isReg() || !MO.isDef())
> +        break;
> +      unsigned Reg = MO.getReg();
> +      if (!Reg || TRI->isPhysicalRegister(Reg))
> +        continue;
> +
> +      const LiveInterval &LI = LIS->getInterval(Reg);
> +      unsigned LiveOutHeight = PI->getSUnit()->getHeight();
> +      unsigned LiveOutDepth = PI->getSUnit()->getDepth() +
> PI->getLatency();
> +      // Visit all local users of the vreg def.
> +      for (VReg2UseMap::iterator
> +             UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
> +        if (UI->SU == &ExitSU)
> +          continue;
> +
> +        // Only consider uses of the phi.
> +        LiveRangeQuery LRQ(LI,
> LIS->getInstructionIndex(UI->SU->getInstr()));
> +        if (!LRQ.valueIn()->isPHIDef())
> +          continue;
> +
> +        // Cheat a bit and assume that a path spanning two iterations is a
> +        // cycle, which could overestimate in strange cases. This allows
> cyclic
> +        // latency to be estimated as the minimum height or depth slack.
> +        unsigned CyclicLatency = 0;
> +        if (LiveOutDepth > UI->SU->getDepth())
> +          CyclicLatency = LiveOutDepth - UI->SU->getDepth();
> +        unsigned LiveInHeight = UI->SU->getHeight() + PI->getLatency();
> +        if (LiveInHeight > LiveOutHeight) {
> +          if (LiveInHeight - LiveOutHeight < CyclicLatency)
> +            CyclicLatency = LiveInHeight - LiveOutHeight;
> +        }
> +        else
> +          CyclicLatency = 0;
> +        DEBUG(dbgs() << "Cyclic Path: SU(" << PI->getSUnit()->NodeNum
> +              << ") -> SU(" << UI->SU->NodeNum << ") = "
> +              << CyclicLatency << "\n");
> +        if (CyclicLatency > MaxCyclicLatency)
> +          MaxCyclicLatency = CyclicLatency;
> +      }
> +    }
> +  }
> +  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "\n");
> +  return MaxCyclicLatency;
> +}
> +
>  void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
>  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
>    SU->getInstr()->dump();
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130823/b3e63022/attachment.html>


More information about the llvm-commits mailing list