[llvm] r189120 - Adds cyclic critical path computation and heuristics, temporarily disabled.
Hal Finkel
hfinkel at anl.gov
Fri Aug 23 11:18:35 PDT 2013
Andy,
Can you please provide a slightly more verbose explanation of what this is doing? The commit message seems to imply that inside a loop there could be both a cyclic and acyclic critical path, and these two are being compared. Is that right? If so, what is the acyclic critical path inside a loop?
Thanks again,
Hal
----- Original Message -----
> Author: atrick
> Date: Fri Aug 23 12:48:43 2013
> New Revision: 189120
>
> URL: http://llvm.org/viewvc/llvm-project?rev=189120&view=rev
> Log:
> Adds cyclic critical path computation and heuristics, temporarily disabled.
>
> Estimate the cyclic critical path within a single block loop. If the
> acyclic critical path is longer, then the loop will exhaust OOO
> resources after some number of iterations. If the lag between the
> acyclic critical path and the cyclic critical path is longer than the
> time it takes to issue those loop iterations, then aggressively
> schedule for latency.
>
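For concreteness, here is how I read the new checkAcyclicLatency()
comparison, as a small standalone sketch with made-up numbers (the
buffer size and the unit factors below are assumptions for
illustration, not values from any real target model):

  #include <cstdio>

  int main() {
    // Assumed machine model: 32-entry micro-op buffer, unit factors.
    unsigned MicroOpBufferSize = 32, MicroOpFactor = 1, LatencyFactor = 1;
    unsigned CriticalPath = 50;   // acyclic critical path of the loop body
    unsigned CyclicCritPath = 10; // longest loop-carried dependence cycle

    unsigned BufferLimit = MicroOpBufferSize * MicroOpFactor;
    unsigned LatencyLag = CriticalPath - CyclicCritPath; // 40 cycles
    bool IsAcyclicLatencyLimited = LatencyLag * LatencyFactor > BufferLimit;

    // 40u > 32u: the OOO buffer fills before the acyclic path drains, so
    // the scheduler should aggressively reduce latency within an iteration.
    printf("acyclic latency limited: %s\n",
           IsAcyclicLatencyLimited ? "yes" : "no");
    return 0;
  }

Is that the intended interpretation?
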
> Modified:
> llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h
> llvm/trunk/lib/CodeGen/MachineScheduler.cpp
> llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
>
> Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h?rev=189120&r1=189119&r2=189120&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h (original)
> +++ llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h Fri Aug 23 12:48:43 2013
> @@ -197,6 +197,9 @@ namespace llvm {
> /// input.
> void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker = 0);
>
> + /// Compute the cyclic critical path through the DAG.
> + unsigned computeCyclicCriticalPath();
> +
> /// addSchedBarrierDeps - Add dependencies from instructions in the current
> /// list of instructions being scheduled to scheduling barrier. We want to
> /// make sure instructions which define registers that are either used by
>
> Modified: llvm/trunk/lib/CodeGen/MachineScheduler.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineScheduler.cpp?rev=189120&r1=189119&r2=189120&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MachineScheduler.cpp (original)
> +++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp Fri Aug 23 12:48:43 2013
> @@ -53,6 +53,9 @@ static cl::opt<unsigned> MISchedCutoff("
> static bool ViewMISchedDAGs = false;
> #endif // NDEBUG
>
> +static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
> +  cl::desc("Enable cyclic critical path analysis."), cl::init(false));
> +
> static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
>   cl::desc("Enable load clustering."), cl::init(true));
>
> @@ -1207,16 +1210,21 @@ public:
> struct SchedRemainder {
> // Critical path through the DAG in expected latency.
> unsigned CriticalPath;
> + unsigned CyclicCritPath;
>
> // Scaled count of micro-ops left to schedule.
> unsigned RemIssueCount;
>
> + bool IsAcyclicLatencyLimited;
> +
> // Unscheduled resources
> SmallVector<unsigned, 16> RemainingCounts;
>
> void reset() {
> CriticalPath = 0;
> + CyclicCritPath = 0;
> RemIssueCount = 0;
> + IsAcyclicLatencyLimited = false;
> RemainingCounts.clear();
> }
>
> @@ -1434,6 +1442,8 @@ public:
> virtual void registerRoots();
>
> protected:
> + void checkAcyclicLatency();
> +
> void tryCandidate(SchedCandidate &Cand,
> SchedCandidate &TryCand,
> SchedBoundary &Zone,
> @@ -1547,8 +1557,32 @@ void ConvergingScheduler::releaseBottomN
> Bot.releaseNode(SU, SU->BotReadyCycle);
> }
>
> +void ConvergingScheduler::checkAcyclicLatency() {
> +  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
> +    return;
> +
> +  unsigned BufferLimit =
> +    SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
> +  unsigned LatencyLag = Rem.CriticalPath - Rem.CyclicCritPath;
> +  Rem.IsAcyclicLatencyLimited =
> +    (LatencyLag * SchedModel->getLatencyFactor()) > BufferLimit;
> +
> +  DEBUG(dbgs() << "BufferLimit " << BufferLimit << "u / "
> +        << Rem.RemIssueCount << "u = "
> +        << (BufferLimit + Rem.RemIssueCount) / Rem.RemIssueCount << " iters. "
> +        << "Latency = " << LatencyLag << "c = "
> +        << LatencyLag * SchedModel->getLatencyFactor() << "u\n";
> +        if (Rem.IsAcyclicLatencyLimited)
> +          dbgs() << "  ACYCLIC LATENCY LIMIT\n");
> +}
> +
> void ConvergingScheduler::registerRoots() {
> Rem.CriticalPath = DAG->ExitSU.getDepth();
> +
> + if (EnableCyclicPath) {
> + Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
> + checkAcyclicLatency();
> + }
> // Some roots may not feed into ExitSU. Check all of them in case.
> for (std::vector<SUnit*>::const_iterator
>        I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
> @@ -2096,6 +2130,32 @@ static int biasPhysRegCopy(const SUnit *
> return 0;
> }
>
> +static bool tryLatency(ConvergingScheduler::SchedCandidate &TryCand,
> +                       ConvergingScheduler::SchedCandidate &Cand,
> +                       ConvergingScheduler::SchedBoundary &Zone) {
> +  if (Zone.isTop()) {
> +    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
> +      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
> +                  TryCand, Cand, ConvergingScheduler::TopDepthReduce))
> +        return true;
> +    }
> +    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
> +                   TryCand, Cand, ConvergingScheduler::TopPathReduce))
> +      return true;
> +  }
> +  else {
> +    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
> +      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
> +                  TryCand, Cand, ConvergingScheduler::BotHeightReduce))
> +        return true;
> +    }
> +    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
> +                   TryCand, Cand, ConvergingScheduler::BotPathReduce))
> +      return true;
> +  }
> +  return false;
> +}
> +
> /// Apply a set of heuristics to a new candidate. Heuristics are currently
> /// hierarchical. This may be more efficient than a graduated cost model because
> /// we don't need to evaluate all aspects of the model for each node in the
> @@ -2135,6 +2195,10 @@ void ConvergingScheduler::tryCandidate(S
> RegExcess))
> return;
>
> +  // For loops that are acyclic path limited, aggressively schedule for
> +  // latency.
> +  if (Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone))
> +    return;
> +
> // Avoid increasing the max critical pressure in the scheduled region.
> if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
>                 TryCand, Cand, RegCritical))
> @@ -2174,27 +2238,10 @@ void ConvergingScheduler::tryCandidate(S
> return;
>
> // Avoid serializing long latency dependence chains.
> -  if (Cand.Policy.ReduceLatency) {
> -    if (Zone.isTop()) {
> -      if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
> -        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
> -                    TryCand, Cand, TopDepthReduce))
> -          return;
> -      }
> -      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
> -                     TryCand, Cand, TopPathReduce))
> -        return;
> -    }
> -    else {
> -      if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
> -        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
> -                    TryCand, Cand, BotHeightReduce))
> -          return;
> -      }
> -      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
> -                     TryCand, Cand, BotPathReduce))
> -        return;
> -    }
> +  // For acyclic path limited loops, latency was already checked above.
> +  if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
> +      && tryLatency(TryCand, Cand, Zone)) {
> +    return;
>   }
>
> // Prefer immediate defs/users of the last scheduled instruction. This is a
>
> Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=189120&r1=189119&r2=189120&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original)
> +++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Fri Aug 23 12:48:43 2013
> @@ -36,6 +36,8 @@
> #include "llvm/Target/TargetMachine.h"
> #include "llvm/Target/TargetRegisterInfo.h"
> #include "llvm/Target/TargetSubtargetInfo.h"
> +#include <queue>
> +
> using namespace llvm;
>
> static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
> @@ -979,6 +981,65 @@ void ScheduleDAGInstrs::buildSchedGraph(
> PendingLoads.clear();
> }
>
> +/// Compute the max cyclic critical path through the DAG. For loops that span
> +/// basic blocks, MachineTraceMetrics should be used for this instead.
> +unsigned ScheduleDAGInstrs::computeCyclicCriticalPath() {
> +  // This only applies to a single-block loop.
> +  if (!BB->isSuccessor(BB))
> +    return 0;
> +
> +  unsigned MaxCyclicLatency = 0;
> +  // Visit each live out vreg def to find def/use pairs that cross iterations.
> +  for (SUnit::const_pred_iterator
> +         PI = ExitSU.Preds.begin(), PE = ExitSU.Preds.end(); PI != PE; ++PI) {
> +    MachineInstr *MI = PI->getSUnit()->getInstr();
> +    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
> +      const MachineOperand &MO = MI->getOperand(i);
> +      if (!MO.isReg() || !MO.isDef())
> +        break;
> +      unsigned Reg = MO.getReg();
> +      if (!Reg || TRI->isPhysicalRegister(Reg))
> +        continue;
> +
> +      const LiveInterval &LI = LIS->getInterval(Reg);
> +      unsigned LiveOutHeight = PI->getSUnit()->getHeight();
> +      unsigned LiveOutDepth = PI->getSUnit()->getDepth() + PI->getLatency();
> +      // Visit all local users of the vreg def.
> +      for (VReg2UseMap::iterator
> +             UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
> +        if (UI->SU == &ExitSU)
> +          continue;
> +
> +        // Only consider uses of the phi.
> +        LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(UI->SU->getInstr()));
> +        if (!LRQ.valueIn()->isPHIDef())
> +          continue;
> +
> +        // Cheat a bit and assume that a path spanning two iterations is a
> +        // cycle, which could overestimate in strange cases. This allows cyclic
> +        // latency to be estimated as the minimum height or depth slack.
> +        unsigned CyclicLatency = 0;
> +        if (LiveOutDepth > UI->SU->getDepth())
> +          CyclicLatency = LiveOutDepth - UI->SU->getDepth();
> +        unsigned LiveInHeight = UI->SU->getHeight() + PI->getLatency();
> +        if (LiveInHeight > LiveOutHeight) {
> +          if (LiveInHeight - LiveOutHeight < CyclicLatency)
> +            CyclicLatency = LiveInHeight - LiveOutHeight;
> +        }
> +        else
> +          CyclicLatency = 0;
> +        DEBUG(dbgs() << "Cyclic Path: SU(" << PI->getSUnit()->NodeNum
> +              << ") -> SU(" << UI->SU->NodeNum << ") = "
> +              << CyclicLatency << "\n");
> +        if (CyclicLatency > MaxCyclicLatency)
> +          MaxCyclicLatency = CyclicLatency;
> +      }
> +    }
> +  }
> +  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "\n");
> +  return MaxCyclicLatency;
> +}
> +
> void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
> #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
> SU->getInstr()->dump();
>
>
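Similarly, if I follow computeCyclicCriticalPath(), the per-pair
estimate is the minimum of the depth slack and the height slack,
clamped at zero. A toy example with assumed numbers (none taken from a
real DAG; the def/use figures below are invented for illustration):

  #include <algorithm>
  #include <cstdio>

  int main() {
    // Assumed live-out def SU and phi-use SU for one backedge pair.
    unsigned DefDepth = 12, DefLatency = 4, DefHeight = 10;
    unsigned UseDepth = 2, UseHeight = 13;

    unsigned LiveOutDepth = DefDepth + DefLatency;  // 16
    unsigned LiveOutHeight = DefHeight;             // 10
    unsigned LiveInHeight = UseHeight + DefLatency; // 17

    // Depth slack: how far the def's result extends past the use's depth.
    unsigned CyclicLatency =
        LiveOutDepth > UseDepth ? LiveOutDepth - UseDepth : 0; // 14
    // Height slack caps the estimate; the smaller of the two slacks wins.
    if (LiveInHeight > LiveOutHeight)
      CyclicLatency = std::min(CyclicLatency, LiveInHeight - LiveOutHeight);
    else
      CyclicLatency = 0;

    printf("estimated cyclic latency for this pair: %u\n", CyclicLatency); // 7
    return 0;
  }
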
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
--
Hal Finkel
Assistant Computational Scientist
Leadership Computing Facility
Argonne National Laboratory