[llvm] r198122 - Add a PostMachineScheduler pass with generic implementation.

Andrew Trick atrick at apple.com
Sat Dec 28 13:56:57 PST 2013


Author: atrick
Date: Sat Dec 28 15:56:57 2013
New Revision: 198122

URL: http://llvm.org/viewvc/llvm-project?rev=198122&view=rev
Log:
Add a PostMachineScheduler pass with generic implementation.

PostGenericScheduler uses either the new machine model or the hazard
checker for top-down scheduling. Most of the infrastructure for PreRA
machine scheduling is reused.
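
For readers new to the machine scheduler, the toy program below
sketches the top-down, cycle-by-cycle list scheduling that
PostGenericScheduler performs through SchedBoundary: issue up to
issue-width ready instructions per cycle, and stall (bump the cycle)
when edge latency says nothing is ready. This is a self-contained
illustration with made-up types; it is not LLVM code.

#include <algorithm>
#include <cstdio>
#include <vector>

// Toy scheduling-DAG node: latency-weighted edges to successors.
// Purely hypothetical; not LLVM's SUnit.
struct Node {
  int Latency = 1;          // cycles until successors may issue
  int NumPredsLeft = 0;     // unscheduled predecessors
  int ReadyCycle = 0;       // earliest cycle this node may issue
  std::vector<int> Succs;   // indices of successor nodes
};

// Top-down list scheduling: issue up to IssueWidth ready nodes per
// cycle, advancing the cycle when nothing can issue (a stall).
static std::vector<int> scheduleTopDown(std::vector<Node> &Nodes,
                                        int IssueWidth) {
  std::vector<int> Ready, Order;
  for (int I = 0, E = (int)Nodes.size(); I != E; ++I)
    if (Nodes[I].NumPredsLeft == 0)
      Ready.push_back(I);

  int Cycle = 0, IssuedThisCycle = 0;
  while (Order.size() != Nodes.size()) {
    // Pick a ready node whose ReadyCycle has arrived (no hazard).
    auto It = std::find_if(Ready.begin(), Ready.end(), [&](int I) {
      return Nodes[I].ReadyCycle <= Cycle;
    });
    if (It == Ready.end() || IssuedThisCycle == IssueWidth) {
      ++Cycle;                 // bump the cycle past the stall
      IssuedThisCycle = 0;
      continue;
    }
    int N = *It;
    Ready.erase(It);
    Order.push_back(N);
    ++IssuedThisCycle;
    // Release successors, honoring edge latency.
    for (int S : Nodes[N].Succs) {
      Nodes[S].ReadyCycle = std::max(Nodes[S].ReadyCycle,
                                     Cycle + Nodes[N].Latency);
      if (--Nodes[S].NumPredsLeft == 0)
        Ready.push_back(S);
    }
  }
  return Order;
}

int main() {
  // 0 -> 2 and 1 -> 2, with a 3-cycle latency on node 0.
  std::vector<Node> Nodes(3);
  Nodes[0].Latency = 3;
  Nodes[0].Succs = {2};
  Nodes[1].Succs = {2};
  Nodes[2].NumPredsLeft = 2;
  for (int I : scheduleTopDown(Nodes, /*IssueWidth=*/2))
    std::printf("issue node %d\n", I);
  return 0;
}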

With some tuning, this should allow MachineScheduler to become the
default for all ARM targets, including Cortex-A9, using the new
machine model. Likewise, with additional tuning, it should be able to
replace PostRAScheduler for all targets.

The PostMachineScheduler pass does not currently run the
AntiDepBreaker. There is less need for it on targets that already run
the preRA MachineScheduler. I want to prove it's necessary before
committing to the maintenance burden.
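
For context, the anti (write-after-read) dependencies that the
AntiDepBreaker removes by renaming look like this. A toy model with
hypothetical types, not LLVM's MachineInstr:

#include <cstdio>
#include <vector>

// Toy post-RA instruction: one def and a list of uses, all physical
// registers. Purely illustrative.
struct Instr {
  int Def;
  std::vector<int> Uses;
};

// Report write-after-read (anti) dependencies: a later def of a
// register that an earlier instruction still reads. These are the
// edges the AntiDepBreaker removes by renaming the later def to a
// free register.
static void findAntiDeps(const std::vector<Instr> &Prog) {
  for (size_t Later = 0; Later != Prog.size(); ++Later)
    for (size_t Earlier = 0; Earlier != Later; ++Earlier)
      for (int U : Prog[Earlier].Uses)
        if (U == Prog[Later].Def)
          std::printf("anti dep: I%zu writes R%d read by I%zu\n",
                      Later, Prog[Later].Def, Earlier);
}

int main() {
  // I0: R1 = ...; I1: R2 = use R1; I2: R1 = ... (WAR on R1 vs. I1)
  std::vector<Instr> Prog = {{1, {}}, {2, {1}}, {1, {}}};
  findAntiDeps(Prog);  // prints: anti dep: I2 writes R1 read by I1
  return 0;
}

In the example, I2 cannot be hoisted above I1 unless its def of R1 is
renamed to a free register, which is exactly the rewrite the
AntiDepBreaker performs.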

The PostMachineScheduler also currently removes kill flags and adds
them all back later. This is a bit ridiculous. I'd prefer that passes
use a liveness utility directly rather than rely on kill flags.
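
To illustrate why stored kill flags are redundant with liveness, here
is a toy backward scan that rederives them: a use is a kill exactly
when the register is not live out of the instruction. Again a sketch
with invented types, not the LLVM utility:

#include <cstdio>
#include <set>
#include <vector>

// Toy instruction: registers read and written. Hypothetical model.
struct Instr {
  std::vector<int> Uses;
  std::vector<int> Defs;
};

// Recompute kill flags for one block with a backward liveness scan.
// This is information a liveness utility can always rederive, which
// is why relying on stored flags (and fixing them up after
// scheduling) is the weaker design.
static void markKills(const std::vector<Instr> &Block) {
  std::set<int> Live;  // live-out regs; assume none for the toy block
  for (size_t I = Block.size(); I-- != 0;) {
    for (int D : Block[I].Defs)
      Live.erase(D);
    for (int U : Block[I].Uses) {
      if (!Live.count(U))
        std::printf("I%zu: last use (kill) of R%d\n", I, U);
      Live.insert(U);
    }
  }
}

int main() {
  // I0: R1 = ...; I1: ... = R1; I2: ... = R1  (R1 killed at I2)
  std::vector<Instr> Block = {{{}, {1}}, {{1}, {}}, {{1}, {}}};
  markKills(Block);  // prints: I2: last use (kill) of R1
  return 0;
}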

A test case that enables this scheduler will be included in a
subsequent checkin that updates the A9 model.

Modified:
    llvm/trunk/lib/CodeGen/MachineScheduler.cpp

Modified: llvm/trunk/lib/CodeGen/MachineScheduler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineScheduler.cpp?rev=198122&r1=198121&r2=198122&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineScheduler.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp Sat Dec 28 15:56:57 2013
@@ -204,8 +204,8 @@ DefaultSchedRegistry("default", "Use the
 
 /// Forward declare the standard machine scheduler. This will be used as the
 /// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C);
-static ScheduleDAGInstrs *createRawGenericSched(MachineSchedContext *C);
+static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
+static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C);
 
 /// Decrement this iterator until reaching the top or a non-debug instr.
 static MachineBasicBlock::const_iterator
@@ -264,7 +264,7 @@ ScheduleDAGInstrs *MachineScheduler::cre
     return Scheduler;
 
   // Default to GenericScheduler.
-  return createGenericSched(this);
+  return createGenericSchedLive(this);
 }
 
 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
@@ -277,8 +277,7 @@ ScheduleDAGInstrs *PostMachineScheduler:
     return Scheduler;
 
   // Default to GenericScheduler.
-  // return createRawGenericSched(this);
-  return NULL;
+  return createGenericSchedPostRA(this);
 }
 
 /// Top-level MachineScheduler pass driver.
@@ -346,9 +345,28 @@ bool PostMachineScheduler::runOnMachineF
   return true;
 }
 
+/// Return true if the given instruction should not be included in a scheduling
+/// region.
+///
+/// MachineScheduler does not currently support scheduling across calls. To
+/// handle calls, the DAG builder needs to be modified to create register
+/// anti/output dependencies on the registers clobbered by the call's regmask
+/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
+/// scheduling across calls. In PostRA scheduling, we need the isCall check to
+/// enforce the boundary, but there would be no benefit to postRA scheduling
+/// across calls this late anyway.
+static bool isSchedBoundary(MachineBasicBlock::iterator MI,
+                            MachineBasicBlock *MBB,
+                            MachineFunction *MF,
+                            const TargetInstrInfo *TII,
+                            bool IsPostRA) {
+  return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
+}
+
 /// Main driver for both MachineScheduler and PostMachineScheduler.
 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+  bool IsPostRA = Scheduler.isPostRA();
 
   // Visit all machine basic blocks.
   //
@@ -369,13 +387,16 @@ void MachineSchedulerBase::scheduleRegio
     // The Scheduler may insert instructions during either schedule() or
     // exitRegion(), even for empty regions. So the local iterators 'I' and
     // 'RegionEnd' are invalid across these calls.
-    unsigned RemainingInstrs = MBB->size();
+    //
+    // MBB::size() uses instr_iterator to count. Here we need a bundle to count
+    // as a single instruction.
+    unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end());
     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
         RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
 
       // Avoid decrementing RegionEnd for blocks with no terminator.
       if (RegionEnd != MBB->end()
-          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
+          || isSchedBoundary(llvm::prior(RegionEnd), MBB, MF, TII, IsPostRA)) {
         --RegionEnd;
         // Count the boundary instruction.
         --RemainingInstrs;
@@ -386,7 +407,7 @@ void MachineSchedulerBase::scheduleRegio
       unsigned NumRegionInstrs = 0;
       MachineBasicBlock::iterator I = RegionEnd;
       for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
-        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
+        if (isSchedBoundary(llvm::prior(I), MBB, MF, TII, IsPostRA))
           break;
       }
       // Notify the scheduler of the region, even if we may skip scheduling
@@ -400,7 +421,8 @@ void MachineSchedulerBase::scheduleRegio
         Scheduler.exitRegion();
         continue;
       }
-      DEBUG(dbgs() << "********** MI Scheduling **********\n");
+      DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "")
+            << "MI Scheduling **********\n");
       DEBUG(dbgs() << MF->getName()
             << ":BB#" << MBB->getNumber() << " " << MBB->getName()
             << "\n  From: " << *I << "    To: ";
@@ -422,6 +444,11 @@ void MachineSchedulerBase::scheduleRegio
     }
     assert(RemainingInstrs == 0 && "Instruction count mismatch!");
     Scheduler.finishBlock();
+    if (Scheduler.isPostRA()) {
+      // FIXME: Ideally, no further passes should rely on kill flags. However,
+      // thumb2 size reduction is currently an exception.
+      Scheduler.fixupKills(MBB);
+    }
   }
   Scheduler.finalizeSchedule();
 }
@@ -1502,7 +1529,7 @@ void CopyConstrain::apply(ScheduleDAGMI
 //===----------------------------------------------------------------------===//
 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
 // and possibly other custom schedulers.
-// ===----------------------------------------------------------------------===/
+//===----------------------------------------------------------------------===//
 
 static const unsigned InvalidCycle = ~0U;
 
@@ -1531,6 +1558,9 @@ void SchedBoundary::reset() {
   IsResourceLimited = false;
   ReservedCycles.clear();
 #ifndef NDEBUG
+  // Track the maximum number of stall cycles that could arise either from the
+  // latency of a DAG edge or the number of cycles that a processor resource is
+  // reserved (SchedBoundary::ReservedCycles).
   MaxObservedLatency = 0;
 #endif
   // Reserve a zero-count for invalid CritResIdx.
@@ -1616,9 +1646,10 @@ getNextResourceCycle(unsigned PIdx, unsi
 ///
 /// TODO: Also check whether the SU must start a new group.
 bool SchedBoundary::checkHazard(SUnit *SU) {
-  if (HazardRec->isEnabled())
-    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
-
+  if (HazardRec->isEnabled()
+      && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
+    return true;
+  }
   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
@@ -1904,8 +1935,12 @@ void SchedBoundary::bumpNode(SUnit *SU)
              PI = SchedModel->getWriteProcResBegin(SC),
              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
         unsigned PIdx = PI->ProcResourceIdx;
-        if (SchedModel->getProcResource(PIdx)->BufferSize == 0)
+        if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
           ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
+#ifndef NDEBUG
+          MaxObservedLatency = std::max(PI->Cycles, MaxObservedLatency);
+#endif
+        }
       }
     }
   }
@@ -1940,9 +1975,9 @@ void SchedBoundary::bumpNode(SUnit *SU)
   // bump the cycle to avoid uselessly checking everything in the readyQ.
   CurrMOps += IncMOps;
   while (CurrMOps >= SchedModel->getIssueWidth()) {
-    bumpCycle(++NextCycle);
     DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
           << " at cycle " << CurrCycle << '\n');
+    bumpCycle(++NextCycle);
   }
   DEBUG(dumpScheduledState());
 }
@@ -2045,13 +2080,14 @@ void SchedBoundary::dumpScheduledState()
 #endif
 
 //===----------------------------------------------------------------------===//
-// GenericScheduler - Implementation of the generic MachineSchedStrategy.
+// GenericScheduler - Generic implementation of MachineSchedStrategy.
 //===----------------------------------------------------------------------===//
 
 namespace {
-/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
-/// the schedule.
-class GenericScheduler : public MachineSchedStrategy {
+/// Base class for GenericScheduler. This class maintains information about
+/// scheduling candidates based on TargetSchedModel making it easy to implement
+/// heuristics for either preRA or postRA scheduling.
+class GenericSchedulerBase : public MachineSchedStrategy {
 public:
   /// Represent the type of SchedCandidate found within a single queue.
   /// pickNodeBidirectional depends on these listed by decreasing priority.
@@ -2061,7 +2097,7 @@ public:
     TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
 
 #ifndef NDEBUG
-  static const char *getReasonStr(GenericScheduler::CandReason Reason);
+  static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
 #endif
 
   /// Policy for scheduling the next instruction in the candidate's zone.
@@ -2129,51 +2165,307 @@ public:
     bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
     void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
 
-    void initResourceDelta(const ScheduleDAGMILive *DAG,
+    void initResourceDelta(const ScheduleDAGMI *DAG,
                            const TargetSchedModel *SchedModel);
   };
 
-private:
+protected:
   const MachineSchedContext *Context;
-  ScheduleDAGMILive *DAG;
   const TargetSchedModel *SchedModel;
   const TargetRegisterInfo *TRI;
 
-  // State of the top and bottom scheduled instruction boundaries.
   SchedRemainder Rem;
+protected:
+  GenericSchedulerBase(const MachineSchedContext *C):
+    Context(C), SchedModel(0), TRI(0) {}
+
+  void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
+                 SchedBoundary *OtherZone);
+
+#ifndef NDEBUG
+  void traceCandidate(const SchedCandidate &Cand);
+#endif
+};
+} // namespace
+
+void GenericSchedulerBase::SchedCandidate::
+initResourceDelta(const ScheduleDAGMI *DAG,
+                  const TargetSchedModel *SchedModel) {
+  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
+    return;
+
+  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  for (TargetSchedModel::ProcResIter
+         PI = SchedModel->getWriteProcResBegin(SC),
+         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
+      ResDelta.CritResources += PI->Cycles;
+    if (PI->ProcResourceIdx == Policy.DemandResIdx)
+      ResDelta.DemandedResources += PI->Cycles;
+  }
+}
+
+/// Set the CandPolicy for a scheduling zone given the current resources and
+/// latencies inside and outside the zone.
+void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
+                                     bool IsPostRA,
+                                     SchedBoundary &CurrZone,
+                                     SchedBoundary *OtherZone) {
+  // Apply preemptive heuristics based on the total latency and resources
+  // inside and outside this zone. Potential stalls should be considered before
+  // following this policy.
+
+  // Compute remaining latency. We need this both to determine whether the
+  // overall schedule has become latency-limited and whether the instructions
+  // outside this zone are resource or latency limited.
+  //
+  // The "dependent" latency is updated incrementally during scheduling as the
+  // max height/depth of scheduled nodes minus the cycles since it was
+  // scheduled:
+  //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
+  //
+  // The "independent" latency is the max ready queue depth:
+  //   ILat = max N.depth for N in Available|Pending
+  //
+  // RemainingLatency is the greater of independent and dependent latency.
+  unsigned RemLatency = CurrZone.getDependentLatency();
+  RemLatency = std::max(RemLatency,
+                        CurrZone.findMaxLatency(CurrZone.Available.elements()));
+  RemLatency = std::max(RemLatency,
+                        CurrZone.findMaxLatency(CurrZone.Pending.elements()));
+
+  // Compute the critical resource outside the zone.
+  unsigned OtherCritIdx;
+  unsigned OtherCount =
+    OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
+
+  bool OtherResLimited = false;
+  if (SchedModel->hasInstrSchedModel()) {
+    unsigned LFactor = SchedModel->getLatencyFactor();
+    OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
+  }
+  // Schedule aggressively for latency in PostRA mode. We don't check for
+  // acyclic latency during PostRA, and highly out-of-order processors will
+  // skip PostRA scheduling.
+  if (!OtherResLimited) {
+    if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
+      Policy.ReduceLatency |= true;
+      DEBUG(dbgs() << "  " << CurrZone.Available.getName()
+            << " RemainingLatency " << RemLatency << " + "
+            << CurrZone.getCurrCycle() << "c > CritPath "
+            << Rem.CriticalPath << "\n");
+    }
+  }
+  // If the same resource is limiting inside and outside the zone, do nothing.
+  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
+    return;
+
+  DEBUG(
+    if (CurrZone.isResourceLimited()) {
+      dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
+             << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
+             << "\n";
+    }
+    if (OtherResLimited)
+      dbgs() << "  RemainingLimit: "
+             << SchedModel->getResourceName(OtherCritIdx) << "\n";
+    if (!CurrZone.isResourceLimited() && !OtherResLimited)
+      dbgs() << "  Latency limited both directions.\n");
+
+  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
+    Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
+
+  if (OtherResLimited)
+    Policy.DemandResIdx = OtherCritIdx;
+}
+
+#ifndef NDEBUG
+const char *GenericSchedulerBase::getReasonStr(
+  GenericSchedulerBase::CandReason Reason) {
+  switch (Reason) {
+  case NoCand:         return "NOCAND    ";
+  case PhysRegCopy:    return "PREG-COPY ";
+  case RegExcess:      return "REG-EXCESS";
+  case RegCritical:    return "REG-CRIT  ";
+  case Stall:          return "STALL     ";
+  case Cluster:        return "CLUSTER   ";
+  case Weak:           return "WEAK      ";
+  case RegMax:         return "REG-MAX   ";
+  case ResourceReduce: return "RES-REDUCE";
+  case ResourceDemand: return "RES-DEMAND";
+  case TopDepthReduce: return "TOP-DEPTH ";
+  case TopPathReduce:  return "TOP-PATH  ";
+  case BotHeightReduce:return "BOT-HEIGHT";
+  case BotPathReduce:  return "BOT-PATH  ";
+  case NextDefUse:     return "DEF-USE   ";
+  case NodeOrder:      return "ORDER     ";
+  };
+  llvm_unreachable("Unknown reason!");
+}
+
+void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
+  PressureChange P;
+  unsigned ResIdx = 0;
+  unsigned Latency = 0;
+  switch (Cand.Reason) {
+  default:
+    break;
+  case RegExcess:
+    P = Cand.RPDelta.Excess;
+    break;
+  case RegCritical:
+    P = Cand.RPDelta.CriticalMax;
+    break;
+  case RegMax:
+    P = Cand.RPDelta.CurrentMax;
+    break;
+  case ResourceReduce:
+    ResIdx = Cand.Policy.ReduceResIdx;
+    break;
+  case ResourceDemand:
+    ResIdx = Cand.Policy.DemandResIdx;
+    break;
+  case TopDepthReduce:
+    Latency = Cand.SU->getDepth();
+    break;
+  case TopPathReduce:
+    Latency = Cand.SU->getHeight();
+    break;
+  case BotHeightReduce:
+    Latency = Cand.SU->getHeight();
+    break;
+  case BotPathReduce:
+    Latency = Cand.SU->getDepth();
+    break;
+  }
+  dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
+  if (P.isValid())
+    dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
+           << ":" << P.getUnitInc() << " ";
+  else
+    dbgs() << "      ";
+  if (ResIdx)
+    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
+  else
+    dbgs() << "         ";
+  if (Latency)
+    dbgs() << " " << Latency << " cycles ";
+  else
+    dbgs() << "          ";
+  dbgs() << '\n';
+}
+#endif
+
+/// Return true if this heuristic determines order.
+static bool tryLess(int TryVal, int CandVal,
+                    GenericSchedulerBase::SchedCandidate &TryCand,
+                    GenericSchedulerBase::SchedCandidate &Cand,
+                    GenericSchedulerBase::CandReason Reason) {
+  if (TryVal < CandVal) {
+    TryCand.Reason = Reason;
+    return true;
+  }
+  if (TryVal > CandVal) {
+    if (Cand.Reason > Reason)
+      Cand.Reason = Reason;
+    return true;
+  }
+  Cand.setRepeat(Reason);
+  return false;
+}
+
+static bool tryGreater(int TryVal, int CandVal,
+                       GenericSchedulerBase::SchedCandidate &TryCand,
+                       GenericSchedulerBase::SchedCandidate &Cand,
+                       GenericSchedulerBase::CandReason Reason) {
+  if (TryVal > CandVal) {
+    TryCand.Reason = Reason;
+    return true;
+  }
+  if (TryVal < CandVal) {
+    if (Cand.Reason > Reason)
+      Cand.Reason = Reason;
+    return true;
+  }
+  Cand.setRepeat(Reason);
+  return false;
+}
+
+static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
+                       GenericSchedulerBase::SchedCandidate &Cand,
+                       SchedBoundary &Zone) {
+  if (Zone.isTop()) {
+    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
+      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+                  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
+        return true;
+    }
+    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+                   TryCand, Cand, GenericSchedulerBase::TopPathReduce))
+      return true;
+  }
+  else {
+    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
+      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+                  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
+        return true;
+    }
+    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+                   TryCand, Cand, GenericSchedulerBase::BotPathReduce))
+      return true;
+  }
+  return false;
+}
+
+static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
+                      bool IsTop) {
+  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
+        << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
+}
+
+namespace {
+/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
+/// the schedule.
+class GenericScheduler : public GenericSchedulerBase {
+  ScheduleDAGMILive *DAG;
+
+  // State of the top and bottom scheduled instruction boundaries.
   SchedBoundary Top;
   SchedBoundary Bot;
 
   MachineSchedPolicy RegionPolicy;
 public:
   GenericScheduler(const MachineSchedContext *C):
-    Context(C), DAG(0), SchedModel(0), TRI(0),
-    Top(SchedBoundary::TopQID, "TopQ"), Bot(SchedBoundary::BotQID, "BotQ") {}
+    GenericSchedulerBase(C), DAG(0), Top(SchedBoundary::TopQID, "TopQ"),
+    Bot(SchedBoundary::BotQID, "BotQ") {}
 
   virtual void initPolicy(MachineBasicBlock::iterator Begin,
                           MachineBasicBlock::iterator End,
-                          unsigned NumRegionInstrs);
+                          unsigned NumRegionInstrs) LLVM_OVERRIDE;
 
-  bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; }
+  virtual bool shouldTrackPressure() const LLVM_OVERRIDE {
+    return RegionPolicy.ShouldTrackPressure;
+  }
 
-  virtual void initialize(ScheduleDAGMI *dag);
+  virtual void initialize(ScheduleDAGMI *dag) LLVM_OVERRIDE;
 
-  virtual SUnit *pickNode(bool &IsTopNode);
+  virtual SUnit *pickNode(bool &IsTopNode) LLVM_OVERRIDE;
 
-  virtual void schedNode(SUnit *SU, bool IsTopNode);
+  virtual void schedNode(SUnit *SU, bool IsTopNode) LLVM_OVERRIDE;
 
-  virtual void releaseTopNode(SUnit *SU) { Top.releaseTopNode(SU); }
+  virtual void releaseTopNode(SUnit *SU) LLVM_OVERRIDE {
+    Top.releaseTopNode(SU);
+  }
 
-  virtual void releaseBottomNode(SUnit *SU) { Bot.releaseBottomNode(SU); }
+  virtual void releaseBottomNode(SUnit *SU) LLVM_OVERRIDE {
+    Bot.releaseBottomNode(SU);
+  }
 
-  virtual void registerRoots();
+  virtual void registerRoots() LLVM_OVERRIDE;
 
 protected:
   void checkAcyclicLatency();
 
-  void setPolicy(CandPolicy &Policy, SchedBoundary &CurrZone,
-                 SchedBoundary &OtherZone);
-
   void tryCandidate(SchedCandidate &Cand,
                     SchedCandidate &TryCand,
                     SchedBoundary &Zone,
@@ -2187,10 +2479,6 @@ protected:
                          SchedCandidate &Candidate);
 
   void reschedulePhysRegCopies(SUnit *SU, bool isTop);
-
-#ifndef NDEBUG
-  void traceCandidate(const SchedCandidate &Cand);
-#endif
 };
 } // namespace
 
@@ -2317,129 +2605,11 @@ void GenericScheduler::registerRoots() {
   }
 }
 
-/// Set the CandPolicy given a scheduling zone given the current resources and
-/// latencies inside and outside the zone.
-void GenericScheduler::setPolicy(CandPolicy &Policy, SchedBoundary &CurrZone,
-                                 SchedBoundary &OtherZone) {
-  // Apply preemptive heuristics based on the the total latency and resources
-  // inside and outside this zone. Potential stalls should be considered before
-  // following this policy.
-
-  // Compute remaining latency. We need this both to determine whether the
-  // overall schedule has become latency-limited and whether the instructions
-  // outside this zone are resource or latency limited.
-  //
-  // The "dependent" latency is updated incrementally during scheduling as the
-  // max height/depth of scheduled nodes minus the cycles since it was
-  // scheduled:
-  //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
-  //
-  // The "independent" latency is the max ready queue depth:
-  //   ILat = max N.depth for N in Available|Pending
-  //
-  // RemainingLatency is the greater of independent and dependent latency.
-  unsigned RemLatency = CurrZone.getDependentLatency();
-  RemLatency = std::max(RemLatency,
-                        CurrZone.findMaxLatency(CurrZone.Available.elements()));
-  RemLatency = std::max(RemLatency,
-                        CurrZone.findMaxLatency(CurrZone.Pending.elements()));
-
-  // Compute the critical resource outside the zone.
-  unsigned OtherCritIdx;
-  unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx);
-
-  bool OtherResLimited = false;
-  if (SchedModel->hasInstrSchedModel()) {
-    unsigned LFactor = SchedModel->getLatencyFactor();
-    OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
-  }
-  if (!OtherResLimited
-      && (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
-    Policy.ReduceLatency |= true;
-    DEBUG(dbgs() << "  " << CurrZone.Available.getName() << " RemainingLatency "
-          << RemLatency << " + " << CurrZone.getCurrCycle() << "c > CritPath "
-          << Rem.CriticalPath << "\n");
-  }
-  // If the same resource is limiting inside and outside the zone, do nothing.
-  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
-    return;
-
-  DEBUG(
-    if (CurrZone.isResourceLimited()) {
-      dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
-             << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
-             << "\n";
-    }
-    if (OtherResLimited)
-      dbgs() << "  RemainingLimit: "
-             << SchedModel->getResourceName(OtherCritIdx) << "\n";
-    if (!CurrZone.isResourceLimited() && !OtherResLimited)
-      dbgs() << "  Latency limited both directions.\n");
-
-  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
-    Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
-
-  if (OtherResLimited)
-    Policy.DemandResIdx = OtherCritIdx;
-}
-
-void GenericScheduler::SchedCandidate::
-initResourceDelta(const ScheduleDAGMILive *DAG,
-                  const TargetSchedModel *SchedModel) {
-  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
-    return;
-
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
-  for (TargetSchedModel::ProcResIter
-         PI = SchedModel->getWriteProcResBegin(SC),
-         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
-    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
-      ResDelta.CritResources += PI->Cycles;
-    if (PI->ProcResourceIdx == Policy.DemandResIdx)
-      ResDelta.DemandedResources += PI->Cycles;
-  }
-}
-
-/// Return true if this heuristic determines order.
-static bool tryLess(int TryVal, int CandVal,
-                    GenericScheduler::SchedCandidate &TryCand,
-                    GenericScheduler::SchedCandidate &Cand,
-                    GenericScheduler::CandReason Reason) {
-  if (TryVal < CandVal) {
-    TryCand.Reason = Reason;
-    return true;
-  }
-  if (TryVal > CandVal) {
-    if (Cand.Reason > Reason)
-      Cand.Reason = Reason;
-    return true;
-  }
-  Cand.setRepeat(Reason);
-  return false;
-}
-
-static bool tryGreater(int TryVal, int CandVal,
-                       GenericScheduler::SchedCandidate &TryCand,
-                       GenericScheduler::SchedCandidate &Cand,
-                       GenericScheduler::CandReason Reason) {
-  if (TryVal > CandVal) {
-    TryCand.Reason = Reason;
-    return true;
-  }
-  if (TryVal < CandVal) {
-    if (Cand.Reason > Reason)
-      Cand.Reason = Reason;
-    return true;
-  }
-  Cand.setRepeat(Reason);
-  return false;
-}
-
 static bool tryPressure(const PressureChange &TryP,
                         const PressureChange &CandP,
-                        GenericScheduler::SchedCandidate &TryCand,
-                        GenericScheduler::SchedCandidate &Cand,
-                        GenericScheduler::CandReason Reason) {
+                        GenericSchedulerBase::SchedCandidate &TryCand,
+                        GenericSchedulerBase::SchedCandidate &Cand,
+                        GenericSchedulerBase::CandReason Reason) {
   int TryRank = TryP.getPSetOrMax();
   int CandRank = CandP.getPSetOrMax();
   // If both candidates affect the same set, go with the smallest increase.
@@ -2491,32 +2661,6 @@ static int biasPhysRegCopy(const SUnit *
   return 0;
 }
 
-static bool tryLatency(GenericScheduler::SchedCandidate &TryCand,
-                       GenericScheduler::SchedCandidate &Cand,
-                       SchedBoundary &Zone) {
-  if (Zone.isTop()) {
-    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
-      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
-                  TryCand, Cand, GenericScheduler::TopDepthReduce))
-        return true;
-    }
-    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
-                   TryCand, Cand, GenericScheduler::TopPathReduce))
-      return true;
-  }
-  else {
-    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
-      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
-                  TryCand, Cand, GenericScheduler::BotHeightReduce))
-        return true;
-    }
-    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
-                   TryCand, Cand, GenericScheduler::BotPathReduce))
-      return true;
-  }
-  return false;
-}
-
 /// Apply a set of heuristics to a new candidate. Heuristics are currently
 /// hierarchical. This may be more efficient than a graduated cost model because
 /// we don't need to evaluate all aspects of the model for each node in the
@@ -2658,83 +2802,6 @@ void GenericScheduler::tryCandidate(Sche
   }
 }
 
-#ifndef NDEBUG
-const char *GenericScheduler::getReasonStr(
-  GenericScheduler::CandReason Reason) {
-  switch (Reason) {
-  case NoCand:         return "NOCAND    ";
-  case PhysRegCopy:    return "PREG-COPY";
-  case RegExcess:      return "REG-EXCESS";
-  case RegCritical:    return "REG-CRIT  ";
-  case Stall:          return "STALL     ";
-  case Cluster:        return "CLUSTER   ";
-  case Weak:           return "WEAK      ";
-  case RegMax:         return "REG-MAX   ";
-  case ResourceReduce: return "RES-REDUCE";
-  case ResourceDemand: return "RES-DEMAND";
-  case TopDepthReduce: return "TOP-DEPTH ";
-  case TopPathReduce:  return "TOP-PATH  ";
-  case BotHeightReduce:return "BOT-HEIGHT";
-  case BotPathReduce:  return "BOT-PATH  ";
-  case NextDefUse:     return "DEF-USE   ";
-  case NodeOrder:      return "ORDER     ";
-  };
-  llvm_unreachable("Unknown reason!");
-}
-
-void GenericScheduler::traceCandidate(const SchedCandidate &Cand) {
-  PressureChange P;
-  unsigned ResIdx = 0;
-  unsigned Latency = 0;
-  switch (Cand.Reason) {
-  default:
-    break;
-  case RegExcess:
-    P = Cand.RPDelta.Excess;
-    break;
-  case RegCritical:
-    P = Cand.RPDelta.CriticalMax;
-    break;
-  case RegMax:
-    P = Cand.RPDelta.CurrentMax;
-    break;
-  case ResourceReduce:
-    ResIdx = Cand.Policy.ReduceResIdx;
-    break;
-  case ResourceDemand:
-    ResIdx = Cand.Policy.DemandResIdx;
-    break;
-  case TopDepthReduce:
-    Latency = Cand.SU->getDepth();
-    break;
-  case TopPathReduce:
-    Latency = Cand.SU->getHeight();
-    break;
-  case BotHeightReduce:
-    Latency = Cand.SU->getHeight();
-    break;
-  case BotPathReduce:
-    Latency = Cand.SU->getDepth();
-    break;
-  }
-  dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
-  if (P.isValid())
-    dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
-           << ":" << P.getUnitInc() << " ";
-  else
-    dbgs() << "      ";
-  if (ResIdx)
-    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
-  else
-    dbgs() << "         ";
-  if (Latency)
-    dbgs() << " " << Latency << " cycles ";
-  else
-    dbgs() << "          ";
-  dbgs() << '\n';
-}
-#endif
-
 /// Pick the best candidate from the queue.
 ///
 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
@@ -2765,12 +2832,6 @@ void GenericScheduler::pickNodeFromQueue
   }
 }
 
-static void tracePick(const GenericScheduler::SchedCandidate &Cand,
-                      bool IsTop) {
-  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
-        << GenericScheduler::getReasonStr(Cand.Reason) << '\n');
-}
-
 /// Pick the best candidate node from either the top or bottom queue.
 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
   // Schedule as far as possible in the direction of no choice. This is most
@@ -2790,10 +2851,10 @@ SUnit *GenericScheduler::pickNodeBidirec
   SchedCandidate TopCand(NoPolicy);
   // Set the bottom-up policy based on the state of the current bottom zone and
   // the instructions outside the zone, including the top zone.
-  setPolicy(BotCand.Policy, Bot, Top);
+  setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top);
   // Set the top-down policy based on the state of the current top zone and
   // the instructions outside the zone, including the bottom zone.
-  setPolicy(TopCand.Policy, Top, Bot);
+  setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot);
 
   // Prefer bottom scheduling when heuristics are silent.
   pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
@@ -2903,8 +2964,9 @@ void GenericScheduler::reschedulePhysReg
 }
 
 /// Update the scheduler's state after scheduling a node. This is the same node
-/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to update
-/// it's state based on the current cycle before MachineSchedStrategy does.
+/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
+/// update its state based on the current cycle before MachineSchedStrategy
+/// does.
 ///
 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
 /// them here. See comments in biasPhysRegCopy.
@@ -2923,16 +2985,10 @@ void GenericScheduler::schedNode(SUnit *
   }
 }
 
-/// Create a generic scheduler with no DAG mutation passes.
-static ScheduleDAGInstrs *createRawGenericSched(MachineSchedContext *C) {
-  return new ScheduleDAGMILive(C, new GenericScheduler(C));
-}
-
 /// Create the standard converging machine scheduler. This will be used as the
 /// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) {
-  ScheduleDAGMILive *DAG =
-    static_cast<ScheduleDAGMILive*>(createRawGenericSched(C));
+static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
+  ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, new GenericScheduler(C));
   // Register DAG post-processors.
   //
   // FIXME: extend the mutation API to allow earlier mutations to instantiate
@@ -2945,9 +3001,191 @@ static ScheduleDAGInstrs *createGenericS
     DAG->addMutation(new MacroFusion(DAG->TII));
   return DAG;
 }
+
 static MachineSchedRegistry
 GenericSchedRegistry("converge", "Standard converging scheduler.",
-                     createGenericSched);
+                     createGenericSchedLive);
+
+//===----------------------------------------------------------------------===//
+// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// PostGenericScheduler - Interface to the scheduling algorithm used by
+/// ScheduleDAGMI.
+///
+/// Callbacks from ScheduleDAGMI:
+///   initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
+class PostGenericScheduler : public GenericSchedulerBase {
+  ScheduleDAGMI *DAG;
+  SchedBoundary Top;
+  SmallVector<SUnit*, 8> BotRoots;
+public:
+  PostGenericScheduler(const MachineSchedContext *C):
+    GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}
+
+  virtual ~PostGenericScheduler() {}
+
+  virtual void initPolicy(MachineBasicBlock::iterator Begin,
+                          MachineBasicBlock::iterator End,
+                          unsigned NumRegionInstrs) LLVM_OVERRIDE {
+    /* no configurable policy */
+  }
+
+  /// PostRA scheduling does not track pressure.
+  virtual bool shouldTrackPressure() const LLVM_OVERRIDE { return false; }
+
+  virtual void initialize(ScheduleDAGMI *Dag) LLVM_OVERRIDE {
+    DAG = Dag;
+    SchedModel = DAG->getSchedModel();
+    TRI = DAG->TRI;
+
+    Rem.init(DAG, SchedModel);
+    Top.init(DAG, SchedModel, &Rem);
+    BotRoots.clear();
+
+    // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
+    // or are disabled, then these HazardRecs will be disabled.
+    const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+    const TargetMachine &TM = DAG->MF.getTarget();
+    if (!Top.HazardRec) {
+      Top.HazardRec =
+        TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+    }
+  }
+
+  virtual void registerRoots() LLVM_OVERRIDE;
+
+  virtual SUnit *pickNode(bool &IsTopNode) LLVM_OVERRIDE;
+
+  virtual void scheduleTree(unsigned SubtreeID) LLVM_OVERRIDE {
+    llvm_unreachable("PostRA scheduler does not support subtree analysis.");
+  }
+
+  virtual void schedNode(SUnit *SU, bool IsTopNode) LLVM_OVERRIDE;
+
+  virtual void releaseTopNode(SUnit *SU) LLVM_OVERRIDE {
+    Top.releaseTopNode(SU);
+  }
+
+  // Only called for roots.
+  virtual void releaseBottomNode(SUnit *SU) LLVM_OVERRIDE {
+    BotRoots.push_back(SU);
+  }
+
+protected:
+  void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
+
+  void pickNodeFromQueue(SchedCandidate &Cand);
+};
+} // namespace
+
+void PostGenericScheduler::registerRoots() {
+  Rem.CriticalPath = DAG->ExitSU.getDepth();
+
+  // Some roots may not feed into ExitSU. Check all of them just in case.
+  for (SmallVectorImpl<SUnit*>::const_iterator
+         I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
+    if ((*I)->getDepth() > Rem.CriticalPath)
+      Rem.CriticalPath = (*I)->getDepth();
+  }
+  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+}
+
+/// Apply a set of heuristics to a new candidate for PostRA scheduling.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
+                                        SchedCandidate &TryCand) {
+
+  // Initialize the candidate if needed.
+  if (!Cand.isValid()) {
+    TryCand.Reason = NodeOrder;
+    return;
+  }
+
+  // Prioritize instructions that read unbuffered resources by stall cycles.
+  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
+              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+    return;
+
+  // Avoid critical resource consumption and balance the schedule.
+  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+              TryCand, Cand, ResourceReduce))
+    return;
+  if (tryGreater(TryCand.ResDelta.DemandedResources,
+                 Cand.ResDelta.DemandedResources,
+                 TryCand, Cand, ResourceDemand))
+    return;
+
+  // Avoid serializing long latency dependence chains.
+  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
+    return;
+  }
+
+  // Fall through to original instruction order.
+  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
+    TryCand.Reason = NodeOrder;
+}
+
+void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
+  ReadyQueue &Q = Top.Available;
+
+  DEBUG(Q.dump());
+
+  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+    SchedCandidate TryCand(Cand.Policy);
+    TryCand.SU = *I;
+    TryCand.initResourceDelta(DAG, SchedModel);
+    tryCandidate(Cand, TryCand);
+    if (TryCand.Reason != NoCand) {
+      Cand.setBest(TryCand);
+      DEBUG(traceCandidate(Cand));
+    }
+  }
+}
+
+/// Pick the next node to schedule.
+SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
+  if (DAG->top() == DAG->bottom()) {
+    assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
+    return NULL;
+  }
+  SUnit *SU;
+  do {
+    SU = Top.pickOnlyChoice();
+    if (!SU) {
+      CandPolicy NoPolicy;
+      SchedCandidate TopCand(NoPolicy);
+      // Set the top-down policy based on the state of the current top zone and
+      // the instructions outside the zone, including the bottom zone.
+      setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, NULL);
+      pickNodeFromQueue(TopCand);
+      assert(TopCand.Reason != NoCand && "failed to find a candidate");
+      tracePick(TopCand, true);
+      SU = TopCand.SU;
+    }
+  } while (SU->isScheduled);
+
+  IsTopNode = true;
+  Top.removeReady(SU);
+
+  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
+  return SU;
+}
+
+/// Called after ScheduleDAGMI has scheduled an instruction and updated
+/// scheduled/remaining flags in the DAG nodes.
+void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
+  Top.bumpNode(SU);
+}
+
+/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
+static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) {
+  return new ScheduleDAGMI(C, new PostGenericScheduler(C), /*IsPostRA=*/true);
+}
 
 //===----------------------------------------------------------------------===//
 // ILP Scheduler. Currently for experimental analysis of heuristics.
