[llvm-commits] [llvm] r168773 - in /llvm/trunk: include/llvm/CodeGen/ScheduleDFS.h lib/CodeGen/MachineScheduler.cpp lib/CodeGen/ScheduleDAGInstrs.cpp test/CodeGen/X86/misched-matrix.ll

Thu Nov 29 06:40:18 PST 2012

On 29.11.2012, at 13:51, "Demikhovsky, Elena" <elena.demikhovsky at intel.com> wrote:

> Hi Andrew,
>  
> This test
> llc  llvm\test\CodeGen\X86\misched-matrix.ll -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -misched=ilpmin -verify-machineinstrs
>  
> fails with assertion on VC10. I hope you see the attached picture.
>  
> <image001.png>

r168885 should fix this, thanks for the heads up.

For future reference: copying the error message as plain text is probably a better idea than attaching a screenshot. As far as I know, pressing CTRL+C in a Windows message box copies its text to the clipboard.

- Ben

>  
> - Elena
> -----Original Message-----
> From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Andrew Trick
> Sent: Wednesday, November 28, 2012 07:13
> To: llvm-commits at cs.uiuc.edu
> Subject: [llvm-commits] [llvm] r168773 - in /llvm/trunk: include/llvm/CodeGen/ScheduleDFS.h lib/CodeGen/MachineScheduler.cpp lib/CodeGen/ScheduleDAGInstrs.cpp test/CodeGen/X86/misched-matrix.ll
>  
> Author: atrick
> Date: Tue Nov 27 23:13:28 2012
> New Revision: 168773
>  
> URL: http://llvm.org/viewvc/llvm-project?rev=168773&view=rev
> Log:
> misched: Analysis that partitions the DAG into subtrees.
>  
> This is a simple, cheap infrastructure for analyzing the shape of a DAG. It recognizes uniform DAGs that take the shape of bottom-up subtrees, such as the included matrix multiplication example. This is useful for heuristics that balance register pressure with ILP. Two canonical expressions of the heuristic are implemented in scheduling
> modes: -misched-ilpmin and -misched-ilpmax.
>  
> Modified:
>     llvm/trunk/include/llvm/CodeGen/ScheduleDFS.h
>     llvm/trunk/lib/CodeGen/MachineScheduler.cpp
>     llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
>     llvm/trunk/test/CodeGen/X86/misched-matrix.ll
>  
> Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDFS.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDFS.h?rev=168773&r1=168772&r2=168773&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/CodeGen/ScheduleDFS.h (original)
> +++ llvm/trunk/include/llvm/CodeGen/ScheduleDFS.h Tue Nov 27 23:13:28 2012
> @@ -14,38 +14,41 @@
> #ifndef LLVM_CODEGEN_SCHEDULEDAGILP_H
> #define LLVM_CODEGEN_SCHEDULEDAGILP_H
> +#include "llvm/CodeGen/ScheduleDAG.h"
> #include "llvm/Support/DataTypes.h"
> #include <vector>
>  namespace llvm {
>  class raw_ostream;
> +class IntEqClasses;
> class ScheduleDAGInstrs;
> class SUnit;
>  /// \brief Represent the ILP of the subDAG rooted at a DAG node.
> +///
> +/// When computed using bottom-up DFS, this metric assumes that the DAG is a
> +/// forest of trees with roots at the bottom of the schedule branching upward.
> struct ILPValue {
>    unsigned InstrCount;
> -  unsigned Cycles;
> -
> -  ILPValue(): InstrCount(0), Cycles(0) {}
> +  /// Length may either correspond to depth or height, depending on direction,
> +  /// and cycles or nodes depending on context.
> +  unsigned Length;
> -  ILPValue(unsigned count, unsigned cycles):
> -    InstrCount(count), Cycles(cycles) {}
> -
> -  bool isValid() const { return Cycles > 0; }
> +  ILPValue(unsigned count, unsigned length):
> +    InstrCount(count), Length(length) {}
>    // Order by the ILP metric's value.
>    bool operator<(ILPValue RHS) const {
> -    return (uint64_t)InstrCount * RHS.Cycles
> -      < (uint64_t)Cycles * RHS.InstrCount;
> +    return (uint64_t)InstrCount * RHS.Length
> +      < (uint64_t)Length * RHS.InstrCount;
>    }
>    bool operator>(ILPValue RHS) const {
>      return RHS < *this;
>    }
>    bool operator<=(ILPValue RHS) const {
> -    return (uint64_t)InstrCount * RHS.Cycles
> -      <= (uint64_t)Cycles * RHS.InstrCount;
> +    return (uint64_t)InstrCount * RHS.Length
> +      <= (uint64_t)Length * RHS.InstrCount;
>    }
>    bool operator>=(ILPValue RHS) const {
>      return RHS <= *this;
> @@ -58,25 +61,88 @@
> #endif
> };
> -/// \brief Compute the values of each DAG node for an ILP metric.
> +/// \brief Compute the values of each DAG node for various metrics during DFS.
> ///
> -/// This metric assumes that the DAG is a forest of trees with roots at the
> -/// bottom of the schedule.
> -class ScheduleDAGILP {
> +/// ILPValues summarize the DAG subtree rooted at each node up to
> +/// SubtreeLimit. ILPValues are also valid for interior nodes of a subtree, not
> +/// just the root.
> +class SchedDFSResult {
> +  friend class SchedDFSImpl;
> +
> +  /// \brief Per-SUnit data computed during DFS for various metrics.
> +  struct NodeData {
> +    unsigned InstrCount;
> +    unsigned SubtreeID;
> +
> +    NodeData(): InstrCount(0), SubtreeID(0) {}  };
> +
> +  /// \brief Record a connection between subtrees and the connection level.
> +  struct Connection {
> +    unsigned TreeID;
> +    unsigned Level;
> +
> +    Connection(unsigned tree, unsigned level): TreeID(tree),
> + Level(level) {}  };
> +
>    bool IsBottomUp;
> -  std::vector<ILPValue> ILPValues;
> +  unsigned SubtreeLimit;
> +  /// DFS results for each SUnit in this DAG.
> +  std::vector<NodeData> DFSData;
> +
> +  // For each subtree discovered during DFS, record its connections to other
> +  // subtrees.
> +  std::vector<SmallVector<Connection, 4> > SubtreeConnections;
> +
> +  /// Cache the current connection level of each subtree.
> +  /// This mutable array is updated during scheduling.
> +  std::vector<unsigned> SubtreeConnectLevels;
>  public:
> -  ScheduleDAGILP(bool IsBU): IsBottomUp(IsBU) {}
> +  SchedDFSResult(bool IsBU, unsigned lim)
> +    : IsBottomUp(IsBU), SubtreeLimit(lim) {}
> +
> +  /// \brief Clear the results.
> +  void clear() {
> +    DFSData.clear();
> +    SubtreeConnections.clear();
> +    SubtreeConnectLevels.clear();
> +  }
>    /// \brief Initialize the result data with the size of the DAG.
> -  void resize(unsigned NumSUnits);
> +  void resize(unsigned NumSUnits) {
> +    DFSData.resize(NumSUnits);
> +  }
> -  /// \brief Compute the ILP metric for the subDAG at this root.
> -  void computeILP(const SUnit *Root);
> +  /// \brief Compute various metrics for the DAG with given roots.
> +  void compute(ArrayRef<SUnit *> Roots);
>    /// \brief Get the ILP value for a DAG node.
> -  ILPValue getILP(const SUnit *SU);
> +  ///
> +  /// A leaf node has an ILP of 1/1.
> +  ILPValue getILP(const SUnit *SU) {
> +    return ILPValue(DFSData[SU->NodeNum].InstrCount, 1 +
> + SU->getDepth());  }
> +
> +  /// \brief The number of subtrees detected in this DAG.
> +  unsigned getNumSubtrees() const { return SubtreeConnectLevels.size();
> + }
> +
> +  /// \brief Get the ID of the subtree the given DAG node belongs to.
> +  unsigned getSubtreeID(const SUnit *SU) {
> +    return DFSData[SU->NodeNum].SubtreeID;  }
> +
> +  /// \brief Get the connection level of a subtree.
> +  ///
> +  /// For bottom-up trees, the connection level is the latency depth (in cycles)
> +  /// of the deepest connection to another subtree.
> +  unsigned getSubtreeLevel(unsigned SubtreeID) {
> +    return SubtreeConnectLevels[SubtreeID];  }
> +
> +  /// \brief Scheduler callback to update SubtreeConnectLevels when a tree is
> +  /// initially scheduled.
> +  void scheduleTree(unsigned SubtreeID);
> };
>  raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val);
>  
> Modified: llvm/trunk/lib/CodeGen/MachineScheduler.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineScheduler.cpp?rev=168773&r1=168772&r2=168773&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MachineScheduler.cpp (original)
> +++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp Tue Nov 27 23:13:28 2012
> @@ -2054,58 +2054,99 @@
> namespace {
> /// \brief Order nodes by the ILP metric.
> struct ILPOrder {
> -  ScheduleDAGILP *ILP;
> +  SchedDFSResult *DFSResult;
> +  BitVector *ScheduledTrees;
>    bool MaximizeILP;
> -  ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}
> +  ILPOrder(SchedDFSResult *dfs, BitVector *schedtrees, bool MaxILP)
> +    : DFSResult(dfs), ScheduledTrees(schedtrees), MaximizeILP(MaxILP)
> + {}
>    /// \brief Apply a less-than relation on node priority.
> +  ///
> +  /// (Return true if A comes after B in the Q.)
>    bool operator()(const SUnit *A, const SUnit *B) const {
> -    // Return true if A comes after B in the Q.
> +    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
> +    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
> +    if (SchedTreeA != SchedTreeB) {
> +      // Unscheduled trees have lower priority.
> +      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
> +        return ScheduledTrees->test(SchedTreeB);
> +
> +      // Trees with shallower connections have lower priority.
> +      if (DFSResult->getSubtreeLevel(SchedTreeA)
> +          != DFSResult->getSubtreeLevel(SchedTreeB)) {
> +        return DFSResult->getSubtreeLevel(SchedTreeA)
> +          < DFSResult->getSubtreeLevel(SchedTreeB);
> +      }
> +    }
>      if (MaximizeILP)
> -      return ILP->getILP(A) < ILP->getILP(B);
> +      return DFSResult->getILP(A) < DFSResult->getILP(B);
>      else
> -      return ILP->getILP(A) > ILP->getILP(B);
> +      return DFSResult->getILP(A) > DFSResult->getILP(B);
>    }
> };
>  /// \brief Schedule based on the ILP metric.
> class ILPScheduler : public MachineSchedStrategy {
> -  ScheduleDAGILP ILP;
> +  /// In case all subtrees are eventually connected to a common root through
> +  /// data dependence (e.g. reduction), place an upper limit on their size.
> +  ///
> +  /// FIXME: A subtree limit is generally good, but in the situation commented
> +  /// above, where multiple similar subtrees feed a common root, we should
> +  /// only split at a point where the resulting subtrees will be balanced.
> +  /// (a motivating test case must be found).
> +  static const unsigned SubtreeLimit = 16;
> +
> +  SchedDFSResult DFSResult;
> +  BitVector ScheduledTrees;
>    ILPOrder Cmp;
>    std::vector<SUnit*> ReadyQ;
> public:
>    ILPScheduler(bool MaximizeILP)
> -  : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}
> +  : DFSResult(/*BottomUp=*/true, SubtreeLimit),
> +    Cmp(&DFSResult, &ScheduledTrees, MaximizeILP) {}
>    virtual void initialize(ScheduleDAGMI *DAG) {
>      ReadyQ.clear();
> -    ILP.resize(DAG->SUnits.size());
> +    DFSResult.clear();
> +    DFSResult.resize(DAG->SUnits.size());
> +    ScheduledTrees.clear();
>    }
>    virtual void registerRoots() {
> -    for (std::vector<SUnit*>::const_iterator
> -           I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
> -      ILP.computeILP(*I);
> -    }
> +    DFSResult.compute(ReadyQ);
> +    ScheduledTrees.resize(DFSResult.getNumSubtrees());
>    }
>    /// Implement MachineSchedStrategy interface.
>    /// -----------------------------------------
> +  /// Callback to select the highest priority node from the ready Q.
>    virtual SUnit *pickNode(bool &IsTopNode) {
>      if (ReadyQ.empty()) return NULL;
>      pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
>      SUnit *SU = ReadyQ.back();
>      ReadyQ.pop_back();
>      IsTopNode = false;
> -    DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr()
> -          << " ILP: " << ILP.getILP(SU) << '\n');
> +    DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
> +          << *SU->getInstr()
> +          << " ILP: " << DFSResult.getILP(SU)
> +          << " Tree: " << DFSResult.getSubtreeID(SU) << " @"
> +          << DFSResult.getSubtreeLevel(DFSResult.getSubtreeID(SU))<<
> + '\n');
>      return SU;
>    }
> -  virtual void schedNode(SUnit *, bool) {}
> +  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
> +  /// DFSResults, and resort the priority Q.
> +  virtual void schedNode(SUnit *SU, bool IsTopNode) {
> +    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
> +    if (!ScheduledTrees.test(DFSResult.getSubtreeID(SU))) {
> +      ScheduledTrees.set(DFSResult.getSubtreeID(SU));
> +      DFSResult.scheduleTree(DFSResult.getSubtreeID(SU));
> +      std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
> +    }
> +  }
>    virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
>  
> Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=168773&r1=168772&r2=168773&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original)
> +++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Tue Nov 27 23:13:28 2012
> @@ -12,7 +12,7 @@
> //
> //===----------------------------------------------------------------------===//
> -#define DEBUG_TYPE "sched-instrs"
> +#define DEBUG_TYPE "misched"
> #include "llvm/Operator.h"
> #include "llvm/Analysis/AliasAnalysis.h"
> #include "llvm/Analysis/ValueTracking.h"
> @@ -949,6 +949,120 @@
>    return "dag." + BB->getFullName();
> }
> +//===----------------------------------------------------------------------===//
> +// SchedDFSResult Implementation
> +//===----------------------------------------------------------------------===//
> +
> +namespace llvm {
> +/// \brief Internal state used to compute SchedDFSResult.
> +class SchedDFSImpl {
> +  SchedDFSResult &R;
> +
> +  /// Join DAG nodes into equivalence classes by their subtree.
> +  IntEqClasses SubtreeClasses;
> +  /// List PredSU, SuccSU pairs that represent data edges between subtrees.
> +  std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;
> +
> +public:
> +  SchedDFSImpl(SchedDFSResult &r): R(r),
> +SubtreeClasses(R.DFSData.size()) {}
> +
> +  /// SubtreeID is initialized to zero, set to itself to flag the root of a
> +  /// subtree, set to the parent to indicate an interior node,
> +  /// then set to a representative subtree ID during finalization.
> +  bool isVisited(const SUnit *SU) const {
> +    return R.DFSData[SU->NodeNum].SubtreeID;  }
> +
> +  /// Initialize this node's instruction count. We don't need to flag the node
> +  /// visited until visitPostorder because the DAG cannot have cycles.
> +  void visitPreorder(const SUnit *SU) {
> +    R.DFSData[SU->NodeNum].InstrCount = SU->getInstr()->isTransient() ?
> + 0 : 1;  }
> +
> +  /// Mark this node as either the root of a subtree or an interior
> +  /// node. Increment the parent node's instruction count.
> +  void visitPostorder(const SUnit *SU, const SDep *PredDep, const SUnit *Parent) {
> +    R.DFSData[SU->NodeNum].SubtreeID = SU->NodeNum;
> +
> +    // Join the child to its parent if they are connected via data dependence
> +    // and do not exceed the limit.
> +    if (!Parent || PredDep->getKind() != SDep::Data)
> +      return;
> +
> +    unsigned PredCnt = R.DFSData[SU->NodeNum].InstrCount;
> +    if (PredCnt > R.SubtreeLimit)
> +      return;
> +
> +    R.DFSData[SU->NodeNum].SubtreeID = Parent->NodeNum;
> +
> +    // Add the recently finished predecessor's bottom-up descendent count.
> +    R.DFSData[Parent->NodeNum].InstrCount += PredCnt;
> +    SubtreeClasses.join(Parent->NodeNum, SU->NodeNum);  }
> +
> +  /// Determine whether the DFS cross edge should be considered a subtree edge
> +  /// or a connection between subtrees.
> +  void visitCross(const SDep &PredDep, const SUnit *Succ) {
> +    if (PredDep.getKind() == SDep::Data) {
> +      // If this is a cross edge to a root, join the subtrees. This happens when
> +      // the root was first reached by a non-data dependence.
> +      unsigned NodeNum = PredDep.getSUnit()->NodeNum;
> +      unsigned PredCnt = R.DFSData[NodeNum].InstrCount;
> +      if (R.DFSData[NodeNum].SubtreeID == NodeNum && PredCnt < R.SubtreeLimit) {
> +        R.DFSData[NodeNum].SubtreeID = Succ->NodeNum;
> +        R.DFSData[Succ->NodeNum].InstrCount += PredCnt;
> +        SubtreeClasses.join(Succ->NodeNum, NodeNum);
> +        return;
> +      }
> +    }
> +    ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(),
> + Succ));  }
> +
> +  /// Set each node's subtree ID to the representative ID and record connections
> +  /// between trees.
> +  void finalize() {
> +    SubtreeClasses.compress();
> +    R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
> +    R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
> +    DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
> +    for (unsigned Idx = 0, End = R.DFSData.size(); Idx != End; ++Idx) {
> +      R.DFSData[Idx].SubtreeID = SubtreeClasses[Idx];
> +      DEBUG(dbgs() << "  SU(" << Idx << ") in tree "
> +            << R.DFSData[Idx].SubtreeID << '\n');
> +    }
> +    for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator
> +           I = ConnectionPairs.begin(), E = ConnectionPairs.end();
> +         I != E; ++I) {
> +      unsigned PredTree = SubtreeClasses[I->first->NodeNum];
> +      unsigned SuccTree = SubtreeClasses[I->second->NodeNum];
> +      if (PredTree == SuccTree)
> +        continue;
> +      unsigned Depth = I->first->getDepth();
> +      addConnection(PredTree, SuccTree, Depth);
> +      addConnection(SuccTree, PredTree, Depth);
> +    }
> +  }
> +
> +protected:
> +  /// Called by finalize() to record a connection between trees.
> +  void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
> +    if (!Depth)
> +      return;
> +
> +    SmallVectorImpl<SchedDFSResult::Connection> &Connections =
> +      R.SubtreeConnections[FromTree];
> +    for (SmallVectorImpl<SchedDFSResult::Connection>::iterator
> +           I = Connections.begin(), E = Connections.end(); I != E; ++I) {
> +      if (I->TreeID == ToTree) {
> +        I->Level = std::max(I->Level, Depth);
> +        return;
> +      }
> +    }
> +    Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
> +  }
> +};
> +} // namespace llvm
> +
> namespace {
> /// \brief Manage the stack used by a reverse depth-first search over the DAG.
> class SchedDAGReverseDFS {
> @@ -961,7 +1075,10 @@
>    }
>    void advance() { ++DFSStack.back().second; }
> -  void backtrack() { DFSStack.pop_back(); }
> +  const SDep *backtrack() {
> +    DFSStack.pop_back();
> +    return DFSStack.empty() ? 0 : llvm::prior(DFSStack.back().second);
> +  }
>    const SUnit *getCurr() const { return DFSStack.back().first; }
> @@ -973,57 +1090,65 @@
> };
> } // anonymous
> -void ScheduleDAGILP::resize(unsigned NumSUnits) {
> -  ILPValues.resize(NumSUnits);
> -}
> -
> -ILPValue ScheduleDAGILP::getILP(const SUnit *SU) {
> -  return ILPValues[SU->NodeNum];
> -}
> -
> -// A leaf node has an ILP of 1/1.
> -static ILPValue initILP(const SUnit *SU) {
> -  unsigned Cnt = SU->getInstr()->isTransient() ? 0 : 1;
> -  return ILPValue(Cnt, 1 + SU->getDepth()); -}
> -
> /// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
> /// search from this root.
> -void ScheduleDAGILP::computeILP(const SUnit *Root) {
> +void SchedDFSResult::compute(ArrayRef<SUnit *> Roots) {
>    if (!IsBottomUp)
>      llvm_unreachable("Top-down ILP metric is unimplemnted");
> -  SchedDAGReverseDFS DFS;
> -  // Mark a node visited by validating it.
> -  ILPValues[Root->NodeNum] = initILP(Root);
> -  DFS.follow(Root);
> -  for (;;) {
> -    // Traverse the leftmost path as far as possible.
> -    while (DFS.getPred() != DFS.getPredEnd()) {
> -      const SUnit *PredSU = DFS.getPred()->getSUnit();
> -      DFS.advance();
> -      // If the pred is already valid, skip it.
> -      if (ILPValues[PredSU->NodeNum].isValid())
> -        continue;
> -      ILPValues[PredSU->NodeNum] = initILP(PredSU);
> -      DFS.follow(PredSU);
> +  SchedDFSImpl Impl(*this);
> +  for (ArrayRef<const SUnit*>::const_iterator
> +         RootI = Roots.begin(), RootE = Roots.end(); RootI != RootE; ++RootI) {
> +    SchedDAGReverseDFS DFS;
> +    Impl.visitPreorder(*RootI);
> +    DFS.follow(*RootI);
> +    for (;;) {
> +      // Traverse the leftmost path as far as possible.
> +      while (DFS.getPred() != DFS.getPredEnd()) {
> +        const SDep &PredDep = *DFS.getPred();
> +        DFS.advance();
> +        // If the pred is already valid, skip it. We may preorder visit a node
> +        // with InstrCount==0 more than once, but it won't affect heuristics
> +        // because we don't care about cross edges to leaf copies.
> +        if (Impl.isVisited(PredDep.getSUnit())) {
> +          Impl.visitCross(PredDep, DFS.getCurr());
> +          continue;
> +        }
> +        Impl.visitPreorder(PredDep.getSUnit());
> +        DFS.follow(PredDep.getSUnit());
> +      }
> +      // Visit the top of the stack in postorder and backtrack.
> +      const SUnit *Child = DFS.getCurr();
> +      const SDep *PredDep = DFS.backtrack();
> +      Impl.visitPostorder(Child, PredDep, PredDep ? DFS.getCurr() : 0);
> +      if (DFS.isComplete())
> +        break;
>      }
> -    // Visit the top of the stack in postorder and backtrack.
> -    unsigned PredCount = ILPValues[DFS.getCurr()->NodeNum].InstrCount;
> -    DFS.backtrack();
> -    if (DFS.isComplete())
> -      break;
> -    // Add the recently finished predecessor's bottom-up descendent count.
> -    ILPValues[DFS.getCurr()->NodeNum].InstrCount += PredCount;
> +  }
> +  Impl.finalize();
> +}
> +
> +/// The root of the given SubtreeID was just scheduled. For all subtrees
> +/// connected to this tree, record the depth of the connection so that the
> +/// nearest connected subtrees can be prioritized.
> +void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
> +  for (SmallVectorImpl<Connection>::const_iterator
> +         I = SubtreeConnections[SubtreeID].begin(),
> +         E = SubtreeConnections[SubtreeID].end(); I != E; ++I) {
> +    SubtreeConnectLevels[I->TreeID] =
> +      std::max(SubtreeConnectLevels[I->TreeID], I->Level);
> +    DEBUG(dbgs() << "  Tree: " << I->TreeID
> +          << " @" << SubtreeConnectLevels[I->TreeID] << '\n');
>    }
> }
>  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)  void ILPValue::print(raw_ostream &OS) const {
> -  if (!isValid())
> +  OS << InstrCount << " / " << Length << " = ";  if (!Length)
>      OS << "BADILP";
> -  OS << InstrCount << " / " << Cycles << " = "
> -     << format("%g", ((double)InstrCount / Cycles));
> +  else
> +    OS << format("%g", ((double)InstrCount / Length));
> }
>  void ILPValue::dump() const {
>  
> Modified: llvm/trunk/test/CodeGen/X86/misched-matrix.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/misched-matrix.ll?rev=168773&r1=168772&r2=168773&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/misched-matrix.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/misched-matrix.ll Tue Nov 27 23:13:28 2012
> @@ -1,6 +1,12 @@
> ; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
> ; RUN:          -misched-topdown -verify-machineinstrs \
> ; RUN:     | FileCheck %s -check-prefix=TOPDOWN
> +; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
> +; RUN:          -misched=ilpmin -verify-machineinstrs \
> +; RUN:     | FileCheck %s -check-prefix=ILPMIN
> +; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
> +; RUN:          -misched=ilpmax -verify-machineinstrs \
> +; RUN:     | FileCheck %s -check-prefix=ILPMAX
> ;
> ; Verify that the MI scheduler minimizes register pressure for a
> ; uniform set of bottom-up subtrees (unrolled matrix multiply).
> @@ -17,6 +23,68 @@
> ; TOPDOWN: movl %{{.*}}, 8(
> ; TOPDOWN: movl %{{.*}}, 12(
> ; TOPDOWN: %for.end
> +;
> +; For -misched=ilpmin, verify that each expression subtree is
> +; scheduled independently, and that the imull/adds are interleaved.
> +;
> +; ILPMIN: %for.body
> +; ILPMIN: movl %{{.*}}, (
> +; ILPMIN: imull
> +; ILPMIN: imull
> +; ILPMIN: addl
> +; ILPMIN: imull
> +; ILPMIN: addl
> +; ILPMIN: imull
> +; ILPMIN: addl
> +; ILPMIN: movl %{{.*}}, 4(
> +; ILPMIN: imull
> +; ILPMIN: imull
> +; ILPMIN: addl
> +; ILPMIN: imull
> +; ILPMIN: addl
> +; ILPMIN: imull
> +; ILPMIN: addl
> +; ILPMIN: movl %{{.*}}, 8(
> +; ILPMIN: imull
> +; ILPMIN: imull
> +; ILPMIN: addl
> +; ILPMIN: imull
> +; ILPMIN: addl
> +; ILPMIN: imull
> +; ILPMIN: addl
> +; ILPMIN: movl %{{.*}}, 12(
> +; ILPMIN: %for.end
> +;
> +; For -misched=ilpmax, verify that each expression subtree is
> +; scheduled independently, and that the imull/adds are clustered.
> +;
> +; ILPMAX: %for.body
> +; ILPMAX: movl %{{.*}}, (
> +; ILPMAX: imull
> +; ILPMAX: imull
> +; ILPMAX: imull
> +; ILPMAX: imull
> +; ILPMAX: addl
> +; ILPMAX: addl
> +; ILPMAX: addl
> +; ILPMAX: movl %{{.*}}, 4(
> +; ILPMAX: imull
> +; ILPMAX: imull
> +; ILPMAX: imull
> +; ILPMAX: imull
> +; ILPMAX: addl
> +; ILPMAX: addl
> +; ILPMAX: addl
> +; ILPMAX: movl %{{.*}}, 8(
> +; ILPMAX: imull
> +; ILPMAX: imull
> +; ILPMAX: imull
> +; ILPMAX: imull
> +; ILPMAX: addl
> +; ILPMAX: addl
> +; ILPMAX: addl
> +; ILPMAX: movl %{{.*}}, 12(
> +; ILPMAX: %for.end
>  define void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2,
> [4 x i32]* noalias nocapture %m3) nounwind uwtable ssp {
>  
>  
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> ---------------------------------------------------------------------
> Intel Israel (74) Limited
> 
> This e-mail and any attachments may contain confidential material for
> the sole use of the intended recipient(s). Any review or distribution
> by others is strictly prohibited. If you are not the intended
> recipient, please contact the sender and delete all copies.
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits