[llvm] r358583 - [ScheduleDAGRRList] Recompute topological ordering on demand.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 17 08:05:29 PDT 2019


Author: fhahn
Date: Wed Apr 17 08:05:29 2019
New Revision: 358583

URL: http://llvm.org/viewvc/llvm-project?rev=358583&view=rev
Log:
[ScheduleDAGRRList] Recompute topological ordering on demand.

Currently there is a single point in ScheduleDAGRRList, where we
actually query the topological order (besides init code). Currently we
are recomputing the order after adding a node (which does not have
predecessors) and then we add predecessors edge-by-edge.

We can avoid adding edges one-by-one after we add a new node. In that
case, we can just rebuild the order from scratch after adding the edges
to the DAG and avoid all the incremental updates to the ordering.

Also, we can delay updating the DAG until we query the DAG, if we keep a
list of added edges. Depending on the number of updates, we can either
apply them when needed or recompute the order from scratch.

This brings down the geomean compile time of CTMark with -O1 by 0.3% on X86,
with no regressions.

Reviewers: MatzeB, atrick, efriedma, niravd, paquette

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D60125

Modified:
    llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h
    llvm/trunk/lib/CodeGen/ScheduleDAG.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp

Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h?rev=358583&r1=358582&r2=358583&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h (original)
+++ llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h Wed Apr 17 08:05:29 2019
@@ -691,6 +691,12 @@ class TargetRegisterInfo;
     std::vector<SUnit> &SUnits;
     SUnit *ExitSU;
 
+    // Have any new nodes been added?
+    bool Dirty = false;
+
+    // Outstanding added edges, that have not been applied to the ordering.
+    SmallVector<std::pair<SUnit *, SUnit *>, 16> Updates;
+
     /// Maps topological index to the node number.
     std::vector<int> Index2Node;
     /// Maps the node number to its topological index.
@@ -710,6 +716,11 @@ class TargetRegisterInfo;
     /// Assigns the topological index to the node n.
     void Allocate(int n, int index);
 
+    /// Fix the ordering, by either recomputing from scratch or by applying
+    /// any outstanding updates. Uses a heuristic to estimate what will be
+    /// cheaper.
+    void FixOrder();
+
   public:
     ScheduleDAGTopologicalSort(std::vector<SUnit> &SUnits, SUnit *ExitSU);
 
@@ -734,11 +745,19 @@ class TargetRegisterInfo;
     /// added from SUnit \p X to SUnit \p Y.
     void AddPred(SUnit *Y, SUnit *X);
 
+    /// Queues an update to the topological ordering to accommodate an edge to
+    /// be added from SUnit \p X to SUnit \p Y.
+    void AddPredQueued(SUnit *Y, SUnit *X);
+
     /// Updates the topological ordering to accommodate an an edge to be
     /// removed from the specified node \p N from the predecessors of the
     /// current node \p M.
     void RemovePred(SUnit *M, SUnit *N);
 
+    /// Mark the ordering as temporarily broken, after a new node has been
+    /// added.
+    void MarkDirty() { Dirty = true; }
+
     typedef std::vector<int>::iterator iterator;
     typedef std::vector<int>::const_iterator const_iterator;
     iterator begin() { return Index2Node.begin(); }

Modified: llvm/trunk/lib/CodeGen/ScheduleDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAG.cpp?rev=358583&r1=358582&r2=358583&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ScheduleDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/ScheduleDAG.cpp Wed Apr 17 08:05:29 2019
@@ -462,6 +462,11 @@ void ScheduleDAGTopologicalSort::InitDAG
   // On insertion of the edge X->Y, the algorithm first marks by calling DFS
   // the nodes reachable from Y, and then shifts them using Shift to lie
   // immediately after X in Index2Node.
+
+  // Cancel pending updates, mark as valid.
+  Dirty = false;
+  Updates.clear();
+
   unsigned DAGSize = SUnits.size();
   std::vector<SUnit*> WorkList;
   WorkList.reserve(DAGSize);
@@ -515,6 +520,31 @@ void ScheduleDAGTopologicalSort::InitDAG
 #endif
 }
 
+void ScheduleDAGTopologicalSort::FixOrder() {
+  // Recompute from scratch after new nodes have been added.
+  if (Dirty) {
+    InitDAGTopologicalSorting();
+    return;
+  }
+
+  // Otherwise apply updates one-by-one.
+  for (auto &U : Updates)
+    AddPred(U.first, U.second);
+  Updates.clear();
+}
+
+void ScheduleDAGTopologicalSort::AddPredQueued(SUnit *Y, SUnit *X) {
+  // Recomputing the order from scratch is likely more efficient than applying
+  // updates one-by-one for too many updates. The current cut-off is arbitrarily
+  // chosen.
+  Dirty = Dirty || Updates.size() > 10;
+
+  if (Dirty)
+    return;
+
+  Updates.emplace_back(Y, X);
+}
+
 void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
   int UpperBound, LowerBound;
   LowerBound = Node2Index[Y->NodeNum];
@@ -672,6 +702,7 @@ void ScheduleDAGTopologicalSort::Shift(B
 }
 
 bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
+  FixOrder();
   // Is SU reachable from TargetSU via successor edges?
   if (IsReachable(SU, TargetSU))
     return true;
@@ -684,6 +715,7 @@ bool ScheduleDAGTopologicalSort::WillCre
 
 bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
                                              const SUnit *TargetSU) {
+  FixOrder();
   // If insertion of the edge SU->TargetSU would create a cycle
   // then there is a path from TargetSU to SU.
   int UpperBound, LowerBound;

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp?rev=358583&r1=358582&r2=358583&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp Wed Apr 17 08:05:29 2019
@@ -219,6 +219,14 @@ public:
     return Topo.WillCreateCycle(SU, TargetSU);
   }
 
+  /// AddPredQueued - Queues and update to add a predecessor edge to SUnit SU.
+  /// This returns true if this is a new predecessor.
+  /// Does *NOT* update the topological ordering! It just queues an update.
+  void AddPredQueued(SUnit *SU, const SDep &D) {
+    Topo.AddPredQueued(SU, D.getSUnit());
+    SU->addPred(D);
+  }
+
   /// AddPred - adds a predecessor edge to SUnit SU.
   /// This returns true if this is a new predecessor.
   /// Updates the topological ordering if required.
@@ -266,24 +274,22 @@ private:
   void ListScheduleBottomUp();
 
   /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
-  /// Updates the topological ordering if required.
   SUnit *CreateNewSUnit(SDNode *N) {
     unsigned NumSUnits = SUnits.size();
     SUnit *NewNode = newSUnit(N);
     // Update the topological ordering.
     if (NewNode->NodeNum >= NumSUnits)
-      Topo.InitDAGTopologicalSorting();
+      Topo.MarkDirty();
     return NewNode;
   }
 
   /// CreateClone - Creates a new SUnit from an existing one.
-  /// Updates the topological ordering if required.
   SUnit *CreateClone(SUnit *N) {
     unsigned NumSUnits = SUnits.size();
     SUnit *NewNode = Clone(N);
     // Update the topological ordering.
     if (NewNode->NodeNum >= NumSUnits)
-      Topo.InitDAGTopologicalSorting();
+      Topo.MarkDirty();
     return NewNode;
   }
 
@@ -365,7 +371,7 @@ void ScheduleDAGRRList::Schedule() {
   BuildSchedGraph(nullptr);
 
   LLVM_DEBUG(dump());
-  Topo.InitDAGTopologicalSorting();
+  Topo.MarkDirty();
 
   AvailableQueue->initNodes(SUnits);
 
@@ -1017,8 +1023,9 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SU
     NewSU = &SUnits[N->getNodeId()];
     // If NewSU has already been scheduled, we need to clone it, but this
     // negates the benefit to unfolding so just return SU.
-    if (NewSU->isScheduled)
+    if (NewSU->isScheduled) {
       return SU;
+    }
     isNewN = false;
   } else {
     NewSU = CreateNewSUnit(N);
@@ -1071,23 +1078,23 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SU
   for (const SDep &Pred : ChainPreds) {
     RemovePred(SU, Pred);
     if (isNewLoad)
-      AddPred(LoadSU, Pred);
+      AddPredQueued(LoadSU, Pred);
   }
   for (const SDep &Pred : LoadPreds) {
     RemovePred(SU, Pred);
     if (isNewLoad)
-      AddPred(LoadSU, Pred);
+      AddPredQueued(LoadSU, Pred);
   }
   for (const SDep &Pred : NodePreds) {
     RemovePred(SU, Pred);
-    AddPred(NewSU, Pred);
+    AddPredQueued(NewSU, Pred);
   }
   for (SDep D : NodeSuccs) {
     SUnit *SuccDep = D.getSUnit();
     D.setSUnit(SU);
     RemovePred(SuccDep, D);
     D.setSUnit(NewSU);
-    AddPred(SuccDep, D);
+    AddPredQueued(SuccDep, D);
     // Balance register pressure.
     if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
         !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
@@ -1099,7 +1106,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SU
     RemovePred(SuccDep, D);
     if (isNewLoad) {
       D.setSUnit(LoadSU);
-      AddPred(SuccDep, D);
+      AddPredQueued(SuccDep, D);
     }
   }
 
@@ -1107,7 +1114,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SU
   // by LoadSU.
   SDep D(LoadSU, SDep::Data, 0);
   D.setLatency(LoadSU->Latency);
-  AddPred(NewSU, D);
+  AddPredQueued(NewSU, D);
 
   if (isNewLoad)
     AvailableQueue->addNode(LoadSU);
@@ -1179,7 +1186,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuc
   // New SUnit has the exact same predecessors.
   for (SDep &Pred : SU->Preds)
     if (!Pred.isArtificial())
-      AddPred(NewSU, Pred);
+      AddPredQueued(NewSU, Pred);
 
   // Only copy scheduled successors. Cut them from old node's successor
   // list and move them over.
@@ -1191,7 +1198,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuc
     if (SuccSU->isScheduled) {
       SDep D = Succ;
       D.setSUnit(NewSU);
-      AddPred(SuccSU, D);
+      AddPredQueued(SuccSU, D);
       D.setSUnit(SU);
       DelDeps.push_back(std::make_pair(SuccSU, D));
     }
@@ -1230,14 +1237,14 @@ void ScheduleDAGRRList::InsertCopiesAndM
     if (SuccSU->isScheduled) {
       SDep D = Succ;
       D.setSUnit(CopyToSU);
-      AddPred(SuccSU, D);
+      AddPredQueued(SuccSU, D);
       DelDeps.push_back(std::make_pair(SuccSU, Succ));
     }
     else {
       // Avoid scheduling the def-side copy before other successors. Otherwise
       // we could introduce another physreg interference on the copy and
       // continue inserting copies indefinitely.
-      AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
+      AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
     }
   }
   for (auto &DelDep : DelDeps)
@@ -1245,10 +1252,10 @@ void ScheduleDAGRRList::InsertCopiesAndM
 
   SDep FromDep(SU, SDep::Data, Reg);
   FromDep.setLatency(SU->Latency);
-  AddPred(CopyFromSU, FromDep);
+  AddPredQueued(CopyFromSU, FromDep);
   SDep ToDep(CopyFromSU, SDep::Data, 0);
   ToDep.setLatency(CopyFromSU->Latency);
-  AddPred(CopyToSU, ToDep);
+  AddPredQueued(CopyToSU, ToDep);
 
   AvailableQueue->updateNode(SU);
   AvailableQueue->addNode(CopyFromSU);
@@ -1478,6 +1485,11 @@ SUnit *ScheduleDAGRRList::PickNodeToSche
   if (CurSU)
     return CurSU;
 
+  // We query the topological order in the loop body, so make sure outstanding
+  // updates are applied before entering it (we only enter the loop if there
+  // are some interferences). If we make changes to the ordering, we exit
+  // the loop.
+
   // All candidates are delayed due to live physical reg dependencies.
   // Try backtracking, code duplication, or inserting cross class copies
   // to resolve it.
@@ -1507,7 +1519,7 @@ SUnit *ScheduleDAGRRList::PickNodeToSche
       }
       LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
                         << ") to SU(" << TrySU->NodeNum << ")\n");
-      AddPred(TrySU, SDep(BtSU, SDep::Artificial));
+      AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
 
       // If one or more successors has been unscheduled, then the current
       // node is no longer available.
@@ -1561,14 +1573,14 @@ SUnit *ScheduleDAGRRList::PickNodeToSche
       InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
       LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
                         << " to SU #" << Copies.front()->NodeNum << "\n");
-      AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+      AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
       NewDef = Copies.back();
     }
 
     LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
                       << " to SU #" << TrySU->NodeNum << "\n");
     LiveRegDefs[Reg] = NewDef;
-    AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+    AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
     TrySU->isAvailable = false;
     CurSU = NewDef;
   }
@@ -3017,9 +3029,9 @@ void RegReductionPQBase::PrescheduleNode
       if (SuccSU != &SU) {
         Edge.setSUnit(PredSU);
         scheduleDAG->RemovePred(SuccSU, Edge);
-        scheduleDAG->AddPred(&SU, Edge);
+        scheduleDAG->AddPredQueued(&SU, Edge);
         Edge.setSUnit(&SU);
-        scheduleDAG->AddPred(SuccSU, Edge);
+        scheduleDAG->AddPredQueued(SuccSU, Edge);
         --i;
       }
     }
@@ -3101,7 +3113,7 @@ void RegReductionPQBase::AddPseudoTwoAdd
           LLVM_DEBUG(dbgs()
                      << "    Adding a pseudo-two-addr edge from SU #"
                      << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
-          scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
+          scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
         }
       }
     }




More information about the llvm-commits mailing list