[llvm] [MachinePipeliner] Make Recurrence MII More Accurate (PR #105475)

Michael Marjieh via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 26 07:34:56 PDT 2024


================
@@ -339,24 +341,58 @@ class NodeSet {
   using iterator = SetVector<SUnit *>::const_iterator;
 
   NodeSet() = default;
-  NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
-    Latency = 0;
-    for (const SUnit *Node : Nodes) {
-      DenseMap<SUnit *, unsigned> SuccSUnitLatency;
-      for (const SDep &Succ : Node->Succs) {
-        auto SuccSUnit = Succ.getSUnit();
-        if (!Nodes.count(SuccSUnit))
+  NodeSet(iterator S, iterator E, const SwingSchedulerDAG *DAG)
+      : Nodes(S, E), HasRecurrence(true) {
+    // Calculate the latency of this node set.
+    // Example to demonstrate the calculation:
+    // Given: N0 -> N1 -> N2 -> N0
+    // Edges:
+    // (N0 -> N1, 3)
+    // (N0 -> N1, 5)
+    // (N1 -> N2, 2)
+    // (N2 -> N0, 1)
+    // The total latency which is a lower bound of the recurrence MII is the
+    // longest path from N0 back to N0 given only the edges of this node set.
+    // In this example, the latency is: 5 + 2 + 1 = 8.
+    //
+    // Hold a map from each SUnit in the cycle to the maximum distance from the
+    // source node, considering only the nodes of this node set.
+    DenseMap<SUnit *, unsigned> SUnitToDistance;
+    for (auto *Node : Nodes)
+      SUnitToDistance[Node] = 0;
+
+    for (unsigned I = 1, E = Nodes.size(); I <= E; ++I) {
+      SUnit *U = Nodes[I - 1];
+      SUnit *V = Nodes[I % Nodes.size()];
+      for (const SDep &Succ : U->Succs) {
+        SUnit *SuccSUnit = Succ.getSUnit();
+        if (V != SuccSUnit)
           continue;
-        unsigned CurLatency = Succ.getLatency();
-        unsigned MaxLatency = 0;
-        if (SuccSUnitLatency.count(SuccSUnit))
-          MaxLatency = SuccSUnitLatency[SuccSUnit];
-        if (CurLatency > MaxLatency)
-          SuccSUnitLatency[SuccSUnit] = CurLatency;
+        if (SUnitToDistance[U] + Succ.getLatency() > SUnitToDistance[V]) {
+          SUnitToDistance[V] = SUnitToDistance[U] + Succ.getLatency();
+        }
       }
-      for (auto SUnitLatency : SuccSUnitLatency)
-        Latency += SUnitLatency.second;
     }
+    // Handle a back-edge between a store and a load
+    SUnit *FirstNode = Nodes[0];
+    SUnit *LastNode = Nodes[Nodes.size() - 1];
+
+    if (LastNode->getInstr()->mayStore() && FirstNode->getInstr()->mayLoad()) {
+      for (auto &PI : LastNode->Preds) {
+        // If we have an order dep between a load and a store that is
+        // potentially loop carried then a back-edge exists between the last
+        // node and the first node that isn't modeled in the DAG. Handle it
+        // manually by treating it as a latency-1 edge back to the first node.
+        if (PI.getSUnit() != FirstNode || PI.getKind() != SDep::Order ||
+            !DAG->isLoopCarriedDep(LastNode, PI, false))
+          continue;
+        SUnitToDistance[FirstNode] =
+            std::max(SUnitToDistance[FirstNode], SUnitToDistance[LastNode] + 1);
+      }
+    }
----------------
mmarjieh wrote:

Done

https://github.com/llvm/llvm-project/pull/105475


More information about the llvm-commits mailing list