[llvm] r281716 - [LCG] Redesign the lazy post-order iteration mechanism for the

Chandler Carruth via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 16 03:20:17 PDT 2016


Author: chandlerc
Date: Fri Sep 16 05:20:17 2016
New Revision: 281716

URL: http://llvm.org/viewvc/llvm-project?rev=281716&view=rev
Log:
[LCG] Redesign the lazy post-order iteration mechanism for the
LazyCallGraph to support repeated, stable iterations, even in the face
of graph updates.

This is particularly important to allow the CGSCC pass manager to walk
the RefSCCs (and thus everything else) in a module more than once. Lots
of unittests and other tests were hard or impossible to write because
repeated CGSCC pass managers that didn't invalidate the LazyCallGraph
would conclude the module was empty after the first run. =[ Really,
really bad.

The interesting thing is that in many ways this simplifies the code. We
can now re-use the same code for handling reference edge insertion
updates of the RefSCC graph as we use for handling call edge insertion
updates of the SCC graph. Outside of adapting to the shared logic for
this (which isn't trivial, but is *much* simpler than the DFS it
replaces!), the new code involves inserting RefSCCs newly created when
deleting a reference edge into the cached postorder list in the correct
position, and re-formulating the iterator to be stable and effective
even in the face of these kinds of updates.
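The mechanism described above amounts to an iterator that indexes into a lazily grown, cached post-order sequence: restarting iteration replays the cached prefix and only extends it on demand. A minimal standalone sketch of that idea, with hypothetical names (`Graph`, `buildNext`, `get`, and `sumWalk` stand in for the real LazyCallGraph machinery and are not its actual API):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Stand-in for LazyCallGraph: caches a post-order sequence and grows it
// lazily. Here the "components" are just the ints 0..Total-1.
struct Graph {
  std::vector<int> PostOrder; // cached post-order sequence built so far
  int NextToBuild = 0;        // stand-in for the suspended DFS state
  int Total;                  // how many components exist in all
  explicit Graph(int T) : Total(T) {}

  // Analogue of buildNextRefSCCInPostOrder(): append one more element to
  // the cached sequence; return false once the walk is exhausted.
  bool buildNext() {
    if (NextToBuild == Total)
      return false;
    PostOrder.push_back(NextToBuild++);
    return true;
  }

  // Analogue of the iterator's getRC(): fetch the element at Index,
  // populating the cache on demand; nullptr signals the end.
  const int *get(std::size_t Index) {
    if (Index == PostOrder.size() && !buildNext())
      return nullptr; // we're at the end
    assert(Index < PostOrder.size() && "cache failed to grow");
    return &PostOrder[Index];
  }
};

// A full walk. Because elements are reached by index into the cache,
// repeating the walk yields the identical sequence.
inline int sumWalk(Graph &G) {
  int Sum = 0;
  std::size_t I = 0;
  while (const int *V = G.get(I)) {
    Sum += *V;
    ++I;
  }
  return Sum;
}
```

Stability across graph updates then reduces to keeping the cached sequence (and an index map like `RefSCCIndices`) consistent whenever entries are merged away or inserted, which is what the diff below does for the real structures.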

I've updated the unittests for the LazyCallGraph to re-iterate the
postorder sequence and verify that this all works. We even check that
alternating iterators can be used to trigger the lazy formation of
RefSCCs after mutation has occurred.

It's worth noting that there are a reasonable number of likely
simplifications we can make past this. It isn't clear that we need to
keep the "LeafRefSCCs" around any more, but I've left that list in
place to keep this change more isolated.

Differential Revision: https://reviews.llvm.org/D24219

Modified:
    llvm/trunk/include/llvm/Analysis/LazyCallGraph.h
    llvm/trunk/lib/Analysis/LazyCallGraph.cpp
    llvm/trunk/unittests/Analysis/LazyCallGraphTest.cpp

Modified: llvm/trunk/include/llvm/Analysis/LazyCallGraph.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/LazyCallGraph.h?rev=281716&r1=281715&r2=281716&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/LazyCallGraph.h (original)
+++ llvm/trunk/include/llvm/Analysis/LazyCallGraph.h Fri Sep 16 05:20:17 2016
@@ -730,27 +730,39 @@ public:
     struct IsAtEndT {};
 
     LazyCallGraph *G;
-    RefSCC *C;
+    RefSCC *RC;
 
-    // Build the begin iterator for a node.
-    postorder_ref_scc_iterator(LazyCallGraph &G) : G(&G) {
-      C = G.getNextRefSCCInPostOrder();
-    }
+    /// Build the begin iterator for a node.
+    postorder_ref_scc_iterator(LazyCallGraph &G) : G(&G), RC(getRC(G, 0)) {}
 
-    // Build the end iterator for a node. This is selected purely by overload.
+    /// Build the end iterator for a node. This is selected purely by overload.
     postorder_ref_scc_iterator(LazyCallGraph &G, IsAtEndT /*Nonce*/)
-        : G(&G), C(nullptr) {}
+        : G(&G), RC(nullptr) {}
+
+    /// Get the post-order RefSCC at the given index of the postorder walk,
+    /// populating it if necessary.
+    static RefSCC *getRC(LazyCallGraph &G, int Index) {
+      if (Index == (int)G.PostOrderRefSCCs.size())
+        if (!G.buildNextRefSCCInPostOrder())
+          // We're at the end.
+          return nullptr;
+
+      assert(Index < (int)G.PostOrderRefSCCs.size() &&
+             "Built the next post-order RefSCC without growing list!");
+      return G.PostOrderRefSCCs[Index];
+    }
 
   public:
     bool operator==(const postorder_ref_scc_iterator &Arg) const {
-      return G == Arg.G && C == Arg.C;
+      return G == Arg.G && RC == Arg.RC;
     }
 
-    reference operator*() const { return *C; }
+    reference operator*() const { return *RC; }
 
     using iterator_facade_base::operator++;
     postorder_ref_scc_iterator &operator++() {
-      C = G->getNextRefSCCInPostOrder();
+      assert(RC && "Cannot increment the end iterator!");
+      RC = getRC(*G, G->RefSCCIndices.find(RC)->second + 1);
       return *this;
     }
   };
@@ -897,6 +909,15 @@ private:
   /// Allocator that holds all the call graph RefSCCs.
   SpecificBumpPtrAllocator<RefSCC> RefSCCBPA;
 
+  /// The post-order sequence of RefSCCs.
+  ///
+  /// This list is lazily formed the first time we walk the graph.
+  SmallVector<RefSCC *, 16> PostOrderRefSCCs;
+
+  /// A map from RefSCC to the index for it in the postorder sequence of
+  /// RefSCCs.
+  DenseMap<RefSCC *, int> RefSCCIndices;
+
   /// The leaf RefSCCs of the graph.
   ///
   /// These are all of the RefSCCs which have no children.
@@ -944,8 +965,24 @@ private:
   /// and updates the root leaf list.
   void connectRefSCC(RefSCC &RC);
 
-  /// Retrieve the next node in the post-order RefSCC walk of the call graph.
-  RefSCC *getNextRefSCCInPostOrder();
+  /// Get the index of a RefSCC within the postorder traversal.
+  ///
+  /// Requires that this RefSCC is a valid one in the (perhaps partial)
+  /// postorder traversed part of the graph.
+  int getRefSCCIndex(RefSCC &RC) {
+    auto IndexIt = RefSCCIndices.find(&RC);
+    assert(IndexIt != RefSCCIndices.end() && "RefSCC doesn't have an index!");
+    assert(PostOrderRefSCCs[IndexIt->second] == &RC &&
+           "Index does not point back at RC!");
+    return IndexIt->second;
+  }
+
+  /// Builds the next node in the post-order RefSCC walk of the call graph and
+  /// appends it to the \c PostOrderRefSCCs vector.
+  ///
+  /// Returns true if a new RefSCC was successfully constructed, and false if
+  /// there are no more RefSCCs to build in the graph.
+  bool buildNextRefSCCInPostOrder();
 };
 
 inline LazyCallGraph::Edge::Edge() : Value() {}

Modified: llvm/trunk/lib/Analysis/LazyCallGraph.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/LazyCallGraph.cpp?rev=281716&r1=281715&r2=281716&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/LazyCallGraph.cpp (original)
+++ llvm/trunk/lib/Analysis/LazyCallGraph.cpp Fri Sep 16 05:20:17 2016
@@ -9,6 +9,7 @@
 
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/InstVisitor.h"
@@ -801,7 +802,13 @@ void LazyCallGraph::RefSCC::insertOutgoi
 
 SmallVector<LazyCallGraph::RefSCC *, 1>
 LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
-  assert(G->lookupRefSCC(TargetN) == this && "Target must be in this SCC.");
+  assert(G->lookupRefSCC(TargetN) == this && "Target must be in this RefSCC.");
+  RefSCC &SourceC = *G->lookupRefSCC(SourceN);
+  assert(&SourceC != this && "Source must not be in this RefSCC.");
+  assert(SourceC.isDescendantOf(*this) &&
+         "Source must be a descendant of the Target.");
+
+  SmallVector<RefSCC *, 1> DeletedRefSCCs;
 
 #ifndef NDEBUG
   // In a debug build, verify the RefSCC is valid to start with and when this
@@ -810,110 +817,94 @@ LazyCallGraph::RefSCC::insertIncomingRef
   auto VerifyOnExit = make_scope_exit([&]() { verify(); });
 #endif
 
-  // We store the RefSCCs found to be connected in postorder so that we can use
-  // that when merging. We also return this to the caller to allow them to
-  // invalidate information pertaining to these RefSCCs.
-  SmallVector<RefSCC *, 1> Connected;
-
-  RefSCC &SourceC = *G->lookupRefSCC(SourceN);
-  assert(&SourceC != this && "Source must not be in this SCC.");
-  assert(SourceC.isDescendantOf(*this) &&
-         "Source must be a descendant of the Target.");
-
-  // The algorithm we use for merging SCCs based on the cycle introduced here
-  // is to walk the RefSCC inverted DAG formed by the parent sets. The inverse
-  // graph has the same cycle properties as the actual DAG of the RefSCCs, and
-  // when forming RefSCCs lazily by a DFS, the bottom of the graph won't exist
-  // in many cases which should prune the search space.
-  //
-  // FIXME: We can get this pruning behavior even after the incremental RefSCC
-  // formation by leaving behind (conservative) DFS numberings in the nodes,
-  // and pruning the search with them. These would need to be cleverly updated
-  // during the removal of intra-SCC edges, but could be preserved
-  // conservatively.
-  //
-  // FIXME: This operation currently creates ordering stability problems
-  // because we don't use stably ordered containers for the parent SCCs.
-
-  // The set of RefSCCs that are connected to the parent, and thus will
-  // participate in the merged connected component.
-  SmallPtrSet<RefSCC *, 8> ConnectedSet;
-  ConnectedSet.insert(this);
-
-  // We build up a DFS stack of the parents chains.
-  SmallVector<std::pair<RefSCC *, parent_iterator>, 8> DFSStack;
-  SmallPtrSet<RefSCC *, 8> Visited;
-  int ConnectedDepth = -1;
-  DFSStack.push_back({&SourceC, SourceC.parent_begin()});
-  do {
-    auto DFSPair = DFSStack.pop_back_val();
-    RefSCC *C = DFSPair.first;
-    parent_iterator I = DFSPair.second;
-    auto E = C->parent_end();
+  int SourceIdx = G->RefSCCIndices[&SourceC];
+  int TargetIdx = G->RefSCCIndices[this];
+  assert(SourceIdx < TargetIdx &&
+         "Postorder list doesn't see edge as incoming!");
+
+  // Compute the RefSCCs which (transitively) reach the source. We do this by
+  // working backwards from the source using the parent set in each RefSCC,
+  // skipping any RefSCCs that don't fall in the postorder range. This has the
+  // advantage of walking the sparser parent edge (in high fan-out graphs) but
+  // more importantly this removes examining all forward edges in all RefSCCs
+  // within the postorder range which aren't in fact connected. Only connected
+  // RefSCCs (and their edges) are visited here.
+  auto ComputeSourceConnectedSet = [&](SmallPtrSetImpl<RefSCC *> &Set) {
+    Set.insert(&SourceC);
+    SmallVector<RefSCC *, 4> Worklist;
+    Worklist.push_back(&SourceC);
+    do {
+      RefSCC &RC = *Worklist.pop_back_val();
+      for (RefSCC &ParentRC : RC.parents()) {
+        // Skip any RefSCCs outside the range of source to target in the
+        // postorder sequence.
+        int ParentIdx = G->getRefSCCIndex(ParentRC);
+        assert(ParentIdx > SourceIdx && "Parent cannot precede source in postorder!");
+        if (ParentIdx > TargetIdx)
+          continue;
+        if (Set.insert(&ParentRC).second)
+          // First edge connecting to this parent, add it to our worklist.
+          Worklist.push_back(&ParentRC);
+      }
+    } while (!Worklist.empty());
+  };
 
-    while (I != E) {
-      RefSCC &Parent = *I++;
+  // Use a normal worklist to find which SCCs the target connects to. We still
+  // bound the search based on the range in the postorder list we care about,
+  // but because this is forward connectivity we just "recurse" through the
+  // edges.
+  auto ComputeTargetConnectedSet = [&](SmallPtrSetImpl<RefSCC *> &Set) {
+    Set.insert(this);
+    SmallVector<RefSCC *, 4> Worklist;
+    Worklist.push_back(this);
+    do {
+      RefSCC &RC = *Worklist.pop_back_val();
+      for (SCC &C : RC)
+        for (Node &N : C)
+          for (Edge &E : N) {
+            assert(E.getNode() && "Must have formed a node!");
+            RefSCC &EdgeRC = *G->lookupRefSCC(*E.getNode());
+            if (G->getRefSCCIndex(EdgeRC) <= SourceIdx)
+              // Not in the postorder sequence between source and target.
+              continue;
 
-      // If we have already processed this parent SCC, skip it, and remember
-      // whether it was connected so we don't have to check the rest of the
-      // stack. This also handles when we reach a child of the 'this' SCC (the
-      // callee) which terminates the search.
-      if (ConnectedSet.count(&Parent)) {
-        assert(ConnectedDepth < (int)DFSStack.size() &&
-               "Cannot have a connected depth greater than the DFS depth!");
-        ConnectedDepth = DFSStack.size();
-        continue;
-      }
-      if (Visited.count(&Parent))
-        continue;
+            if (Set.insert(&EdgeRC).second)
+              Worklist.push_back(&EdgeRC);
+          }
+    } while (!Worklist.empty());
+  };
 
-      // We fully explore the depth-first space, adding nodes to the connected
-      // set only as we pop them off, so "recurse" by rotating to the parent.
-      DFSStack.push_back({C, I});
-      C = &Parent;
-      I = C->parent_begin();
-      E = C->parent_end();
-    }
+  // Use a generic helper to update the postorder sequence of RefSCCs and return
+  // a range of any RefSCCs connected into a cycle by inserting this edge. This
+  // routine will also take care of updating the indices into the postorder
+  // sequence.
+  iterator_range<SmallVectorImpl<RefSCC *>::iterator> MergeRange =
+      updatePostorderSequenceForEdgeInsertion(
+          SourceC, *this, G->PostOrderRefSCCs, G->RefSCCIndices,
+          ComputeSourceConnectedSet, ComputeTargetConnectedSet);
 
-    // If we've found a connection anywhere below this point on the stack (and
-    // thus up the parent graph from the caller), the current node needs to be
-    // added to the connected set now that we've processed all of its parents.
-    if ((int)DFSStack.size() == ConnectedDepth) {
-      --ConnectedDepth; // We're finished with this connection.
-      bool Inserted = ConnectedSet.insert(C).second;
-      (void)Inserted;
-      assert(Inserted && "Cannot insert a refSCC multiple times!");
-      Connected.push_back(C);
-    } else {
-      // Otherwise remember that its parents don't ever connect.
-      assert(ConnectedDepth < (int)DFSStack.size() &&
-             "Cannot have a connected depth greater than the DFS depth!");
-      Visited.insert(C);
-    }
-  } while (!DFSStack.empty());
+  // Build a set so we can do fast tests for whether a merge is occurring.
+  SmallPtrSet<RefSCC *, 16> MergeSet(MergeRange.begin(), MergeRange.end());
 
   // Now that we have identified all of the SCCs which need to be merged into
   // a connected set with the inserted edge, merge all of them into this SCC.
-  // We walk the newly connected RefSCCs in the reverse postorder of the parent
-  // DAG walk above and merge in each of their SCC postorder lists. This
-  // ensures a merged postorder SCC list.
   SmallVector<SCC *, 16> MergedSCCs;
   int SCCIndex = 0;
-  for (RefSCC *C : reverse(Connected)) {
-    assert(C != this &&
-           "This RefSCC should terminate the DFS without being reached.");
+  for (RefSCC *RC : MergeRange) {
+    assert(RC != this && "We're merging into the target RefSCC, so it "
+                         "shouldn't be in the range.");
 
     // Merge the parents which aren't part of the merge into the our parents.
-    for (RefSCC *ParentC : C->Parents)
-      if (!ConnectedSet.count(ParentC))
-        Parents.insert(ParentC);
-    C->Parents.clear();
+    for (RefSCC *ParentRC : RC->Parents)
+      if (!MergeSet.count(ParentRC))
+        Parents.insert(ParentRC);
+    RC->Parents.clear();
 
     // Walk the inner SCCs to update their up-pointer and walk all the edges to
     // update any parent sets.
     // FIXME: We should try to find a way to avoid this (rather expensive) edge
     // walk by updating the parent sets in some other manner.
-    for (SCC &InnerC : *C) {
+    for (SCC &InnerC : *RC) {
       InnerC.OuterRefSCC = this;
       SCCIndices[&InnerC] = SCCIndex++;
       for (Node &N : InnerC) {
@@ -922,9 +913,9 @@ LazyCallGraph::RefSCC::insertIncomingRef
           assert(E.getNode() &&
                  "Cannot have a null node within a visited SCC!");
           RefSCC &ChildRC = *G->lookupRefSCC(*E.getNode());
-          if (ConnectedSet.count(&ChildRC))
+          if (MergeSet.count(&ChildRC))
             continue;
-          ChildRC.Parents.erase(C);
+          ChildRC.Parents.erase(RC);
           ChildRC.Parents.insert(this);
         }
       }
@@ -933,19 +924,28 @@ LazyCallGraph::RefSCC::insertIncomingRef
     // Now merge in the SCCs. We can actually move here so try to reuse storage
     // the first time through.
     if (MergedSCCs.empty())
-      MergedSCCs = std::move(C->SCCs);
+      MergedSCCs = std::move(RC->SCCs);
     else
-      MergedSCCs.append(C->SCCs.begin(), C->SCCs.end());
-    C->SCCs.clear();
+      MergedSCCs.append(RC->SCCs.begin(), RC->SCCs.end());
+    RC->SCCs.clear();
+    DeletedRefSCCs.push_back(RC);
   }
 
-  // Finally append our original SCCs to the merged list and move it into
-  // place.
+  // Append our original SCCs to the merged list and move it into place.
   for (SCC &InnerC : *this)
     SCCIndices[&InnerC] = SCCIndex++;
   MergedSCCs.append(SCCs.begin(), SCCs.end());
   SCCs = std::move(MergedSCCs);
 
+  // Remove the merged away RefSCCs from the post order sequence.
+  for (RefSCC *RC : MergeRange)
+    G->RefSCCIndices.erase(RC);
+  int IndexOffset = MergeRange.end() - MergeRange.begin();
+  auto EraseEnd =
+      G->PostOrderRefSCCs.erase(MergeRange.begin(), MergeRange.end());
+  for (RefSCC *RC : make_range(EraseEnd, G->PostOrderRefSCCs.end()))
+    G->RefSCCIndices[RC] -= IndexOffset;
+
   // At this point we have a merged RefSCC with a post-order SCCs list, just
   // connect the nodes to form the new edge.
   SourceN.insertEdgeInternal(TargetN, Edge::Ref);
@@ -954,7 +954,7 @@ LazyCallGraph::RefSCC::insertIncomingRef
   // invalidate any data they have associated with those SCCs. Note that these
   // SCCs are no longer in an interesting state (they are totally empty) but
   // the pointers will remain stable for the life of the graph itself.
-  return Connected;
+  return DeletedRefSCCs;
 }
 
 void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) {
@@ -1218,6 +1218,25 @@ LazyCallGraph::RefSCC::removeInternalRef
   for (int i = 1; i < PostOrderNumber; ++i)
     Result.push_back(G->createRefSCC(*G));
 
+  // Insert the resulting postorder sequence into the global graph postorder
+  // sequence before the current RefSCC in that sequence. The idea being that
+  // this RefSCC is the target of the reference edge removed, and thus has
+  // a direct or indirect edge to every other RefSCC formed and so must be at
+  // the end of any postorder traversal.
+  //
+  // FIXME: It'd be nice to change the APIs so that we returned an iterator
+  // range over the global postorder sequence and generally use that sequence
+  // rather than building a separate result vector here.
+  if (!Result.empty()) {
+    int Idx = G->getRefSCCIndex(*this);
+    G->PostOrderRefSCCs.insert(G->PostOrderRefSCCs.begin() + Idx,
+                               Result.begin(), Result.end());
+    for (int i : seq<int>(Idx, G->PostOrderRefSCCs.size()))
+      G->RefSCCIndices[G->PostOrderRefSCCs[i]] = i;
+    assert(G->PostOrderRefSCCs[G->getRefSCCIndex(*this)] == this &&
+           "Failed to update this RefSCC's index after insertion!");
+  }
+
   for (SCC *C : SCCs) {
     auto PostOrderI = PostOrderMapping.find(&*C->begin());
     assert(PostOrderI != PostOrderMapping.end() &&
@@ -1486,14 +1505,14 @@ void LazyCallGraph::connectRefSCC(RefSCC
     LeafRefSCCs.push_back(&RC);
 }
 
-LazyCallGraph::RefSCC *LazyCallGraph::getNextRefSCCInPostOrder() {
+bool LazyCallGraph::buildNextRefSCCInPostOrder() {
   if (DFSStack.empty()) {
     Node *N;
     do {
       // If we've handled all candidate entry nodes to the SCC forest, we're
       // done.
       if (RefSCCEntryNodes.empty())
-        return nullptr;
+        return false;
 
       N = &get(*RefSCCEntryNodes.pop_back_val());
     } while (N->DFSNumber != 0);
@@ -1575,9 +1594,14 @@ LazyCallGraph::RefSCC *LazyCallGraph::ge
     PendingRefSCCStack.erase(RefSCCNodes.end().base(),
                              PendingRefSCCStack.end());
 
-    // We return the new node here. This essentially suspends the DFS walk
-    // until another RefSCC is requested.
-    return NewRC;
+    // Push the new node into the postorder list and return true indicating we
+    // successfully grew the postorder sequence by one.
+    bool Inserted =
+        RefSCCIndices.insert({NewRC, PostOrderRefSCCs.size()}).second;
+    (void)Inserted;
+    assert(Inserted && "Cannot already have this RefSCC in the index map!");
+    PostOrderRefSCCs.push_back(NewRC);
+    return true;
   }
 }
 

Modified: llvm/trunk/unittests/Analysis/LazyCallGraphTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Analysis/LazyCallGraphTest.cpp?rev=281716&r1=281715&r2=281716&view=diff
==============================================================================
--- llvm/trunk/unittests/Analysis/LazyCallGraphTest.cpp (original)
+++ llvm/trunk/unittests/Analysis/LazyCallGraphTest.cpp Fri Sep 16 05:20:17 2016
@@ -120,6 +120,101 @@ static const char DiamondOfTriangles[] =
      "  ret void\n"
      "}\n";
 
+/*
+   IR forming a reference graph with a diamond of triangle-shaped RefSCCs
+
+           d1
+          /  \
+         d3--d2
+        /     \
+       b1     c1
+     /  \    /  \
+    b3--b2  c3--c2
+         \  /
+          a1
+         /  \
+        a3--a2
+
+   All call edges go up between RefSCCs, and clockwise around the RefSCC.
+ */
+static const char DiamondOfTrianglesRefGraph[] =
+     "define void @a1() {\n"
+     "entry:\n"
+     "  %a = alloca void ()*\n"
+     "  store void ()* @a2, void ()** %a\n"
+     "  store void ()* @b2, void ()** %a\n"
+     "  store void ()* @c3, void ()** %a\n"
+     "  ret void\n"
+     "}\n"
+     "define void @a2() {\n"
+     "entry:\n"
+     "  %a = alloca void ()*\n"
+     "  store void ()* @a3, void ()** %a\n"
+     "  ret void\n"
+     "}\n"
+     "define void @a3() {\n"
+     "entry:\n"
+     "  %a = alloca void ()*\n"
+     "  store void ()* @a1, void ()** %a\n"
+     "  ret void\n"
+     "}\n"
+     "define void @b1() {\n"
+     "entry:\n"
+     "  %a = alloca void ()*\n"
+     "  store void ()* @b2, void ()** %a\n"
+     "  store void ()* @d3, void ()** %a\n"
+     "  ret void\n"
+     "}\n"
+     "define void @b2() {\n"
+     "entry:\n"
+     "  %a = alloca void ()*\n"
+     "  store void ()* @b3, void ()** %a\n"
+     "  ret void\n"
+     "}\n"
+     "define void @b3() {\n"
+     "entry:\n"
+     "  %a = alloca void ()*\n"
+     "  store void ()* @b1, void ()** %a\n"
+     "  ret void\n"
+     "}\n"
+     "define void @c1() {\n"
+     "entry:\n"
+     "  %a = alloca void ()*\n"
+     "  store void ()* @c2, void ()** %a\n"
+     "  store void ()* @d2, void ()** %a\n"
+     "  ret void\n"
+     "}\n"
+     "define void @c2() {\n"
+     "entry:\n"
+     "  %a = alloca void ()*\n"
+     "  store void ()* @c3, void ()** %a\n"
+     "  ret void\n"
+     "}\n"
+     "define void @c3() {\n"
+     "entry:\n"
+     "  %a = alloca void ()*\n"
+     "  store void ()* @c1, void ()** %a\n"
+     "  ret void\n"
+     "}\n"
+     "define void @d1() {\n"
+     "entry:\n"
+     "  %a = alloca void ()*\n"
+     "  store void ()* @d2, void ()** %a\n"
+     "  ret void\n"
+     "}\n"
+     "define void @d2() {\n"
+     "entry:\n"
+     "  %a = alloca void ()*\n"
+     "  store void ()* @d3, void ()** %a\n"
+     "  ret void\n"
+     "}\n"
+     "define void @d3() {\n"
+     "entry:\n"
+     "  %a = alloca void ()*\n"
+     "  store void ()* @d1, void ()** %a\n"
+     "  ret void\n"
+     "}\n";
+
 TEST(LazyCallGraphTest, BasicGraphFormation) {
   LLVMContext Context;
   std::unique_ptr<Module> M = parseAssembly(Context, DiamondOfTriangles);
@@ -220,6 +315,7 @@ TEST(LazyCallGraphTest, BasicGraphFormat
   EXPECT_FALSE(D.isChildOf(D));
   EXPECT_FALSE(D.isAncestorOf(D));
   EXPECT_FALSE(D.isDescendantOf(D));
+  EXPECT_EQ(&D, &*CG.postorder_ref_scc_begin());
 
   LazyCallGraph::RefSCC &C = *J++;
   ASSERT_EQ(1, C.size());
@@ -235,6 +331,7 @@ TEST(LazyCallGraphTest, BasicGraphFormat
   EXPECT_FALSE(C.isChildOf(D));
   EXPECT_TRUE(C.isAncestorOf(D));
   EXPECT_FALSE(C.isDescendantOf(D));
+  EXPECT_EQ(&C, &*std::next(CG.postorder_ref_scc_begin()));
 
   LazyCallGraph::RefSCC &B = *J++;
   ASSERT_EQ(1, B.size());
@@ -252,6 +349,7 @@ TEST(LazyCallGraphTest, BasicGraphFormat
   EXPECT_FALSE(B.isDescendantOf(D));
   EXPECT_FALSE(B.isAncestorOf(C));
   EXPECT_FALSE(C.isAncestorOf(B));
+  EXPECT_EQ(&B, &*std::next(CG.postorder_ref_scc_begin(), 2));
 
   LazyCallGraph::RefSCC &A = *J++;
   ASSERT_EQ(1, A.size());
@@ -269,8 +367,10 @@ TEST(LazyCallGraphTest, BasicGraphFormat
   EXPECT_TRUE(A.isAncestorOf(B));
   EXPECT_TRUE(A.isAncestorOf(C));
   EXPECT_TRUE(A.isAncestorOf(D));
+  EXPECT_EQ(&A, &*std::next(CG.postorder_ref_scc_begin(), 3));
 
   EXPECT_EQ(CG.postorder_ref_scc_end(), J);
+  EXPECT_EQ(J, std::next(CG.postorder_ref_scc_begin(), 4));
 }
 
 static Function &lookupFunction(Module &M, StringRef Name) {
@@ -478,7 +578,7 @@ TEST(LazyCallGraphTest, OutgoingEdgeMuta
 
   // Force the graph to be fully expanded.
   for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs())
-    (void)RC;
+    dbgs() << "Formed RefSCC: " << RC << "\n";
 
   LazyCallGraph::Node &A = *CG.lookup(lookupFunction(*M, "a"));
   LazyCallGraph::Node &B = *CG.lookup(lookupFunction(*M, "b"));
@@ -599,7 +699,7 @@ TEST(LazyCallGraphTest, IncomingEdgeInse
 
   // Force the graph to be fully expanded.
   for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs())
-    (void)RC;
+    dbgs() << "Formed RefSCC: " << RC << "\n";
 
   LazyCallGraph::Node &A1 = *CG.lookup(lookupFunction(*M, "a1"));
   LazyCallGraph::Node &A2 = *CG.lookup(lookupFunction(*M, "a2"));
@@ -668,6 +768,16 @@ TEST(LazyCallGraphTest, IncomingEdgeInse
   // And that ancestry tests have been updated.
   EXPECT_TRUE(ARC.isParentOf(CRC));
   EXPECT_TRUE(BRC.isParentOf(CRC));
+
+  // And verify the post-order walk reflects the updated structure.
+  auto I = CG.postorder_ref_scc_begin(), E = CG.postorder_ref_scc_end();
+  ASSERT_NE(I, E);
+  EXPECT_EQ(&CRC, &*I) << "Actual RefSCC: " << *I;
+  ASSERT_NE(++I, E);
+  EXPECT_EQ(&BRC, &*I) << "Actual RefSCC: " << *I;
+  ASSERT_NE(++I, E);
+  EXPECT_EQ(&ARC, &*I) << "Actual RefSCC: " << *I;
+  EXPECT_EQ(++I, E);
 }
 
 TEST(LazyCallGraphTest, IncomingEdgeInsertionMidTraversal) {
@@ -726,16 +836,30 @@ TEST(LazyCallGraphTest, IncomingEdgeInse
   EXPECT_EQ(&CRC, CG.lookupRefSCC(D2));
   EXPECT_EQ(&CRC, CG.lookupRefSCC(D3));
 
-  // Check that we can form the last two RefSCCs now in a coherent way.
-  ++I;
-  EXPECT_NE(I, E);
-  LazyCallGraph::RefSCC &BRC = *I;
+  // Verify that the post-order walk reflects the updated but still incomplete
+  // structure.
+  auto J = CG.postorder_ref_scc_begin();
+  EXPECT_NE(J, E);
+  EXPECT_EQ(&CRC, &*J) << "Actual RefSCC: " << *J;
+  EXPECT_EQ(I, J);
+
+  // Check that we can form the last two RefSCCs now, and even that we can do
+  // it with alternating iterators.
+  ++J;
+  EXPECT_NE(J, E);
+  LazyCallGraph::RefSCC &BRC = *J;
   EXPECT_NE(&BRC, nullptr);
   EXPECT_EQ(&BRC, CG.lookupRefSCC(*CG.lookup(lookupFunction(*M, "b1"))));
   EXPECT_EQ(&BRC, CG.lookupRefSCC(*CG.lookup(lookupFunction(*M, "b2"))));
   EXPECT_EQ(&BRC, CG.lookupRefSCC(*CG.lookup(lookupFunction(*M, "b3"))));
   EXPECT_TRUE(BRC.isParentOf(CRC));
   ++I;
+  EXPECT_EQ(J, I);
+  EXPECT_EQ(&BRC, &*I) << "Actual RefSCC: " << *I;
+
+  // Increment I this time to form the new RefSCC, flopping back to the first
+  // iterator.
+  ++I;
   EXPECT_NE(I, E);
   LazyCallGraph::RefSCC &ARC = *I;
   EXPECT_NE(&ARC, nullptr);
@@ -743,8 +867,242 @@ TEST(LazyCallGraphTest, IncomingEdgeInse
   EXPECT_EQ(&ARC, CG.lookupRefSCC(*CG.lookup(lookupFunction(*M, "a2"))));
   EXPECT_EQ(&ARC, CG.lookupRefSCC(*CG.lookup(lookupFunction(*M, "a3"))));
   EXPECT_TRUE(ARC.isParentOf(CRC));
+  ++J;
+  EXPECT_EQ(I, J);
+  EXPECT_EQ(&ARC, &*J) << "Actual RefSCC: " << *J;
   ++I;
   EXPECT_EQ(E, I);
+  ++J;
+  EXPECT_EQ(E, J);
+}
+
+TEST(LazyCallGraphTest, IncomingEdgeInsertionRefGraph) {
+  LLVMContext Context;
+  // Another variation of the above test but with all the edges switched to
+  // references rather than calls.
+  std::unique_ptr<Module> M =
+      parseAssembly(Context, DiamondOfTrianglesRefGraph);
+  LazyCallGraph CG(*M);
+
+  // Force the graph to be fully expanded.
+  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs())
+    dbgs() << "Formed RefSCC: " << RC << "\n";
+
+  LazyCallGraph::Node &A1 = *CG.lookup(lookupFunction(*M, "a1"));
+  LazyCallGraph::Node &A2 = *CG.lookup(lookupFunction(*M, "a2"));
+  LazyCallGraph::Node &A3 = *CG.lookup(lookupFunction(*M, "a3"));
+  LazyCallGraph::Node &B1 = *CG.lookup(lookupFunction(*M, "b1"));
+  LazyCallGraph::Node &B2 = *CG.lookup(lookupFunction(*M, "b2"));
+  LazyCallGraph::Node &B3 = *CG.lookup(lookupFunction(*M, "b3"));
+  LazyCallGraph::Node &C1 = *CG.lookup(lookupFunction(*M, "c1"));
+  LazyCallGraph::Node &C2 = *CG.lookup(lookupFunction(*M, "c2"));
+  LazyCallGraph::Node &C3 = *CG.lookup(lookupFunction(*M, "c3"));
+  LazyCallGraph::Node &D1 = *CG.lookup(lookupFunction(*M, "d1"));
+  LazyCallGraph::Node &D2 = *CG.lookup(lookupFunction(*M, "d2"));
+  LazyCallGraph::Node &D3 = *CG.lookup(lookupFunction(*M, "d3"));
+  LazyCallGraph::RefSCC &ARC = *CG.lookupRefSCC(A1);
+  LazyCallGraph::RefSCC &BRC = *CG.lookupRefSCC(B1);
+  LazyCallGraph::RefSCC &CRC = *CG.lookupRefSCC(C1);
+  LazyCallGraph::RefSCC &DRC = *CG.lookupRefSCC(D1);
+  ASSERT_EQ(&ARC, CG.lookupRefSCC(A2));
+  ASSERT_EQ(&ARC, CG.lookupRefSCC(A3));
+  ASSERT_EQ(&BRC, CG.lookupRefSCC(B2));
+  ASSERT_EQ(&BRC, CG.lookupRefSCC(B3));
+  ASSERT_EQ(&CRC, CG.lookupRefSCC(C2));
+  ASSERT_EQ(&CRC, CG.lookupRefSCC(C3));
+  ASSERT_EQ(&DRC, CG.lookupRefSCC(D2));
+  ASSERT_EQ(&DRC, CG.lookupRefSCC(D3));
+  ASSERT_EQ(1, std::distance(D2.begin(), D2.end()));
+
+  // Add an edge to make the graph:
+  //
+  //         d1         |
+  //        /  \        |
+  //       d3--d2---.   |
+  //      /     \    |  |
+  //     b1     c1   |  |
+  //   /  \    /  \ /   |
+  //  b3--b2  c3--c2    |
+  //       \  /         |
+  //        a1          |
+  //       /  \         |
+  //      a3--a2        |
+  auto MergedRCs = CRC.insertIncomingRefEdge(D2, C2);
+  // Make sure we connected the nodes.
+  for (LazyCallGraph::Edge E : D2) {
+    if (E.getNode() == &D3)
+      continue;
+    EXPECT_EQ(&C2, E.getNode());
+  }
+  // And marked the D ref-SCC as no longer valid.
+  EXPECT_EQ(1u, MergedRCs.size());
+  EXPECT_EQ(&DRC, MergedRCs[0]);
+
+  // Make sure we have the correct nodes in the SCC sets.
+  EXPECT_EQ(&ARC, CG.lookupRefSCC(A1));
+  EXPECT_EQ(&ARC, CG.lookupRefSCC(A2));
+  EXPECT_EQ(&ARC, CG.lookupRefSCC(A3));
+  EXPECT_EQ(&BRC, CG.lookupRefSCC(B1));
+  EXPECT_EQ(&BRC, CG.lookupRefSCC(B2));
+  EXPECT_EQ(&BRC, CG.lookupRefSCC(B3));
+  EXPECT_EQ(&CRC, CG.lookupRefSCC(C1));
+  EXPECT_EQ(&CRC, CG.lookupRefSCC(C2));
+  EXPECT_EQ(&CRC, CG.lookupRefSCC(C3));
+  EXPECT_EQ(&CRC, CG.lookupRefSCC(D1));
+  EXPECT_EQ(&CRC, CG.lookupRefSCC(D2));
+  EXPECT_EQ(&CRC, CG.lookupRefSCC(D3));
+
+  // And that ancestry tests have been updated.
+  EXPECT_TRUE(ARC.isParentOf(CRC));
+  EXPECT_TRUE(BRC.isParentOf(CRC));
+
+  // And verify the post-order walk reflects the updated structure.
+  auto I = CG.postorder_ref_scc_begin(), E = CG.postorder_ref_scc_end();
+  ASSERT_NE(I, E);
+  EXPECT_EQ(&CRC, &*I) << "Actual RefSCC: " << *I;
+  ASSERT_NE(++I, E);
+  EXPECT_EQ(&BRC, &*I) << "Actual RefSCC: " << *I;
+  ASSERT_NE(++I, E);
+  EXPECT_EQ(&ARC, &*I) << "Actual RefSCC: " << *I;
+  EXPECT_EQ(++I, E);
+}
+
+TEST(LazyCallGraphTest, IncomingEdgeInsertionLargeCallCycle) {
+  LLVMContext Context;
+  std::unique_ptr<Module> M = parseAssembly(Context, "define void @a() {\n"
+                                                     "entry:\n"
+                                                     "  call void @b()\n"
+                                                     "  ret void\n"
+                                                     "}\n"
+                                                     "define void @b() {\n"
+                                                     "entry:\n"
+                                                     "  call void @c()\n"
+                                                     "  ret void\n"
+                                                     "}\n"
+                                                     "define void @c() {\n"
+                                                     "entry:\n"
+                                                     "  call void @d()\n"
+                                                     "  ret void\n"
+                                                     "}\n"
+                                                     "define void @d() {\n"
+                                                     "entry:\n"
+                                                     "  ret void\n"
+                                                     "}\n");
+  LazyCallGraph CG(*M);
+
+  // Force the graph to be fully expanded.
+  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs())
+    dbgs() << "Formed RefSCC: " << RC << "\n";
+
+  LazyCallGraph::Node &A = *CG.lookup(lookupFunction(*M, "a"));
+  LazyCallGraph::Node &B = *CG.lookup(lookupFunction(*M, "b"));
+  LazyCallGraph::Node &C = *CG.lookup(lookupFunction(*M, "c"));
+  LazyCallGraph::Node &D = *CG.lookup(lookupFunction(*M, "d"));
+  LazyCallGraph::SCC &AC = *CG.lookupSCC(A);
+  LazyCallGraph::SCC &BC = *CG.lookupSCC(B);
+  LazyCallGraph::SCC &CC = *CG.lookupSCC(C);
+  LazyCallGraph::SCC &DC = *CG.lookupSCC(D);
+  LazyCallGraph::RefSCC &ARC = *CG.lookupRefSCC(A);
+  LazyCallGraph::RefSCC &BRC = *CG.lookupRefSCC(B);
+  LazyCallGraph::RefSCC &CRC = *CG.lookupRefSCC(C);
+  LazyCallGraph::RefSCC &DRC = *CG.lookupRefSCC(D);
+
+  // Connect the top to the bottom forming a large RefSCC made up mostly of
+  // calls.
+  auto MergedRCs = ARC.insertIncomingRefEdge(D, A);
+  // Make sure we connected the nodes.
+  EXPECT_NE(D.begin(), D.end());
+  EXPECT_EQ(&A, D.begin()->getNode());
+
+  // Check that we have the dead RCs, but ignore the order.
+  EXPECT_EQ(3u, MergedRCs.size());
+  EXPECT_NE(find(MergedRCs, &BRC), MergedRCs.end());
+  EXPECT_NE(find(MergedRCs, &CRC), MergedRCs.end());
+  EXPECT_NE(find(MergedRCs, &DRC), MergedRCs.end());
+
+  // Make sure the nodes point to the right place now.
+  EXPECT_EQ(&ARC, CG.lookupRefSCC(A));
+  EXPECT_EQ(&ARC, CG.lookupRefSCC(B));
+  EXPECT_EQ(&ARC, CG.lookupRefSCC(C));
+  EXPECT_EQ(&ARC, CG.lookupRefSCC(D));
+
+  // Check that the SCCs are in postorder.
+  EXPECT_EQ(4u, ARC.size());
+  EXPECT_EQ(&DC, &ARC[0]);
+  EXPECT_EQ(&CC, &ARC[1]);
+  EXPECT_EQ(&BC, &ARC[2]);
+  EXPECT_EQ(&AC, &ARC[3]);
+
+  // And verify the post-order walk reflects the updated structure.
+  auto I = CG.postorder_ref_scc_begin(), E = CG.postorder_ref_scc_end();
+  ASSERT_NE(I, E);
+  EXPECT_EQ(&ARC, &*I) << "Actual RefSCC: " << *I;
+  EXPECT_EQ(++I, E);
+}
+
+TEST(LazyCallGraphTest, IncomingEdgeInsertionLargeRefCycle) {
+  LLVMContext Context;
+  std::unique_ptr<Module> M =
+      parseAssembly(Context, "define void @a() {\n"
+                             "entry:\n"
+                             "  %p = alloca void ()*\n"
+                             "  store void ()* @b, void ()** %p\n"
+                             "  ret void\n"
+                             "}\n"
+                             "define void @b() {\n"
+                             "entry:\n"
+                             "  %p = alloca void ()*\n"
+                             "  store void ()* @c, void ()** %p\n"
+                             "  ret void\n"
+                             "}\n"
+                             "define void @c() {\n"
+                             "entry:\n"
+                             "  %p = alloca void ()*\n"
+                             "  store void ()* @d, void ()** %p\n"
+                             "  ret void\n"
+                             "}\n"
+                             "define void @d() {\n"
+                             "entry:\n"
+                             "  ret void\n"
+                             "}\n");
+  LazyCallGraph CG(*M);
+
+  // Force the graph to be fully expanded.
+  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs())
+    dbgs() << "Formed RefSCC: " << RC << "\n";
+
+  LazyCallGraph::Node &A = *CG.lookup(lookupFunction(*M, "a"));
+  LazyCallGraph::Node &B = *CG.lookup(lookupFunction(*M, "b"));
+  LazyCallGraph::Node &C = *CG.lookup(lookupFunction(*M, "c"));
+  LazyCallGraph::Node &D = *CG.lookup(lookupFunction(*M, "d"));
+  LazyCallGraph::RefSCC &ARC = *CG.lookupRefSCC(A);
+  LazyCallGraph::RefSCC &BRC = *CG.lookupRefSCC(B);
+  LazyCallGraph::RefSCC &CRC = *CG.lookupRefSCC(C);
+  LazyCallGraph::RefSCC &DRC = *CG.lookupRefSCC(D);
+
+  // Connect the top to the bottom forming a large RefSCC made up just of
+  // references.
+  auto MergedRCs = ARC.insertIncomingRefEdge(D, A);
+  // Make sure we connected the nodes.
+  EXPECT_NE(D.begin(), D.end());
+  EXPECT_EQ(&A, D.begin()->getNode());
+
+  // Check that we have the dead RCs, but ignore the order.
+  EXPECT_EQ(3u, MergedRCs.size());
+  EXPECT_NE(find(MergedRCs, &BRC), MergedRCs.end());
+  EXPECT_NE(find(MergedRCs, &CRC), MergedRCs.end());
+  EXPECT_NE(find(MergedRCs, &DRC), MergedRCs.end());
+
+  // Make sure the nodes point to the right place now.
+  EXPECT_EQ(&ARC, CG.lookupRefSCC(A));
+  EXPECT_EQ(&ARC, CG.lookupRefSCC(B));
+  EXPECT_EQ(&ARC, CG.lookupRefSCC(C));
+  EXPECT_EQ(&ARC, CG.lookupRefSCC(D));
+
+  // And verify the post-order walk reflects the updated structure.
+  auto I = CG.postorder_ref_scc_begin(), End = CG.postorder_ref_scc_end();
+  ASSERT_NE(I, End);
+  EXPECT_EQ(&ARC, &*I) << "Actual RefSCC: " << *I;
+  EXPECT_EQ(++I, End);
 }
 
 TEST(LazyCallGraphTest, InternalEdgeMutation) {
@@ -855,9 +1213,9 @@ TEST(LazyCallGraphTest, InternalEdgeRemo
   LazyCallGraph CG(*M);
 
   // Force the graph to be fully expanded.
-  auto I = CG.postorder_ref_scc_begin();
-  LazyCallGraph::RefSCC &RC = *I++;
-  EXPECT_EQ(CG.postorder_ref_scc_end(), I);
+  auto I = CG.postorder_ref_scc_begin(), E = CG.postorder_ref_scc_end();
+  LazyCallGraph::RefSCC &RC = *I;
+  EXPECT_EQ(E, std::next(I));
 
   LazyCallGraph::Node &A = *CG.lookup(lookupFunction(*M, "a"));
   LazyCallGraph::Node &B = *CG.lookup(lookupFunction(*M, "b"));
@@ -874,6 +1232,10 @@ TEST(LazyCallGraphTest, InternalEdgeRemo
   EXPECT_EQ(&RC, CG.lookupRefSCC(A));
   EXPECT_EQ(&RC, CG.lookupRefSCC(B));
   EXPECT_EQ(&RC, CG.lookupRefSCC(C));
+  auto J = CG.postorder_ref_scc_begin();
+  EXPECT_EQ(I, J);
+  EXPECT_EQ(&RC, &*J);
+  EXPECT_EQ(E, std::next(J));
 
   // Remove the edge from c -> a, which should leave 'a' in the original RefSCC
   // and form a new RefSCC for 'b' and 'c'.
@@ -881,9 +1243,19 @@ TEST(LazyCallGraphTest, InternalEdgeRemo
   EXPECT_EQ(1u, NewRCs.size());
   EXPECT_EQ(&RC, CG.lookupRefSCC(A));
   EXPECT_EQ(1, std::distance(RC.begin(), RC.end()));
-  LazyCallGraph::RefSCC *RC2 = CG.lookupRefSCC(B);
-  EXPECT_EQ(RC2, CG.lookupRefSCC(C));
-  EXPECT_EQ(RC2, NewRCs[0]);
+  LazyCallGraph::RefSCC &RC2 = *CG.lookupRefSCC(B);
+  EXPECT_EQ(&RC2, CG.lookupRefSCC(C));
+  EXPECT_EQ(&RC2, NewRCs[0]);
+  J = CG.postorder_ref_scc_begin();
+  EXPECT_NE(I, J);
+  EXPECT_EQ(&RC2, &*J);
+  ++J;
+  EXPECT_EQ(I, J);
+  EXPECT_EQ(&RC, &*J);
+  ++I;
+  EXPECT_EQ(E, I);
+  ++J;
+  EXPECT_EQ(E, J);
 }
 
 TEST(LazyCallGraphTest, InternalCallEdgeToRef) {



