[llvm] [MemProf] Merge callee clones as needed before assigning functions (PR #135702)

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 17 10:21:44 PDT 2025


================
@@ -4016,6 +4040,306 @@ IndexCallsiteContextGraph::cloneFunctionForCallsite(
   return {Func.func(), CloneNo};
 }
 
+// We perform cloning for each allocation node separately. However, this
+// sometimes results in a situation where the same node calls multiple
+// clones of the same callee, created for different allocations. This
+// causes issues when assigning functions to these clones, as each node can
+// in reality only call a single callee clone.
+//
+// To address this, before assigning functions, merge callee clone nodes as
+// needed using a post order traversal from the allocations. We attempt to
+// use existing clones as the merge node when legal, and to share them
+// among callers with the same properties (callers calling the same set of
+// callee clone nodes for the same allocations).
+//
+// Without this fix, in some cases incorrect function assignment will lead
+// to calling the wrong allocation clone.
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
+  if (!MergeClones)
+    return;
+
+  // Generate a map from context id to the associated allocation node for use
+  // when merging clones.
+  DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
+  for (auto &Entry : AllocationCallToContextNodeMap) {
+    auto *Node = Entry.second;
+    for (auto Id : Node->getContextIds())
+      ContextIdToAllocationNode[Id] = Node->getOrigNode();
+    for (auto *Clone : Node->Clones) {
+      for (auto Id : Clone->getContextIds())
+        ContextIdToAllocationNode[Id] = Clone->getOrigNode();
+    }
+  }
+
+  // Post order traversal starting from allocations to ensure each callsite
+  // calls a single clone of its callee. Callee nodes that are clones of each
+  // other are merged (via new merge nodes if needed) to achieve this.
+  DenseSet<const ContextNode *> Visited;
+  for (auto &Entry : AllocationCallToContextNodeMap) {
+    auto *Node = Entry.second;
+
+    mergeClones(Node, Visited, ContextIdToAllocationNode);
+
+    // Make a copy so the recursive post order traversal that may create new
+    // clones doesn't mess up iteration.
+    auto Clones = Node->Clones;
+    for (auto *Clone : Clones)
+      mergeClones(Clone, Visited, ContextIdToAllocationNode);
+  }
+
+  if (DumpCCG) {
+    dbgs() << "CCG after merging:\n";
+    dbgs() << *this;
+  }
+  if (ExportToDot)
+    exportToDot("aftermerge");
+
+  if (VerifyCCG) {
+    check();
+  }
+}
+
+// Recursive helper for above mergeClones method.
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
+    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
+    DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
+  auto Inserted = Visited.insert(Node);
+  if (!Inserted.second)
+    return;
+
+  // Make a copy since the recursive call may move a caller edge to a new
+  // callee, messing up the iterator.
+  auto CallerEdges = Node->CallerEdges;
+  for (auto CallerEdge : CallerEdges) {
+    // Skip any caller edge moved onto a different callee during recursion.
+    if (CallerEdge->Callee != Node)
+      continue;
+    mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
+  }
+
+  // Merge for this node after we handle its callers.
+  mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
+    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
+    DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
+  // Ignore Node if we moved all of its contexts to clones.
+  if (Node->emptyContextIds())
+    return;
+
+  // First identify groups of clones among Node's callee edges, by building
+  // a map from each callee base node to the associated callee edges from Node.
+  MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
+      OrigNodeToCloneEdges;
+  for (const auto &E : Node->CalleeEdges) {
+    auto *Callee = E->Callee;
+    if (!Callee->CloneOf && Callee->Clones.empty())
+      continue;
+    ContextNode *Base = Callee->getOrigNode();
+    OrigNodeToCloneEdges[Base].push_back(E);
+  }
+
+  // Process each set of callee clones called by Node, performing the needed
+  // merging.
+  for (auto Entry : OrigNodeToCloneEdges) {
+    // CalleeEdges is the set of edges from Node reaching callees that are
+    // mutual clones of each other.
+    auto CalleeEdges = Entry.second;
+    auto NumCalleeClones = CalleeEdges.size();
+    // A single edge means there is no merging needed.
+    if (NumCalleeClones == 1)
+      continue;
+    // Sort the CalleeEdges calling this group of clones in ascending order of
+    // their caller edge counts, putting the original non-clone node first in
+    // cases of a tie. This simplifies finding an existing node to use as the
+    // merge node.
+    std::stable_sort(CalleeEdges.begin(), CalleeEdges.end(),
+                     [&](const std::shared_ptr<ContextEdge> &A,
+                         const std::shared_ptr<ContextEdge> &B) {
+                       if (A->Callee->CallerEdges.size() !=
+                           B->Callee->CallerEdges.size())
+                         return A->Callee->CallerEdges.size() <
+                                B->Callee->CallerEdges.size();
+                       if (A->Callee->CloneOf && !B->Callee->CloneOf)
+                         return true;
+                       else if (!A->Callee->CloneOf && B->Callee->CloneOf)
+                         return false;
+                       // Use the first context id for each edge as a
+                       // tie-breaker.
+                       return *A->ContextIds.begin() < *B->ContextIds.begin();
+                     });
+
+    // Next look for other nodes that have edges to the same set of callee
+    // clones as the current Node. Those can share the eventual merge node
+    // (reducing cloning and binary size overhead) iff:
+    // - they have edges to the same set of callee clones
+    // - each callee edge reaches a subset of the same allocations as Node's
+    //   corresponding edge to the same callee clone.
+    // The second requirement is to ensure that we don't undo any of the
+    // necessary cloning to distinguish contexts with different allocation
+    // behavior.
+    // FIXME: This is somewhat conservative, as we really just need to ensure
+    // that they don't reach the same allocations as contexts on edges from Node
+    // going to any of the *other* callee clones being merged. However, that
+    // requires more tracking and checking to get right.
+    //
+    // This map counts how many edges to the same callee clone exist for other
+    // caller nodes of each callee clone.
+    DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
+    // Counts the number of other caller nodes that have edges to all callee
+    // clones that don't violate the allocation context checking.
+    unsigned PossibleOtherCallerNodes = 0;
+    // We only need to look at other Caller nodes if the first callee edge has
+    // multiple callers (recall they are sorted in ascending order above).
+    if (CalleeEdges[0]->Callee->CallerEdges.size() > 1) {
+      // For each callee edge:
+      // - Collect the count of other caller nodes calling the same calles.
----------------
teresajohnson wrote:

done

https://github.com/llvm/llvm-project/pull/135702


More information about the llvm-commits mailing list