[llvm] [MemProf] Merge callee clones as needed before assigning functions (PR #135702)

Snehasish Kumar via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 17 08:24:34 PDT 2025


================
@@ -4016,6 +4040,306 @@ IndexCallsiteContextGraph::cloneFunctionForCallsite(
   return {Func.func(), CloneNo};
 }
 
+// We perform cloning for each allocation node separately. However, this
+// sometimes results in a situation where the same node calls multiple
+// clones of the same callee, created for different allocations. This
+// causes issues when assigning functions to these clones, as each node can
+// in reality only call a single callee clone.
+//
+// To address this, before assigning functions, merge callee clone nodes as
+// needed using a post order traversal from the allocations. We attempt to
+// use existing clones as the merge node when legal, and to share them
+// among callers with the same properties (callers calling the same set of
+// callee clone nodes for the same allocations).
+//
+// Without this fix, in some cases incorrect function assignment will lead
+// to calling the wrong allocation clone.
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
+  if (!MergeClones)
+    return;
+
+  // Generate a map from context id to the associated allocation node for use
+  // when merging clones.
+  DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
+  for (auto &Entry : AllocationCallToContextNodeMap) {
+    auto *Node = Entry.second;
+    for (auto Id : Node->getContextIds())
+      ContextIdToAllocationNode[Id] = Node->getOrigNode();
+    for (auto *Clone : Node->Clones) {
+      for (auto Id : Clone->getContextIds())
+        ContextIdToAllocationNode[Id] = Clone->getOrigNode();
+    }
+  }
+
+  // Post order traversal starting from allocations to ensure each callsite
+  // calls a single clone of its callee. Callee nodes that are clones of each
+  // other are merged (via new merge nodes if needed) to achieve this.
+  DenseSet<const ContextNode *> Visited;
+  for (auto &Entry : AllocationCallToContextNodeMap) {
+    auto *Node = Entry.second;
+
+    mergeClones(Node, Visited, ContextIdToAllocationNode);
+
+    // Make a copy so the recursive post order traversal that may create new
+    // clones doesn't mess up iteration.
+    auto Clones = Node->Clones;
+    for (auto *Clone : Clones)
+      mergeClones(Clone, Visited, ContextIdToAllocationNode);
+  }
+
+  if (DumpCCG) {
+    dbgs() << "CCG after merging:\n";
+    dbgs() << *this;
+  }
+  if (ExportToDot)
+    exportToDot("aftermerge");
+
+  if (VerifyCCG) {
+    check();
+  }
+}
+
+// Recursive helper for above mergeClones method.
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
+    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
+    DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
+  auto Inserted = Visited.insert(Node);
+  if (!Inserted.second)
+    return;
+
+  // Make a copy since the recursive call may move a caller edge to a new
+  // callee, messing up the iterator.
+  auto CallerEdges = Node->CallerEdges;
+  for (auto CallerEdge : CallerEdges) {
+    // Skip any caller edge moved onto a different callee during recursion.
+    if (CallerEdge->Callee != Node)
+      continue;
+    mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
+  }
+
+  // Merge for this node after we handle its callers.
+  mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
+    ContextNode *Node, DenseSet<const ContextNode *> &Visited,
+    DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
+  // Ignore Node if we moved all of its contexts to clones.
+  if (Node->emptyContextIds())
+    return;
+
+  // First identify groups of clones among Node's callee edges, by building
+  // a map from each callee base node to the associated callee edges from Node.
+  MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
+      OrigNodeToCloneEdges;
+  for (const auto &E : Node->CalleeEdges) {
+    auto *Callee = E->Callee;
+    if (!Callee->CloneOf && Callee->Clones.empty())
+      continue;
+    ContextNode *Base = Callee->getOrigNode();
+    OrigNodeToCloneEdges[Base].push_back(E);
+  }
+
+  // Process each set of callee clones called by Node, performing the needed
+  // merging.
+  for (auto Entry : OrigNodeToCloneEdges) {
+    // CalleeEdges is the set of edges from Node reaching callees that are
+    // mutual clones of each other.
+    auto CalleeEdges = Entry.second;
+    auto NumCalleeClones = CalleeEdges.size();
+    // A single edge means there is no merging needed.
+    if (NumCalleeClones == 1)
+      continue;
+    // Sort the CalleeEdges calling this group of clones in ascending order of
+    // their caller edge counts, putting the original non-clone node first in
+    // cases of a tie. This simplifies finding an existing node to use as the
+    // merge node.
+    std::stable_sort(CalleeEdges.begin(), CalleeEdges.end(),
+                     [&](const std::shared_ptr<ContextEdge> &A,
----------------
snehasish wrote:

I don't think we need to capture anything in this lambda?

Also maybe move it up as a separate object to reduce the indentation and make it easier to read.

https://github.com/llvm/llvm-project/pull/135702


More information about the llvm-commits mailing list