[llvm] [MemProf] Merge callee clones as needed before assigning functions (PR #135702)
Snehasish Kumar via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 17 08:24:34 PDT 2025
================
@@ -4016,6 +4040,306 @@ IndexCallsiteContextGraph::cloneFunctionForCallsite(
return {Func.func(), CloneNo};
}
+// We perform cloning for each allocation node separately. However, this
+// sometimes results in a situation where the same node calls multiple
+// clones of the same callee, created for different allocations. This
+// causes issues when assigning functions to these clones, as each node can
+// in reality only call a single callee clone.
+//
+// To address this, before assigning functions, merge callee clone nodes as
+// needed using a post order traversal from the allocations. We attempt to
+// use existing clones as the merge node when legal, and to share them
+// among callers with the same properties (callers calling the same set of
+// callee clone nodes for the same allocations).
+//
+// Without this fix, in some cases incorrect function assignment will lead
+// to calling the wrong allocation clone.
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
+ if (!MergeClones)
+ return;
+
+ // Generate a map from context id to the associated allocation node for use
+ // when merging clones.
+ DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
+ for (auto &Entry : AllocationCallToContextNodeMap) {
+ auto *Node = Entry.second;
+ for (auto Id : Node->getContextIds())
+ ContextIdToAllocationNode[Id] = Node->getOrigNode();
+ for (auto *Clone : Node->Clones) {
+ for (auto Id : Clone->getContextIds())
+ ContextIdToAllocationNode[Id] = Clone->getOrigNode();
+ }
+ }
+
+ // Post order traversal starting from allocations to ensure each callsite
+ // calls a single clone of its callee. Callee nodes that are clones of each
+ // other are merged (via new merge nodes if needed) to achieve this.
+ DenseSet<const ContextNode *> Visited;
+ for (auto &Entry : AllocationCallToContextNodeMap) {
+ auto *Node = Entry.second;
+
+ mergeClones(Node, Visited, ContextIdToAllocationNode);
+
+ // Make a copy so the recursive post order traversal that may create new
+ // clones doesn't mess up iteration.
+ auto Clones = Node->Clones;
+ for (auto *Clone : Clones)
+ mergeClones(Clone, Visited, ContextIdToAllocationNode);
+ }
+
+ if (DumpCCG) {
+ dbgs() << "CCG after merging:\n";
+ dbgs() << *this;
+ }
+ if (ExportToDot)
+ exportToDot("aftermerge");
+
+ if (VerifyCCG) {
+ check();
+ }
+}
+
+// Recursive helper for above mergeClones method.
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
+ ContextNode *Node, DenseSet<const ContextNode *> &Visited,
+ DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
+ auto Inserted = Visited.insert(Node);
+ if (!Inserted.second)
+ return;
+
+ // Make a copy since the recursive call may move a caller edge to a new
+ // callee, messing up the iterator.
+ auto CallerEdges = Node->CallerEdges;
+ for (auto CallerEdge : CallerEdges) {
+ // Skip any caller edge moved onto a different callee during recursion.
+ if (CallerEdge->Callee != Node)
+ continue;
+ mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
+ }
+
+ // Merge for this node after we handle its callers.
+ mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
+ ContextNode *Node, DenseSet<const ContextNode *> &Visited,
+ DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
+ // Ignore Node if we moved all of its contexts to clones.
+ if (Node->emptyContextIds())
+ return;
+
+ // First identify groups of clones among Node's callee edges, by building
+ // a map from each callee base node to the associated callee edges from Node.
+ MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
+ OrigNodeToCloneEdges;
+ for (const auto &E : Node->CalleeEdges) {
+ auto *Callee = E->Callee;
+ if (!Callee->CloneOf && Callee->Clones.empty())
+ continue;
+ ContextNode *Base = Callee->getOrigNode();
+ OrigNodeToCloneEdges[Base].push_back(E);
+ }
+
+ // Process each set of callee clones called by Node, performing the needed
+ // merging.
+ for (auto Entry : OrigNodeToCloneEdges) {
+ // CalleeEdges is the set of edges from Node reaching callees that are
+ // mutual clones of each other.
+ auto CalleeEdges = Entry.second;
+ auto NumCalleeClones = CalleeEdges.size();
+ // A single edge means there is no merging needed.
+ if (NumCalleeClones == 1)
+ continue;
+ // Sort the CalleeEdges calling this group of clones in ascending order of
+ // their caller edge counts, putting the original non-clone node first in
+ // cases of a tie. This simplifies finding an existing node to use as the
+ // merge node.
+ std::stable_sort(CalleeEdges.begin(), CalleeEdges.end(),
+ [&](const std::shared_ptr<ContextEdge> &A,
----------------
snehasish wrote:
I don't think we need to capture anything in this lambda?
Also maybe move it up as a separate object to reduce the indentation and make it easier to read.
https://github.com/llvm/llvm-project/pull/135702
More information about the llvm-commits
mailing list