[llvm-branch-commits] [llvm] 5a00383 - Revert "Revert "[MemProf] Reduce cloning overhead by sharing nodes when possi…"
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Aug 28 11:44:57 PDT 2024
Author: Teresa Johnson
Date: 2024-08-28T11:44:54-07:00
New Revision: 5a00383d7f192a2951e3add4d8ab1f918e7d58f8
URL: https://github.com/llvm/llvm-project/commit/5a00383d7f192a2951e3add4d8ab1f918e7d58f8
DIFF: https://github.com/llvm/llvm-project/commit/5a00383d7f192a2951e3add4d8ab1f918e7d58f8.diff
LOG: Revert "Revert "[MemProf] Reduce cloning overhead by sharing nodes when possi…"
This reverts commit 11aa31f595325d6b2dede3364e4b86d78fffe635.
Added:
Modified:
llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 66b68d5cd457fb..c9de9c964bba0a 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -242,9 +242,16 @@ class CallsiteContextGraph {
// recursion.
bool Recursive = false;
- // The corresponding allocation or interior call.
+ // The corresponding allocation or interior call. This is the primary call
+ // for which we have created this node.
CallInfo Call;
+ // List of other calls that can be treated the same as the primary call
+ // through cloning: they are located in the same function and have the same
+ // (possibly pruned) stack ids. They will be updated the same way as the
+ // primary call when assigning to function clones.
+ std::vector<CallInfo> MatchingCalls;
+
// For alloc nodes this is a unique id assigned when constructed, and for
// callsite stack nodes it is the original stack id when the node is
// constructed from the memprof MIB metadata on the alloc nodes. Note that
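The scheme these new fields implement, as a minimal self-contained sketch (CallInfo is reduced to an opaque handle, every other ContextNode member is omitted, and names simply mirror the patch):

#include <vector>

struct CallInfo {
  void *Handle = nullptr; // stands in for the wrapped instruction/summary
};

struct ContextNode {
  // The primary call, for which the node was created.
  CallInfo Call;
  // Calls that share this node: same function, same (possibly pruned)
  // stack ids. They never get a node of their own; every cloning decision
  // applied to the primary call is replayed onto this list.
  std::vector<CallInfo> MatchingCalls;
};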
@@ -457,6 +464,9 @@ class CallsiteContextGraph {
/// iteration.
MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata;
+ /// Records the function each call is located in.
+ DenseMap<CallInfo, const FuncTy *> CallToFunc;
+
/// Map from callsite node to the enclosing caller function.
std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
@@ -474,7 +484,8 @@ class CallsiteContextGraph {
/// StackIdToMatchingCalls map.
void assignStackNodesPostOrder(
ContextNode *Node, DenseSet<const ContextNode *> &Visited,
- DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls);
+ DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls,
+ DenseMap<CallInfo, CallInfo> &CallToMatchingCall);
/// Duplicates the given set of context ids, updating the provided
/// map from each original id with the newly generated context ids,
@@ -1230,10 +1241,11 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
- assignStackNodesPostOrder(ContextNode *Node,
- DenseSet<const ContextNode *> &Visited,
- DenseMap<uint64_t, std::vector<CallContextInfo>>
- &StackIdToMatchingCalls) {
+ assignStackNodesPostOrder(
+ ContextNode *Node, DenseSet<const ContextNode *> &Visited,
+ DenseMap<uint64_t, std::vector<CallContextInfo>>
+ &StackIdToMatchingCalls,
+ DenseMap<CallInfo, CallInfo> &CallToMatchingCall) {
auto Inserted = Visited.insert(Node);
if (!Inserted.second)
return;
@@ -1246,7 +1258,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
// Skip any that have been removed during the recursion.
if (!Edge)
continue;
- assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls);
+ assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
+ CallToMatchingCall);
}
// If this node's stack id is in the map, update the graph to contain new
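The traversal above has this shape, sketched with plain pointers and std::set (the real code threads the two DenseMaps through as extra parameters and uses LLVM's containers):

#include <set>
#include <vector>

struct ContextNode;

struct ContextEdge {
  ContextNode *Caller = nullptr;
};

struct ContextNode {
  std::vector<ContextEdge *> CallerEdges;
};

void assignPostOrder(ContextNode *Node,
                     std::set<const ContextNode *> &Visited) {
  // Each node is processed exactly once.
  if (!Visited.insert(Node).second)
    return;
  // Iterate over a copy, since the recursion may mutate Node->CallerEdges.
  auto CallerEdges = Node->CallerEdges;
  for (ContextEdge *Edge : CallerEdges) {
    // Skip any edges removed during the recursion.
    if (!Edge)
      continue;
    assignPostOrder(Edge->Caller, Visited);
  }
  // Post-order: the node's own stack-id matching work happens here, after
  // all of its callers have been handled.
}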
@@ -1289,8 +1302,19 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
// Skip any for which we didn't assign any ids, these don't get a node in
// the graph.
- if (SavedContextIds.empty())
+ if (SavedContextIds.empty()) {
+ // If this call has a matching call (located in the same function and
+ // having the same stack ids), simply add it to the context node created
+ // for its matching call earlier. These can be treated the same through
+ // cloning and get updated at the same time.
+ if (!CallToMatchingCall.contains(Call))
+ continue;
+ auto MatchingCall = CallToMatchingCall[Call];
+ assert(NonAllocationCallToContextNodeMap.contains(MatchingCall));
+ NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
+ Call);
continue;
+ }
assert(LastId == Ids.back());
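What the new branch does, as a compact stand-in (integer ids for CallInfo and std::map for the DenseMap members; the assert encodes the patch's invariant that the primary call was processed earlier in the loop):

#include <cassert>
#include <map>
#include <vector>

using CallInfo = int;

struct ContextNode {
  std::vector<CallInfo> MatchingCalls;
};

// Returns true if Call was attached to an existing node and should be
// skipped by the caller; false if it has no recorded matching call.
bool attachToMatchingNode(
    CallInfo Call, const std::map<CallInfo, CallInfo> &CallToMatchingCall,
    std::map<CallInfo, ContextNode *> &NonAllocationCallToContextNodeMap) {
  auto It = CallToMatchingCall.find(Call);
  if (It == CallToMatchingCall.end())
    return false;
  CallInfo MatchingCall = It->second;
  // The matching (primary) call already has a node at this point.
  assert(NonAllocationCallToContextNodeMap.count(MatchingCall));
  NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
      Call);
  return true;
}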
@@ -1422,6 +1446,10 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
// there is more than one call with the same stack ids. Their (possibly newly
// duplicated) context ids are saved in the StackIdToMatchingCalls map.
DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
+ // Save a map from each call to any found to match it: calls located in the
+ // same function that have the same (possibly pruned) stack ids. We use
+ // this to avoid creating extra graph nodes as they can be treated the same.
+ DenseMap<CallInfo, CallInfo> CallToMatchingCall;
for (auto &It : StackIdToMatchingCalls) {
auto &Calls = It.getSecond();
// Skip single calls with a single stack id. These don't need a new node.
@@ -1460,6 +1488,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
assert(!LastNodeContextIds.empty());
+ // Map from function to the first call from the below list (with matching
+ // stack ids) found in that function. Note that calls from different
+ // functions can have the same stack ids because this is the list of stack
+ // ids that had (possibly pruned) nodes after building the graph from the
+ // allocation MIBs.
+ DenseMap<const FuncTy *, CallInfo> FuncToCallMap;
+
for (unsigned I = 0; I < Calls.size(); I++) {
auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
assert(SavedContextIds.empty());
@@ -1533,6 +1568,18 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
continue;
}
+ const FuncTy *CallFunc = CallToFunc[Call];
+
+ // If the prior call had the same stack ids, this map would not be empty.
+ // Check if we already have a call that "matches" because it is located
+ // in the same function.
+ if (FuncToCallMap.contains(CallFunc)) {
+ // Record the matching call found for this call, and skip it. We
+ // will subsequently combine it into the same node.
+ CallToMatchingCall[Call] = FuncToCallMap[CallFunc];
+ continue;
+ }
+
// Check if the next set of stack ids is the same (since the Calls vector
// of tuples is sorted by the stack ids we can just look at the next one).
bool DuplicateContextIds = false;
@@ -1562,7 +1609,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
set_subtract(LastNodeContextIds, StackSequenceContextIds);
if (LastNodeContextIds.empty())
break;
- }
+ // We can no longer be in a sequence of calls with duplicate stack ids,
+ // so clear the map.
+ FuncToCallMap.clear();
+ } else
+ // Record the call with its function, so we can locate it the next time
+ // we find a call from this function when processing the calls with the
+ // same stack ids.
+ FuncToCallMap[CallFunc] = Call;
}
}
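The grouping logic in isolation (CallInfo as an int, FuncTy as a string key, and the duplicate-stack-ids comparison against the next entry of Calls reduced to a precomputed flag):

#include <map>
#include <string>
#include <vector>

using CallInfo = int;
using FuncTy = std::string; // stands in for Function* / FunctionSummary*

struct CallEntry {
  CallInfo Call;
  FuncTy Func;
  bool NextHasSameIds; // the real code compares against the next entry
};

std::map<CallInfo, CallInfo>
findMatchingCalls(const std::vector<CallEntry> &Calls) {
  std::map<CallInfo, CallInfo> CallToMatchingCall;
  // First call from each function in the current run of equal stack ids.
  std::map<FuncTy, CallInfo> FuncToCallMap;
  for (const CallEntry &E : Calls) {
    auto It = FuncToCallMap.find(E.Func);
    if (It != FuncToCallMap.end()) {
      // Same function and same stack ids as an earlier call: record the
      // match and share that call's node instead of creating a new one.
      CallToMatchingCall[E.Call] = It->second;
      continue;
    }
    if (E.NextHasSameIds)
      FuncToCallMap[E.Func] = E.Call; // primary call for this function
    else
      FuncToCallMap.clear(); // the run of duplicate stack ids ends here
  }
  return CallToMatchingCall;
}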
@@ -1579,7 +1633,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
// associated context ids over to the new nodes.
DenseSet<const ContextNode *> Visited;
for (auto &Entry : AllocationCallToContextNodeMap)
- assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls);
+ assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls,
+ CallToMatchingCall);
if (VerifyCCG)
check();
}
@@ -1679,6 +1734,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
continue;
if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
CallsWithMetadata.push_back(&I);
+ CallToFunc[&I] = &F;
auto *AllocNode = addAllocNode(&I, &F);
auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
assert(CallsiteMD);
@@ -1700,8 +1756,10 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
I.setMetadata(LLVMContext::MD_callsite, nullptr);
}
// For callsite metadata, add to list for this function for later use.
- else if (I.getMetadata(LLVMContext::MD_callsite))
+ else if (I.getMetadata(LLVMContext::MD_callsite)) {
CallsWithMetadata.push_back(&I);
+ CallToFunc[&I] = &F;
+ }
}
}
if (!CallsWithMetadata.empty())
@@ -1756,8 +1814,10 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
// correlate properly in applyImport in the backends.
if (AN.MIBs.empty())
continue;
- CallsWithMetadata.push_back({&AN});
- auto *AllocNode = addAllocNode({&AN}, FS);
+ IndexCall AllocCall(&AN);
+ CallsWithMetadata.push_back(AllocCall);
+ CallToFunc[AllocCall] = FS;
+ auto *AllocNode = addAllocNode(AllocCall, FS);
// Pass an empty CallStack to the CallsiteContext (second)
// parameter, since for ThinLTO we already collapsed out the inlined
// stack ids on the allocation call during ModuleSummaryAnalysis.
@@ -1788,8 +1848,11 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
}
// For callsite metadata, add to list for this function for later use.
if (!FS->callsites().empty())
- for (auto &SN : FS->mutableCallsites())
- CallsWithMetadata.push_back({&SN});
+ for (auto &SN : FS->mutableCallsites()) {
+ IndexCall StackNodeCall(&SN);
+ CallsWithMetadata.push_back(StackNodeCall);
+ CallToFunc[StackNodeCall] = FS;
+ }
if (!CallsWithMetadata.empty())
FuncToCallsWithMetadata[FS] = CallsWithMetadata;
@@ -2225,6 +2288,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
if (Recursive)
OS << " (recursive)";
OS << "\n";
+ if (!MatchingCalls.empty()) {
+ OS << "\tMatchingCalls:\n";
+ for (auto &MatchingCall : MatchingCalls) {
+ OS << "\t";
+ MatchingCall.print(OS);
+ OS << "\n";
+ }
+ }
OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n";
OS << "\tContextIds:";
// Make a copy of the computed context ids that we can sort for stability.
@@ -2478,6 +2549,7 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
std::make_unique<ContextNode>(Node->IsAllocation, Node->Call));
ContextNode *Clone = NodeOwner.back().get();
Node->addClone(Clone);
+ Clone->MatchingCalls = Node->MatchingCalls;
assert(NodeToCallingFunc.count(Node));
NodeToCallingFunc[Clone] = NodeToCallingFunc[Node];
moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true,
@@ -3021,6 +3093,14 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
if (CallMap.count(Call))
CallClone = CallMap[Call];
CallsiteClone->setCall(CallClone);
+ // Need to do the same for all matching calls.
+ for (auto &MatchingCall : Node->MatchingCalls) {
+ CallInfo CallClone(MatchingCall);
+ if (CallMap.count(MatchingCall))
+ CallClone = CallMap[MatchingCall];
+ // Updates the call in the list.
+ MatchingCall = CallClone;
+ }
};
// Keep track of the clones of callsite Node that need to be assigned to
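The replay step from the lambda above, in isolation: whatever remapping the per-clone CallMap holds is applied to each MatchingCalls entry in place, exactly as for the primary call (std::map stands in for the LLVM map type):

#include <map>
#include <vector>

using CallInfo = int;

void remapMatchingCalls(std::vector<CallInfo> &MatchingCalls,
                        const std::map<CallInfo, CallInfo> &CallMap) {
  for (CallInfo &MatchingCall : MatchingCalls) {
    CallInfo CallClone = MatchingCall; // default: keep the original call
    auto It = CallMap.find(MatchingCall);
    if (It != CallMap.end())
      CallClone = It->second;
    MatchingCall = CallClone; // update the call in the list
  }
}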
@@ -3187,6 +3267,16 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
CallInfo NewCall(CallMap[OrigCall]);
assert(NewCall);
NewClone->setCall(NewCall);
+ // Need to do the same for all matching calls.
+ for (auto &MatchingCall : NewClone->MatchingCalls) {
+ CallInfo OrigMatchingCall(MatchingCall);
+ OrigMatchingCall.setCloneNo(0);
+ assert(CallMap.count(OrigMatchingCall));
+ CallInfo NewCall(CallMap[OrigMatchingCall]);
+ assert(NewCall);
+ // Updates the call in the list.
+ MatchingCall = NewCall;
+ }
}
}
// Fall through to handling below to perform the recording of the
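The same replay for a newly created function clone, where CallMap is keyed by the original (clone 0) call, so each matching call's lookup key is rebuilt by resetting its clone number first (a sketch with a tiny CallInfo carrying a clone number; setCloneNo mirrors the real accessor):

#include <cassert>
#include <map>
#include <tuple>
#include <vector>

struct CallInfo {
  int Id = 0;      // identifies the underlying call
  int CloneNo = 0; // which function clone the call currently lives in
  void setCloneNo(int N) { CloneNo = N; }
  bool operator<(const CallInfo &O) const {
    return std::tie(Id, CloneNo) < std::tie(O.Id, O.CloneNo);
  }
};

void remapForNewClone(std::vector<CallInfo> &MatchingCalls,
                      const std::map<CallInfo, CallInfo> &CallMap) {
  for (CallInfo &MatchingCall : MatchingCalls) {
    // CallMap is keyed by the original, un-cloned call.
    CallInfo OrigMatchingCall = MatchingCall;
    OrigMatchingCall.setCloneNo(0);
    auto It = CallMap.find(OrigMatchingCall);
    assert(It != CallMap.end());
    MatchingCall = It->second; // update the call in the list
  }
}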
@@ -3373,6 +3463,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
if (Node->IsAllocation) {
updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes));
+ assert(Node->MatchingCalls.empty());
return;
}
@@ -3381,6 +3472,9 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
updateCall(Node->Call, CalleeFunc);
+ // Update all the matching calls as well.
+ for (auto &Call : Node->MatchingCalls)
+ updateCall(Call, CalleeFunc);
};
// Performs DFS traversal starting from allocation nodes to update calls to
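And the end of the pipeline, sketched: once a callee function clone has been chosen for a node, the identical rewrite is applied to the primary call and then to every matching call, which is safe because the list was kept in sync with the primary through all of the cloning above (updateCall here is a stub for the real virtual hook):

#include <cstdio>
#include <vector>

using CallInfo = int;
using FuncInfo = int;

// Stub for the hook that retargets one call to the chosen function clone.
void updateCall(CallInfo &Call, FuncInfo CalleeFunc) {
  std::printf("retarget call %d -> function clone %d\n", Call, CalleeFunc);
}

void updateNodeCalls(CallInfo &PrimaryCall,
                     std::vector<CallInfo> &MatchingCalls,
                     FuncInfo CalleeFunc) {
  updateCall(PrimaryCall, CalleeFunc);
  // Matching calls share the node's cloning decisions, so they receive
  // the same retargeting.
  for (CallInfo &Call : MatchingCalls)
    updateCall(Call, CalleeFunc);
}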