[llvm] Reapply "[MemProf] Reduce cloning overhead by sharing nodes when possible" (#102932) with fixes (PR #106623)

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 30 16:40:04 PDT 2024


https://github.com/teresajohnson updated https://github.com/llvm/llvm-project/pull/106623

From 94737fb79315db2e4c4cb1fe3c8da347e44c9cd8 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Thu, 29 Aug 2024 13:18:10 -0700
Subject: [PATCH 1/3] Reapply "[MemProf] Reduce cloning overhead by sharing
 nodes when possible" (#102932) with fixes

This reverts commit 11aa31f595325d6b2dede3364e4b86d78fffe635, restoring
commit 055e4319112282354327af9908091fdb25149e9b, with added fixes for
linker unsats.

In some cases multiple calls to different targets may end up with the
same debug information, and therefore the same callsite id. We then end
up sharing a single node between these calls. We don't know which call
matches the callees until all nodes have been matched with calls, at
which point any non-matching calls should be removed from the node. The
fix extends the handling in handleCallsitesWithMultipleTargets to do
this, and adds tests for various permutations of this situation.
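
To illustrate the idea outside the pass itself, here is a minimal, self-contained
C++ sketch (hypothetical types and names, not the pass's actual data structures,
which use ContextNode::MatchingCalls and handleCallsitesWithMultipleTargets):
calls that share a function and callsite id are attached to a single node, and
once the callee expected by the node's callee edges is known, calls targeting
anything else are pruned. Requires C++20 for std::erase_if.

// Hypothetical, simplified illustration of sharing a node between calls with
// an aliased callsite id and pruning the non-matching calls afterwards.
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct Call {
  std::string Caller;  // Enclosing function.
  std::string Callee;  // Called function.
  uint64_t CallsiteId; // Id derived from the (possibly shared) debug location.
};

// One node shared by all calls from the same function with the same callsite id.
struct Node {
  std::vector<Call> Calls;
};

int main() {
  // Two calls in _Z3bazv carry the same callsite id (aliased debug location)
  // but target different callees, mirroring the new tests.
  std::vector<Call> AllCalls = {
      {"_Z3bazv", "blah", 42},
      {"_Z3bazv", "_Z3barv", 42},
  };

  // Step 1: share one node per (function, callsite id) instead of creating a
  // node per call.
  std::map<std::pair<std::string, uint64_t>, Node> Nodes;
  for (const Call &C : AllCalls)
    Nodes[{C.Caller, C.CallsiteId}].Calls.push_back(C);

  // Step 2: once the callee expected by the node's callee edges is known
  // (assumed here to be _Z3barv), remove calls that target anything else.
  const std::string ExpectedCallee = "_Z3barv";
  for (auto &[Key, N] : Nodes) {
    std::erase_if(N.Calls,
                  [&](const Call &C) { return C.Callee != ExpectedCallee; });
    std::cout << "node (" << Key.first << ", id " << Key.second << ") keeps "
              << N.Calls.size() << " call(s)\n";
  }
  return 0;
}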
---
 .../IPO/MemProfContextDisambiguation.cpp      | 124 +++++++++++++++---
 1 file changed, 109 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 66b68d5cd457fb..c9de9c964bba0a 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -242,9 +242,16 @@ class CallsiteContextGraph {
     // recursion.
     bool Recursive = false;
 
-    // The corresponding allocation or interior call.
+    // The corresponding allocation or interior call. This is the primary call
+    // for which we have created this node.
     CallInfo Call;
 
+    // List of other calls that can be treated the same as the primary call
+    // through cloning. I.e. located in the same function and have the same
+    // (possibly pruned) stack ids. They will be updated the same way as the
+    // primary call when assigning to function clones.
+    std::vector<CallInfo> MatchingCalls;
+
     // For alloc nodes this is a unique id assigned when constructed, and for
     // callsite stack nodes it is the original stack id when the node is
     // constructed from the memprof MIB metadata on the alloc nodes. Note that
@@ -457,6 +464,9 @@ class CallsiteContextGraph {
   /// iteration.
   MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata;
 
+  /// Records the function each call is located in.
+  DenseMap<CallInfo, const FuncTy *> CallToFunc;
+
   /// Map from callsite node to the enclosing caller function.
   std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
 
@@ -474,7 +484,8 @@ class CallsiteContextGraph {
   /// StackIdToMatchingCalls map.
   void assignStackNodesPostOrder(
       ContextNode *Node, DenseSet<const ContextNode *> &Visited,
-      DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls);
+      DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls,
+      DenseMap<CallInfo, CallInfo> &CallToMatchingCall);
 
   /// Duplicates the given set of context ids, updating the provided
   /// map from each original id with the newly generated context ids,
@@ -1230,10 +1241,11 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
 
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
-    assignStackNodesPostOrder(ContextNode *Node,
-                              DenseSet<const ContextNode *> &Visited,
-                              DenseMap<uint64_t, std::vector<CallContextInfo>>
-                                  &StackIdToMatchingCalls) {
+    assignStackNodesPostOrder(
+        ContextNode *Node, DenseSet<const ContextNode *> &Visited,
+        DenseMap<uint64_t, std::vector<CallContextInfo>>
+            &StackIdToMatchingCalls,
+        DenseMap<CallInfo, CallInfo> &CallToMatchingCall) {
   auto Inserted = Visited.insert(Node);
   if (!Inserted.second)
     return;
@@ -1246,7 +1258,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
     // Skip any that have been removed during the recursion.
     if (!Edge)
       continue;
-    assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls);
+    assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
+                              CallToMatchingCall);
   }
 
   // If this node's stack id is in the map, update the graph to contain new
@@ -1289,8 +1302,19 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
     auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
     // Skip any for which we didn't assign any ids, these don't get a node in
     // the graph.
-    if (SavedContextIds.empty())
+    if (SavedContextIds.empty()) {
+      // If this call has a matching call (located in the same function and
+      // having the same stack ids), simply add it to the context node created
+      // for its matching call earlier. These can be treated the same through
+      // cloning and get updated at the same time.
+      if (!CallToMatchingCall.contains(Call))
+        continue;
+      auto MatchingCall = CallToMatchingCall[Call];
+      assert(NonAllocationCallToContextNodeMap.contains(MatchingCall));
+      NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
+          Call);
       continue;
+    }
 
     assert(LastId == Ids.back());
 
@@ -1422,6 +1446,10 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
   // there is more than one call with the same stack ids. Their (possibly newly
   // duplicated) context ids are saved in the StackIdToMatchingCalls map.
   DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
+  // Save a map from each call to any that are found to match it. I.e. located
+  // in the same function and have the same (possibly pruned) stack ids. We use
+  // this to avoid creating extra graph nodes as they can be treated the same.
+  DenseMap<CallInfo, CallInfo> CallToMatchingCall;
   for (auto &It : StackIdToMatchingCalls) {
     auto &Calls = It.getSecond();
     // Skip single calls with a single stack id. These don't need a new node.
@@ -1460,6 +1488,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
     DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
     assert(!LastNodeContextIds.empty());
 
+    // Map from function to the first call from the below list (with matching
+    // stack ids) found in that function. Note that calls from different
+    // functions can have the same stack ids because this is the list of stack
+    // ids that had (possibly pruned) nodes after building the graph from the
+    // allocation MIBs.
+    DenseMap<const FuncTy *, CallInfo> FuncToCallMap;
+
     for (unsigned I = 0; I < Calls.size(); I++) {
       auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
       assert(SavedContextIds.empty());
@@ -1533,6 +1568,18 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
           continue;
       }
 
+      const FuncTy *CallFunc = CallToFunc[Call];
+
+      // If the prior call had the same stack ids this map would not be empty.
+      // Check if we already have a call that "matches" because it is located
+      // in the same function.
+      if (FuncToCallMap.contains(CallFunc)) {
+        // Record the matching call found for this call, and skip it. We
+        // will subsequently combine it into the same node.
+        CallToMatchingCall[Call] = FuncToCallMap[CallFunc];
+        continue;
+      }
+
       // Check if the next set of stack ids is the same (since the Calls vector
       // of tuples is sorted by the stack ids we can just look at the next one).
       bool DuplicateContextIds = false;
@@ -1562,7 +1609,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
         set_subtract(LastNodeContextIds, StackSequenceContextIds);
         if (LastNodeContextIds.empty())
           break;
-      }
+        // No longer possibly in a sequence of calls with duplicate stack ids,
+        // clear the map.
+        FuncToCallMap.clear();
+      } else
+        // Record the call with its function, so we can locate it the next time
+        // we find a call from this function when processing the calls with the
+        // same stack ids.
+        FuncToCallMap[CallFunc] = Call;
     }
   }
 
@@ -1579,7 +1633,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
   // associated context ids over to the new nodes.
   DenseSet<const ContextNode *> Visited;
   for (auto &Entry : AllocationCallToContextNodeMap)
-    assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls);
+    assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls,
+                              CallToMatchingCall);
   if (VerifyCCG)
     check();
 }
@@ -1679,6 +1734,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
           continue;
         if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
           CallsWithMetadata.push_back(&I);
+          CallToFunc[&I] = &F;
           auto *AllocNode = addAllocNode(&I, &F);
           auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
           assert(CallsiteMD);
@@ -1700,8 +1756,10 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
           I.setMetadata(LLVMContext::MD_callsite, nullptr);
         }
         // For callsite metadata, add to list for this function for later use.
-        else if (I.getMetadata(LLVMContext::MD_callsite))
+        else if (I.getMetadata(LLVMContext::MD_callsite)) {
           CallsWithMetadata.push_back(&I);
+          CallToFunc[&I] = &F;
+        }
       }
     }
     if (!CallsWithMetadata.empty())
@@ -1756,8 +1814,10 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
           // correlate properly in applyImport in the backends.
           if (AN.MIBs.empty())
             continue;
-          CallsWithMetadata.push_back({&AN});
-          auto *AllocNode = addAllocNode({&AN}, FS);
+          IndexCall AllocCall(&AN);
+          CallsWithMetadata.push_back(AllocCall);
+          CallToFunc[AllocCall] = FS;
+          auto *AllocNode = addAllocNode(AllocCall, FS);
           // Pass an empty CallStack to the CallsiteContext (second)
           // parameter, since for ThinLTO we already collapsed out the inlined
           // stack ids on the allocation call during ModuleSummaryAnalysis.
@@ -1788,8 +1848,11 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
       }
       // For callsite metadata, add to list for this function for later use.
       if (!FS->callsites().empty())
-        for (auto &SN : FS->mutableCallsites())
-          CallsWithMetadata.push_back({&SN});
+        for (auto &SN : FS->mutableCallsites()) {
+          IndexCall StackNodeCall(&SN);
+          CallsWithMetadata.push_back(StackNodeCall);
+          CallToFunc[StackNodeCall] = FS;
+        }
 
       if (!CallsWithMetadata.empty())
         FuncToCallsWithMetadata[FS] = CallsWithMetadata;
@@ -2225,6 +2288,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
   if (Recursive)
     OS << " (recursive)";
   OS << "\n";
+  if (!MatchingCalls.empty()) {
+    OS << "\tMatchingCalls:\n";
+    for (auto &MatchingCall : MatchingCalls) {
+      OS << "\t";
+      MatchingCall.print(OS);
+      OS << "\n";
+    }
+  }
   OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n";
   OS << "\tContextIds:";
   // Make a copy of the computed context ids that we can sort for stability.
@@ -2478,6 +2549,7 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
       std::make_unique<ContextNode>(Node->IsAllocation, Node->Call));
   ContextNode *Clone = NodeOwner.back().get();
   Node->addClone(Clone);
+  Clone->MatchingCalls = Node->MatchingCalls;
   assert(NodeToCallingFunc.count(Node));
   NodeToCallingFunc[Clone] = NodeToCallingFunc[Node];
   moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true,
@@ -3021,6 +3093,14 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
         if (CallMap.count(Call))
           CallClone = CallMap[Call];
         CallsiteClone->setCall(CallClone);
+        // Need to do the same for all matching calls.
+        for (auto &MatchingCall : Node->MatchingCalls) {
+          CallInfo CallClone(MatchingCall);
+          if (CallMap.count(MatchingCall))
+            CallClone = CallMap[MatchingCall];
+          // Updates the call in the list.
+          MatchingCall = CallClone;
+        }
       };
 
       // Keep track of the clones of callsite Node that need to be assigned to
@@ -3187,6 +3267,16 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
               CallInfo NewCall(CallMap[OrigCall]);
               assert(NewCall);
               NewClone->setCall(NewCall);
+              // Need to do the same for all matching calls.
+              for (auto &MatchingCall : NewClone->MatchingCalls) {
+                CallInfo OrigMatchingCall(MatchingCall);
+                OrigMatchingCall.setCloneNo(0);
+                assert(CallMap.count(OrigMatchingCall));
+                CallInfo NewCall(CallMap[OrigMatchingCall]);
+                assert(NewCall);
+                // Updates the call in the list.
+                MatchingCall = NewCall;
+              }
             }
           }
           // Fall through to handling below to perform the recording of the
@@ -3373,6 +3463,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
 
     if (Node->IsAllocation) {
       updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes));
+      assert(Node->MatchingCalls.empty());
       return;
     }
 
@@ -3381,6 +3472,9 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
 
     auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
     updateCall(Node->Call, CalleeFunc);
+    // Update all the matching calls as well.
+    for (auto &Call : Node->MatchingCalls)
+      updateCall(Call, CalleeFunc);
   };
 
   // Performs DFS traversal starting from allocation nodes to update calls to

From 0148e96ba569d962e00be5733216b812b2192e76 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Thu, 29 Aug 2024 13:27:04 -0700
Subject: [PATCH 2/3] These are the actual fixes and new tests on top of the
 original change.

---
 .../IPO/MemProfContextDisambiguation.cpp      | 107 ++++++-
 .../ThinLTO/X86/memprof-aliased-location1.ll  | 116 ++++++++
 .../ThinLTO/X86/memprof-aliased-location2.ll  | 116 ++++++++
 .../X86/memprof-tailcall-aliased-location1.ll |  99 +++++++
 .../X86/memprof-tailcall-aliased-location2.ll |  99 +++++++
 .../aliased-location1.ll                      | 274 ++++++++++++++++++
 .../aliased-location2.ll                      | 274 ++++++++++++++++++
 .../tailcall-aliased-location1.ll             | 100 +++++++
 .../tailcall-aliased-location2.ll             | 100 +++++++
 9 files changed, 1272 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/ThinLTO/X86/memprof-aliased-location1.ll
 create mode 100644 llvm/test/ThinLTO/X86/memprof-aliased-location2.ll
 create mode 100644 llvm/test/ThinLTO/X86/memprof-tailcall-aliased-location1.ll
 create mode 100644 llvm/test/ThinLTO/X86/memprof-tailcall-aliased-location2.ll
 create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/aliased-location1.ll
 create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/aliased-location2.ll
 create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/tailcall-aliased-location1.ll
 create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/tailcall-aliased-location2.ll

diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index c9de9c964bba0a..4f980ebc0dea59 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -532,6 +532,11 @@ class CallsiteContextGraph {
         Call, Func, CallerFunc, FoundCalleeChain);
   }
 
+  /// Returns true if both call instructions have the same callee.
+  bool sameCallee(CallTy Call1, CallTy Call2) {
+    return static_cast<DerivedCCG *>(this)->sameCallee(Call1, Call2);
+  }
+
   /// Get a list of nodes corresponding to the stack ids in the given
   /// callsite's context.
   std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
@@ -678,6 +683,7 @@ class ModuleCallsiteContextGraph
   bool calleeMatchesFunc(
       Instruction *Call, const Function *Func, const Function *CallerFunc,
       std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
+  bool sameCallee(Instruction *Call1, Instruction *Call2);
   bool findProfiledCalleeThroughTailCalls(
       const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,
       std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
@@ -755,6 +761,7 @@ class IndexCallsiteContextGraph
       IndexCall &Call, const FunctionSummary *Func,
       const FunctionSummary *CallerFunc,
       std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
+  bool sameCallee(IndexCall &Call1, IndexCall &Call2);
   bool findProfiledCalleeThroughTailCalls(
       ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,
       std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
@@ -1892,26 +1899,71 @@ void CallsiteContextGraph<DerivedCCG, FuncTy,
   // from the profiled contexts.
   MapVector<CallInfo, ContextNode *> TailCallToContextNodeMap;
 
+  std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
   for (auto &Entry : NonAllocationCallToContextNodeMap) {
     auto *Node = Entry.second;
     assert(Node->Clones.empty());
     // Check all node callees and see if in the same function.
-    auto Call = Node->Call.call();
-    for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();
-         ++EI) {
-      auto Edge = *EI;
-      if (!Edge->Callee->hasCall())
-        continue;
-      assert(NodeToCallingFunc.count(Edge->Callee));
-      // Check if the called function matches that of the callee node.
-      if (calleesMatch(Call, EI, TailCallToContextNodeMap))
-        continue;
+    // We need to check all of the calls recorded in this Node, because in some
+    // cases we may have had multiple calls with the same debug info calling
+    // different callees. Here we will prune any that don't match all callee
+    // nodes.
+    std::vector<CallInfo> AllCalls = Node->MatchingCalls;
+    AllCalls.insert(AllCalls.begin(), Node->Call);
+    auto It = AllCalls.begin();
+    // Iterate through the calls until we find the first that matches.
+    for (; It != AllCalls.end(); ++It) {
+      auto ThisCall = *It;
+      bool Match = true;
+      for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();
+           ++EI) {
+        auto Edge = *EI;
+        if (!Edge->Callee->hasCall())
+          continue;
+        assert(NodeToCallingFunc.count(Edge->Callee));
+        // Check if the called function matches that of the callee node.
+        if (!calleesMatch(ThisCall.call(), EI, TailCallToContextNodeMap)) {
+          Match = false;
+          break;
+        }
+      }
+      // Found a call that matches the callee nodes, we can quit now.
+      if (Match) {
+        // If the first match is not the primary call on the Node, update it
+        // now. We will update the list of matching calls further below.
+        if (Node->Call != ThisCall) {
+          Node->setCall(ThisCall);
+          // We need to update the NonAllocationCallToContextNodeMap, but don't
+          // want to do this during iteration over that map, so save the calls
+          // that need updated entries.
+          NewCallToNode.push_back({ThisCall, Node});
+          // We should only have shared this node between calls from the same
+          // function.
+          assert(NodeToCallingFunc[Node] == CallToFunc[Node->Call]);
+        }
+        break;
+      }
+    }
+    // We will update this list below (or leave it cleared if there was no
+    // match found above).
+    Node->MatchingCalls.clear();
+    // If we hit the end of the AllCalls vector, no call matching the callee
+    // nodes was found, clear the call information in the node.
+    if (It == AllCalls.end()) {
       RemovedEdgesWithMismatchedCallees++;
       // Work around by setting Node to have a null call, so it gets
       // skipped during cloning. Otherwise assignFunctions will assert
       // because its data structures are not designed to handle this case.
       Node->setCall(CallInfo());
-      break;
+      continue;
+    }
+    // Now add back any matching calls that call the same function as the
+    // matching primary call on Node.
+    for (++It; It != AllCalls.end(); ++It) {
+      auto ThisCall = *It;
+      if (!sameCallee(Node->Call.call(), ThisCall.call()))
+        continue;
+      Node->MatchingCalls.push_back(ThisCall);
     }
   }
 
@@ -1919,8 +1971,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy,
   // (checking whether they have a null call which is set above). For a
   // MapVector like NonAllocationCallToContextNodeMap it is much more efficient
   // to do the removal via remove_if than by individually erasing entries above.
-  NonAllocationCallToContextNodeMap.remove_if(
-      [](const auto &it) { return !it.second->hasCall(); });
+  // Also remove any entries if we updated the node's primary call above.
+  NonAllocationCallToContextNodeMap.remove_if([](const auto &it) {
+    return !it.second->hasCall() || it.second->Call != it.first;
+  });
+
+  // Add entries for any new primary calls recorded above.
+  for (auto &[Call, Node] : NewCallToNode)
+    NonAllocationCallToContextNodeMap[Call] = Node;
 
   // Add the new nodes after the above loop so that the iteration is not
   // invalidated.
@@ -2146,6 +2204,21 @@ bool ModuleCallsiteContextGraph::calleeMatchesFunc(
   return true;
 }
 
+bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
+                                            Instruction *Call2) {
+  auto *CB1 = dyn_cast<CallBase>(Call1);
+  if (!CB1->getCalledOperand() || CB1->isIndirectCall())
+    return false;
+  auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
+  auto *CalleeFunc1 = dyn_cast<Function>(CalleeVal1);
+  auto *CB2 = dyn_cast<CallBase>(Call2);
+  if (!CB2->getCalledOperand() || CB2->isIndirectCall())
+    return false;
+  auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
+  auto *CalleeFunc2 = dyn_cast<Function>(CalleeVal2);
+  return CalleeFunc1 == CalleeFunc2;
+}
+
 bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
     ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,
     std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
@@ -2272,6 +2345,14 @@ bool IndexCallsiteContextGraph::calleeMatchesFunc(
   return true;
 }
 
+bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
+  ValueInfo Callee1 =
+      dyn_cast_if_present<CallsiteInfo *>(Call1.getBase())->Callee;
+  ValueInfo Callee2 =
+      dyn_cast_if_present<CallsiteInfo *>(Call2.getBase())->Callee;
+  return Callee1 == Callee2;
+}
+
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
     const {
diff --git a/llvm/test/ThinLTO/X86/memprof-aliased-location1.ll b/llvm/test/ThinLTO/X86/memprof-aliased-location1.ll
new file mode 100644
index 00000000000000..42819d5421ca0f
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-aliased-location1.ll
@@ -0,0 +1,116 @@
+;; Test to ensure a call to a different callee but with the same debug info
+;; (and therefore callsite metadata) as a preceding call in the alloc context
+;; does not cause missing or incorrect cloning. This test is otherwise the same
+;; as memprof-basic.ll.
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+; RUN: opt -thinlto-bc %s >%t.o
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-supports-hot-cold-new \
+; RUN:	-r=%t.o,main,plx \
+; RUN:	-r=%t.o,blah, \
+; RUN:	-r=%t.o,_Znam, \
+; RUN:	-memprof-verify-ccg -memprof-verify-nodes \
+; RUN:	-stats -pass-remarks=memprof-context-disambiguation -save-temps \
+; RUN:	-o %t.out 2>&1 | FileCheck %s \
+; RUN:	--check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS
+
+; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
+
+source_filename = "memprof-aliased-location1.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+  %call = call ptr @_Z3foov(), !callsite !0
+  %call1 = call ptr @_Z3foov(), !callsite !1
+  ret i32 0
+}
+
+declare void @blah()
+
+define internal ptr @_Z3barv() #0 {
+entry:
+  %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7
+  ret ptr null
+}
+
+declare ptr @_Znam(i64)
+
+define internal ptr @_Z3bazv() #0 {
+entry:
+  ;; Preceding call to another callee but with the same debug location / callsite id
+  call void @blah(), !callsite !8
+  %call = call ptr @_Z3barv(), !callsite !8
+  ret ptr null
+}
+
+define internal ptr @_Z3foov() #0 {
+entry:
+  %call = call ptr @_Z3bazv(), !callsite !9
+  ret ptr null
+}
+
+; uselistorder directives
+uselistorder ptr @_Z3foov, { 1, 0 }
+
+attributes #0 = { noinline optnone }
+
+!0 = !{i64 8632435727821051414}
+!1 = !{i64 -3421689549917153178}
+!2 = !{!3, !5}
+!3 = !{!4, !"notcold", i64 100}
+!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
+!5 = !{!6, !"cold", i64 400}
+!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
+!7 = !{i64 9086428284934609951}
+!8 = !{i64 -5964873800580613432}
+!9 = !{i64 2732490490862098848}
+
+; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
+; REMARKS: created clone _Z3barv.memprof.1
+; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
+; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold
+; REMARKS: created clone _Z3bazv.memprof.1
+; REMARKS: call in clone _Z3bazv.memprof.1 assigned to call function clone _Z3barv.memprof.1
+; REMARKS: created clone _Z3foov.memprof.1
+; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3bazv.memprof.1
+
+
+; IR: define {{.*}} @main
+;; The first call to foo does not allocate cold memory. It should call the
+;; original functions, which ultimately call the original allocation decorated
+;; with a "notcold" attribute.
+; IR:   call {{.*}} @_Z3foov()
+;; The second call to foo allocates cold memory. It should call cloned functions
+;; which ultimately call a cloned allocation decorated with a "cold" attribute.
+; IR:   call {{.*}} @_Z3foov.memprof.1()
+; IR: define internal {{.*}} @_Z3barv()
+; IR:   call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
+; IR: define internal {{.*}} @_Z3bazv()
+; IR:   call {{.*}} @_Z3barv()
+; IR: define internal {{.*}} @_Z3foov()
+; IR:   call {{.*}} @_Z3bazv()
+; IR: define internal {{.*}} @_Z3barv.memprof.1()
+; IR:   call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
+; IR: define internal {{.*}} @_Z3bazv.memprof.1()
+; IR:   call {{.*}} @_Z3barv.memprof.1()
+; IR: define internal {{.*}} @_Z3foov.memprof.1()
+; IR:   call {{.*}} @_Z3bazv.memprof.1()
+; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
+; IR: attributes #[[COLD]] = { "memprof"="cold" }
+
+
+; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
+; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
+; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
+; STATS-BE: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
+; STATS-BE: 2 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend
+; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
+; STATS-BE: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
+; STATS-BE: 3 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend
+; STATS-BE: 2 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend
+; STATS-BE: 1 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend
diff --git a/llvm/test/ThinLTO/X86/memprof-aliased-location2.ll b/llvm/test/ThinLTO/X86/memprof-aliased-location2.ll
new file mode 100644
index 00000000000000..663f8525043c2f
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-aliased-location2.ll
@@ -0,0 +1,116 @@
+;; Test to ensure a call to a different callee but with the same debug info
+;; (and therefore callsite metadata) as a subsequent call in the alloc context
+;; does not cause missing or incorrect cloning. This test is otherwise the same
+;; as memprof-basic.ll.
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+; RUN: opt -thinlto-bc %s >%t.o
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-supports-hot-cold-new \
+; RUN:	-r=%t.o,main,plx \
+; RUN:	-r=%t.o,blah, \
+; RUN:	-r=%t.o,_Znam, \
+; RUN:	-memprof-verify-ccg -memprof-verify-nodes \
+; RUN:	-stats -pass-remarks=memprof-context-disambiguation -save-temps \
+; RUN:	-o %t.out 2>&1 | FileCheck %s \
+; RUN:	--check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS
+
+; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
+
+source_filename = "memprof-aliased-location2.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+  %call = call ptr @_Z3foov(), !callsite !0
+  %call1 = call ptr @_Z3foov(), !callsite !1
+  ret i32 0
+}
+
+declare void @blah()
+
+define internal ptr @_Z3barv() #0 {
+entry:
+  %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7
+  ret ptr null
+}
+
+declare ptr @_Znam(i64)
+
+define internal ptr @_Z3bazv() #0 {
+entry:
+  %call = call ptr @_Z3barv(), !callsite !8
+  ;; Subsequent call to another callee but with the same debug location / callsite id
+  call void @blah(), !callsite !8
+  ret ptr null
+}
+
+define internal ptr @_Z3foov() #0 {
+entry:
+  %call = call ptr @_Z3bazv(), !callsite !9
+  ret ptr null
+}
+
+; uselistorder directives
+uselistorder ptr @_Z3foov, { 1, 0 }
+
+attributes #0 = { noinline optnone }
+
+!0 = !{i64 8632435727821051414}
+!1 = !{i64 -3421689549917153178}
+!2 = !{!3, !5}
+!3 = !{!4, !"notcold", i64 100}
+!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
+!5 = !{!6, !"cold", i64 400}
+!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
+!7 = !{i64 9086428284934609951}
+!8 = !{i64 -5964873800580613432}
+!9 = !{i64 2732490490862098848}
+
+; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
+; REMARKS: created clone _Z3barv.memprof.1
+; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
+; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold
+; REMARKS: created clone _Z3bazv.memprof.1
+; REMARKS: call in clone _Z3bazv.memprof.1 assigned to call function clone _Z3barv.memprof.1
+; REMARKS: created clone _Z3foov.memprof.1
+; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3bazv.memprof.1
+
+
+; IR: define {{.*}} @main
+;; The first call to foo does not allocate cold memory. It should call the
+;; original functions, which ultimately call the original allocation decorated
+;; with a "notcold" attribute.
+; IR:   call {{.*}} @_Z3foov()
+;; The second call to foo allocates cold memory. It should call cloned functions
+;; which ultimately call a cloned allocation decorated with a "cold" attribute.
+; IR:   call {{.*}} @_Z3foov.memprof.1()
+; IR: define internal {{.*}} @_Z3barv()
+; IR:   call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
+; IR: define internal {{.*}} @_Z3bazv()
+; IR:   call {{.*}} @_Z3barv()
+; IR: define internal {{.*}} @_Z3foov()
+; IR:   call {{.*}} @_Z3bazv()
+; IR: define internal {{.*}} @_Z3barv.memprof.1()
+; IR:   call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
+; IR: define internal {{.*}} @_Z3bazv.memprof.1()
+; IR:   call {{.*}} @_Z3barv.memprof.1()
+; IR: define internal {{.*}} @_Z3foov.memprof.1()
+; IR:   call {{.*}} @_Z3bazv.memprof.1()
+; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
+; IR: attributes #[[COLD]] = { "memprof"="cold" }
+
+
+; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
+; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
+; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
+; STATS-BE: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
+; STATS-BE: 2 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend
+; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
+; STATS-BE: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
+; STATS-BE: 3 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend
+; STATS-BE: 2 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend
+; STATS-BE: 1 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend
diff --git a/llvm/test/ThinLTO/X86/memprof-tailcall-aliased-location1.ll b/llvm/test/ThinLTO/X86/memprof-tailcall-aliased-location1.ll
new file mode 100644
index 00000000000000..3f5dc7732dc5c3
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-tailcall-aliased-location1.ll
@@ -0,0 +1,99 @@
+;; Test to ensure a call to a different callee but with the same debug info
+;; (and therefore callsite metadata) as a preceding tail call in the alloc
+;; context does not cause missing or incorrect cloning. This test is otherwise
+;; the same as memprof-tailcall.ll.
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+; RUN: opt -thinlto-bc %s >%t.o
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:  -supports-hot-cold-new \
+; RUN:  -r=%t.o,_Z3barv,plx \
+; RUN:  -r=%t.o,_Z3bazv,plx \
+; RUN:  -r=%t.o,_Z3foov,plx \
+; RUN:  -r=%t.o,main,plx \
+; RUN:  -r=%t.o,_Znam, \
+; RUN:  -r=%t.o,blah, \
+; RUN:  -stats -save-temps \
+; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS
+
+; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
+
+; STATS: 2 memprof-context-disambiguation - Number of profiled callees found via tail calls
+; STATS: 4 memprof-context-disambiguation - Aggregate depth of profiled callees found via tail calls
+; STATS: 2 memprof-context-disambiguation - Maximum depth of profiled callees found via tail calls
+
+source_filename = "memprof-tailcall-aliased-location1.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3barv()
+define ptr @_Z3barv() local_unnamed_addr #0 {
+entry:
+  ; IR: call {{.*}} @_Znam(i64 10) #[[NOTCOLD:[0-9]+]]
+  %call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !5
+  ret ptr %call
+}
+
+; Function Attrs: nobuiltin allocsize(0)
+declare ptr @_Znam(i64) #1
+declare void @blah()
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3bazv()
+define ptr @_Z3bazv() #0 {
+entry:
+  ; IR: call ptr @_Z3barv()
+  %call = tail call ptr @_Z3barv()
+  ret ptr %call
+}
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3foov()
+define ptr @_Z3foov() #0 {
+entry:
+  ; IR: call ptr @_Z3bazv()
+  %call = tail call ptr @_Z3bazv()
+  ret ptr %call
+}
+
+; Function Attrs: noinline
+; IR-LABEL: @main()
+define i32 @main() #0 {
+  ;; Preceding call to another callee but with the same debug location / callsite id
+  call void @blah(), !callsite !6
+  ;; The first call to foo is part of a cold context, and should use the
+  ;; original functions.
+  ; IR: call ptr @_Z3foov()
+  %call = tail call ptr @_Z3foov(), !callsite !6
+  ;; The second call to foo is part of a cold context, and should call the
+  ;; cloned functions.
+  ; IR: call ptr @_Z3foov.memprof.1()
+  %call1 = tail call ptr @_Z3foov(), !callsite !7
+  ret i32 0
+}
+
+; IR-LABEL: @_Z3barv.memprof.1()
+; IR: call {{.*}} @_Znam(i64 10) #[[COLD:[0-9]+]]
+; IR-LABEL: @_Z3bazv.memprof.1()
+; IR: call ptr @_Z3barv.memprof.1()
+; IR-LABEL: @_Z3foov.memprof.1()
+; IR: call ptr @_Z3bazv.memprof.1()
+
+; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" }
+; IR: attributes #[[COLD]] = { builtin allocsize(0) "memprof"="cold" }
+
+attributes #0 = { noinline }
+attributes #1 = { nobuiltin allocsize(0) }
+attributes #2 = { builtin allocsize(0) }
+
+!0 = !{!1, !3}
+!1 = !{!2, !"notcold"}
+!2 = !{i64 3186456655321080972, i64 8632435727821051414}
+!3 = !{!4, !"cold"}
+!4 = !{i64 3186456655321080972, i64 -3421689549917153178}
+!5 = !{i64 3186456655321080972}
+!6 = !{i64 8632435727821051414}
+!7 = !{i64 -3421689549917153178}
diff --git a/llvm/test/ThinLTO/X86/memprof-tailcall-aliased-location2.ll b/llvm/test/ThinLTO/X86/memprof-tailcall-aliased-location2.ll
new file mode 100644
index 00000000000000..3085b4e41938b2
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-tailcall-aliased-location2.ll
@@ -0,0 +1,99 @@
+;; Test to ensure a call to a different callee but with the same debug info
+;; (and therefore callsite metadata) as a subsequent tail call in the alloc
+;; context does not cause missing or incorrect cloning. This test is otherwise
+;; the same as memprof-tailcall.ll.
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+; RUN: opt -thinlto-bc %s >%t.o
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:  -supports-hot-cold-new \
+; RUN:  -r=%t.o,_Z3barv,plx \
+; RUN:  -r=%t.o,_Z3bazv,plx \
+; RUN:  -r=%t.o,_Z3foov,plx \
+; RUN:  -r=%t.o,main,plx \
+; RUN:  -r=%t.o,_Znam, \
+; RUN:  -r=%t.o,blah, \
+; RUN:  -stats -save-temps \
+; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS
+
+; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
+
+; STATS: 2 memprof-context-disambiguation - Number of profiled callees found via tail calls
+; STATS: 4 memprof-context-disambiguation - Aggregate depth of profiled callees found via tail calls
+; STATS: 2 memprof-context-disambiguation - Maximum depth of profiled callees found via tail calls
+
+source_filename = "memprof-tailcall-aliased-location2.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3barv()
+define ptr @_Z3barv() local_unnamed_addr #0 {
+entry:
+  ; IR: call {{.*}} @_Znam(i64 10) #[[NOTCOLD:[0-9]+]]
+  %call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !5
+  ret ptr %call
+}
+
+; Function Attrs: nobuiltin allocsize(0)
+declare ptr @_Znam(i64) #1
+declare void @blah()
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3bazv()
+define ptr @_Z3bazv() #0 {
+entry:
+  ; IR: call ptr @_Z3barv()
+  %call = tail call ptr @_Z3barv()
+  ret ptr %call
+}
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3foov()
+define ptr @_Z3foov() #0 {
+entry:
+  ; IR: call ptr @_Z3bazv()
+  %call = tail call ptr @_Z3bazv()
+  ret ptr %call
+}
+
+; Function Attrs: noinline
+; IR-LABEL: @main()
+define i32 @main() #0 {
+  ;; The first call to foo is part of a cold context, and should use the
+  ;; original functions.
+  ; IR: call ptr @_Z3foov()
+  %call = tail call ptr @_Z3foov(), !callsite !6
+  ;; Subsequent call to another callee but with the same debug location / callsite id
+  call void @blah(), !callsite !6
+  ;; The second call to foo is part of a cold context, and should call the
+  ;; cloned functions.
+  ; IR: call ptr @_Z3foov.memprof.1()
+  %call1 = tail call ptr @_Z3foov(), !callsite !7
+  ret i32 0
+}
+
+; IR-LABEL: @_Z3barv.memprof.1()
+; IR: call {{.*}} @_Znam(i64 10) #[[COLD:[0-9]+]]
+; IR-LABEL: @_Z3bazv.memprof.1()
+; IR: call ptr @_Z3barv.memprof.1()
+; IR-LABEL: @_Z3foov.memprof.1()
+; IR: call ptr @_Z3bazv.memprof.1()
+
+; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" }
+; IR: attributes #[[COLD]] = { builtin allocsize(0) "memprof"="cold" }
+
+attributes #0 = { noinline }
+attributes #1 = { nobuiltin allocsize(0) }
+attributes #2 = { builtin allocsize(0) }
+
+!0 = !{!1, !3}
+!1 = !{!2, !"notcold"}
+!2 = !{i64 3186456655321080972, i64 8632435727821051414}
+!3 = !{!4, !"cold"}
+!4 = !{i64 3186456655321080972, i64 -3421689549917153178}
+!5 = !{i64 3186456655321080972}
+!6 = !{i64 8632435727821051414}
+!7 = !{i64 -3421689549917153178}
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location1.ll b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location1.ll
new file mode 100644
index 00000000000000..8f9df20471e41c
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location1.ll
@@ -0,0 +1,274 @@
+;; Test to ensure a call to a different callee but with the same debug info
+;; (and therefore callsite metadata) as a preceding call in the alloc context
+;; does not cause missing or incorrect cloning. This test is otherwise the same
+;; as basic.ll.
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:	-memprof-verify-ccg -memprof-verify-nodes \
+; RUN:	-stats -pass-remarks=memprof-context-disambiguation \
+; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=IR \
+; RUN:	--check-prefix=STATS --check-prefix=REMARKS
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+  %call = call noundef ptr @_Z3foov(), !callsite !0
+  %call1 = call noundef ptr @_Z3foov(), !callsite !1
+  ret i32 0
+}
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
+
+; Function Attrs: nobuiltin
+declare void @_ZdaPv() #2
+
+define internal ptr @_Z3barv() #3 {
+entry:
+  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7
+  ret ptr null
+}
+
+declare ptr @_Znam(i64)
+declare void @blah()
+
+define internal ptr @_Z3bazv() #4 {
+entry:
+  ;; Preceding call to another callee but with the same debug location / callsite id
+  call void @blah(), !callsite !8
+  %call = call noundef ptr @_Z3barv(), !callsite !8
+  ret ptr null
+}
+
+; Function Attrs: noinline
+define internal ptr @_Z3foov() #5 {
+entry:
+  %call = call noundef ptr @_Z3bazv(), !callsite !9
+  ret ptr null
+}
+
+; uselistorder directives
+uselistorder ptr @_Z3foov, { 1, 0 }
+
+attributes #0 = { "tune-cpu"="generic" }
+attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) }
+attributes #2 = { nobuiltin }
+attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
+attributes #4 = { "stack-protector-buffer-size"="8" }
+attributes #5 = { noinline }
+attributes #6 = { builtin }
+
+!0 = !{i64 8632435727821051414}
+!1 = !{i64 -3421689549917153178}
+!2 = !{!3, !5}
+!3 = !{!4, !"notcold", i64 100}
+!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
+!5 = !{!6, !"cold", i64 400}
+!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
+!7 = !{i64 9086428284934609951}
+!8 = !{i64 -5964873800580613432}
+!9 = !{i64 2732490490862098848}
+
+
+; DUMP: CCG before cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[BAR:0x[a-z0-9]+]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
+
+; DUMP: Node [[BAZ]]
+; DUMP: 	  %call = call noundef ptr @_Z3barv()	(clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
+
+; DUMP: Node [[FOO]]
+; DUMP: 	  %call = call noundef ptr @_Z3bazv()	(clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
+
+; DUMP: Node [[MAIN1]]
+; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: 	  %call1 = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[BAR:0x[a-z0-9]+]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[BAR2:0x[a-z0-9]+]]
+
+; DUMP: Node [[BAZ]]
+; DUMP: 	  %call = call noundef ptr @_Z3barv()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[BAZ2:0x[a-z0-9]+]]
+
+; DUMP: Node [[FOO]]
+; DUMP: 	  %call = call noundef ptr @_Z3bazv()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[FOO2:0x[a-z0-9]+]]
+
+; DUMP: Node [[MAIN1]]
+; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: 	  %call1 = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[FOO2]]
+; DUMP: 	  %call = call noundef ptr @_Z3bazv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[FOO]]
+
+; DUMP: Node [[BAZ2]]
+; DUMP: 	  %call = call noundef ptr @_Z3barv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[BAZ]]
+
+; DUMP: Node [[BAR2]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[BAR]]
+
+
+; REMARKS: created clone _Z3barv.memprof.1
+; REMARKS: created clone _Z3bazv.memprof.1
+; REMARKS: created clone _Z3foov.memprof.1
+; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
+; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3bazv.memprof.1
+; REMARKS: call in clone _Z3bazv.memprof.1 assigned to call function clone _Z3barv.memprof.1
+; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold
+; REMARKS: call in clone main assigned to call function clone _Z3foov
+; REMARKS: call in clone _Z3foov assigned to call function clone _Z3bazv
+; REMARKS: call in clone _Z3bazv assigned to call function clone _Z3barv
+; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
+
+; SIZES: NotCold context 1 with total size 100 is NotCold after cloning
+; SIZES: Cold context 2 with total size 400 is Cold after cloning
+
+; IR: define {{.*}} @main
+;; The first call to foo does not allocate cold memory. It should call the
+;; original functions, which ultimately call the original allocation decorated
+;; with a "notcold" attribute.
+; IR:   call {{.*}} @_Z3foov()
+;; The second call to foo allocates cold memory. It should call cloned functions
+;; which ultimately call a cloned allocation decorated with a "cold" attribute.
+; IR:   call {{.*}} @_Z3foov.memprof.1()
+; IR: define internal {{.*}} @_Z3barv()
+; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
+; IR: define internal {{.*}} @_Z3bazv()
+; IR:   call {{.*}} @_Z3barv()
+; IR: define internal {{.*}} @_Z3foov()
+; IR:   call {{.*}} @_Z3bazv()
+; IR: define internal {{.*}} @_Z3barv.memprof.1()
+; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
+; IR: define internal {{.*}} @_Z3bazv.memprof.1()
+; IR:   call {{.*}} @_Z3barv.memprof.1()
+; IR: define internal {{.*}} @_Z3foov.memprof.1()
+; IR:   call {{.*}} @_Z3bazv.memprof.1()
+; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
+; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }
+
+
+; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
+; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
+; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
+
+
+; DOT: digraph "postbuild" {
+; DOT: 	label="postbuild";
+; DOT: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOT: 	Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
+; DOT: 	Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOT: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
+; DOT: 	Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOT: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOT: 	Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOT: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOT: 	Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOT: }
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOTCLONED: 	Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
+; DOTCLONED: 	Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
+; DOTCLONED: 	Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3bazv}"];
+; DOTCLONED: 	Node[[FOO2]] -> Node[[BAZ2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAZ2]] [shape=record,tooltip="N[[BAZ2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3bazv -\> _Z3barv}"];
+; DOTCLONED: 	Node[[BAZ2]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOTCLONED: }
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location2.ll
new file mode 100644
index 00000000000000..c3c164d4928632
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location2.ll
@@ -0,0 +1,274 @@
+;; Test to ensure a call to a different callee but with the same debug info
+;; (and therefore callsite metadata) as a subsequent call in the alloc context
+;; does not cause missing or incorrect cloning. This test is otherwise the same
+;; as basic.ll.
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:	-memprof-verify-ccg -memprof-verify-nodes \
+; RUN:	-stats -pass-remarks=memprof-context-disambiguation \
+; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=IR \
+; RUN:	--check-prefix=STATS --check-prefix=REMARKS
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+  %call = call noundef ptr @_Z3foov(), !callsite !0
+  %call1 = call noundef ptr @_Z3foov(), !callsite !1
+  ret i32 0
+}
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
+
+; Function Attrs: nobuiltin
+declare void @_ZdaPv() #2
+
+define internal ptr @_Z3barv() #3 {
+entry:
+  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7
+  ret ptr null
+}
+
+declare ptr @_Znam(i64)
+declare void @blah()
+
+define internal ptr @_Z3bazv() #4 {
+entry:
+  %call = call noundef ptr @_Z3barv(), !callsite !8
+  ;; Subsequent call to another callee but with the same debug location / callsite id
+  call void @blah(), !callsite !8
+  ret ptr null
+}
+
+; Function Attrs: noinline
+define internal ptr @_Z3foov() #5 {
+entry:
+  %call = call noundef ptr @_Z3bazv(), !callsite !9
+  ret ptr null
+}
+
+; uselistorder directives
+uselistorder ptr @_Z3foov, { 1, 0 }
+
+attributes #0 = { "tune-cpu"="generic" }
+attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) }
+attributes #2 = { nobuiltin }
+attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
+attributes #4 = { "stack-protector-buffer-size"="8" }
+attributes #5 = { noinline }
+attributes #6 = { builtin }
+
+!0 = !{i64 8632435727821051414}
+!1 = !{i64 -3421689549917153178}
+!2 = !{!3, !5}
+!3 = !{!4, !"notcold", i64 100}
+!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
+!5 = !{!6, !"cold", i64 400}
+!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
+!7 = !{i64 9086428284934609951}
+!8 = !{i64 -5964873800580613432}
+!9 = !{i64 2732490490862098848}
+
+
+; DUMP: CCG before cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[BAR:0x[a-z0-9]+]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
+
+; DUMP: Node [[BAZ]]
+; DUMP: 	  %call = call noundef ptr @_Z3barv()	(clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
+
+; DUMP: Node [[FOO]]
+; DUMP: 	  %call = call noundef ptr @_Z3bazv()	(clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
+
+; DUMP: Node [[MAIN1]]
+; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: 	  %call1 = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[BAR:0x[a-z0-9]+]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[BAR2:0x[a-z0-9]+]]
+
+; DUMP: Node [[BAZ]]
+; DUMP: 	  %call = call noundef ptr @_Z3barv()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[BAZ2:0x[a-z0-9]+]]
+
+; DUMP: Node [[FOO]]
+; DUMP: 	  %call = call noundef ptr @_Z3bazv()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[FOO2:0x[a-z0-9]+]]
+
+; DUMP: Node [[MAIN1]]
+; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: 	  %call1 = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[FOO2]]
+; DUMP: 	  %call = call noundef ptr @_Z3bazv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[FOO]]
+
+; DUMP: Node [[BAZ2]]
+; DUMP: 	  %call = call noundef ptr @_Z3barv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[BAZ]]
+
+; DUMP: Node [[BAR2]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[BAR]]
+
+
+; REMARKS: created clone _Z3barv.memprof.1
+; REMARKS: created clone _Z3bazv.memprof.1
+; REMARKS: created clone _Z3foov.memprof.1
+; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
+; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3bazv.memprof.1
+; REMARKS: call in clone _Z3bazv.memprof.1 assigned to call function clone _Z3barv.memprof.1
+; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold
+; REMARKS: call in clone main assigned to call function clone _Z3foov
+; REMARKS: call in clone _Z3foov assigned to call function clone _Z3bazv
+; REMARKS: call in clone _Z3bazv assigned to call function clone _Z3barv
+; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
+
+; SIZES: NotCold context 1 with total size 100 is NotCold after cloning
+; SIZES: Cold context 2 with total size 400 is Cold after cloning
+
+; IR: define {{.*}} @main
+;; The first call to foo does not allocate cold memory. It should call the
+;; original functions, which ultimately call the original allocation decorated
+;; with a "notcold" attribute.
+; IR:   call {{.*}} @_Z3foov()
+;; The second call to foo allocates cold memory. It should call cloned functions
+;; which ultimately call a cloned allocation decorated with a "cold" attribute.
+; IR:   call {{.*}} @_Z3foov.memprof.1()
+; IR: define internal {{.*}} @_Z3barv()
+; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
+; IR: define internal {{.*}} @_Z3bazv()
+; IR:   call {{.*}} @_Z3barv()
+; IR: define internal {{.*}} @_Z3foov()
+; IR:   call {{.*}} @_Z3bazv()
+; IR: define internal {{.*}} @_Z3barv.memprof.1()
+; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
+; IR: define internal {{.*}} @_Z3bazv.memprof.1()
+; IR:   call {{.*}} @_Z3barv.memprof.1()
+; IR: define internal {{.*}} @_Z3foov.memprof.1()
+; IR:   call {{.*}} @_Z3bazv.memprof.1()
+; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
+; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }
+
+
+; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
+; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
+; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
+
+
+; DOT: digraph "postbuild" {
+; DOT: 	label="postbuild";
+; DOT: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOT: 	Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
+; DOT: 	Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOT: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
+; DOT: 	Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOT: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOT: 	Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOT: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOT: 	Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOT: }
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOTCLONED: 	Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
+; DOTCLONED: 	Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
+; DOTCLONED: 	Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3bazv}"];
+; DOTCLONED: 	Node[[FOO2]] -> Node[[BAZ2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAZ2]] [shape=record,tooltip="N[[BAZ2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3bazv -\> _Z3barv}"];
+; DOTCLONED: 	Node[[BAZ2]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOTCLONED: }
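
For context only (not part of the patch): a hypothetical C++ snippet, with invented
names, sketching one way two distinct calls can end up with identical callsite debug
info, which is the situation these aliased-location tests model. As the updated source
comment later in this PR notes, a temporary constructed in a parameter list has its
destructor call attributed to the same line/column as the call it is passed to.

// Hypothetical illustration, not part of this patch; names are invented.
// The temporary S() is destroyed at the end of the full expression, and its
// ~S() call gets the same line/column debug location as the call to
// consume(), so both calls can carry the same callsite id.
struct S {
  S() {}
  ~S() {}
};
static void consume(const S &) {}
void caller() {
  consume(S()); // consume() and the temporary's ~S() share this source location
}
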
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/tailcall-aliased-location1.ll b/llvm/test/Transforms/MemProfContextDisambiguation/tailcall-aliased-location1.ll
new file mode 100644
index 00000000000000..e0bcd284c097c2
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/tailcall-aliased-location1.ll
@@ -0,0 +1,100 @@
+;; Test to ensure a call to a different callee but with the same debug info
+;; (and therefore callsite metadata) as a preceding tail call in the alloc
+;; context does not cause missing or incorrect cloning. This test is otherwise
+;; the same as tailcall.ll.
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:  -stats %s -S 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=IR
+
+source_filename = "tailcall-aliased-location1.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = dso_local global [2 x ptr] [ptr @_Z2a1v, ptr @_Z2a2v], align 16
+
+declare void @_Z2a1v() #0
+
+declare void @_Z2a2v() #0
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3barv()
+define ptr @_Z3barv() local_unnamed_addr #0 {
+entry:
+  ; IR: call ptr @_Znam(i64 10) #[[NOTCOLD:[0-9]+]]
+  %call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !5
+  ret ptr %call
+}
+
+; Function Attrs: nobuiltin allocsize(0)
+declare ptr @_Znam(i64) #1
+declare void @blah()
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3bazv()
+define ptr @_Z3bazv() #0 {
+entry:
+  ; IR: call ptr @_Z3barv()
+  %call = tail call ptr @_Z3barv()
+  ret ptr %call
+}
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3foov()
+define ptr @_Z3foov() #0 {
+entry:
+  ; IR: call ptr @_Z3bazv()
+  %call = tail call ptr @_Z3bazv()
+  ret ptr %call
+}
+
+; Function Attrs: noinline
+; IR-LABEL: @main()
+define i32 @main() #0 {
+  ;; Preceding call to another callee but with the same debug location / callsite id
+  call void @blah(), !callsite !6
+  ;; The first call to foo is part of the non-cold context, and should use
+  ;; the original functions, which ultimately call the original allocation
+  ;; decorated with a "notcold" attribute.
+  ; IR: call ptr @_Z3foov()
+  %call = tail call ptr @_Z3foov(), !callsite !6
+  ;; The second call to foo is part of a cold context, and should call the
+  ;; cloned functions.
+  ; IR: call ptr @_Z3foov.memprof.1()
+  %call1 = tail call ptr @_Z3foov(), !callsite !7
+  %2 = load ptr, ptr @a, align 16
+  call void %2(), !callsite !10
+  ret i32 0
+}
+
+; IR-LABEL: @_Z3barv.memprof.1()
+; IR: call ptr @_Znam(i64 10) #[[COLD:[0-9]+]]
+; IR-LABEL: @_Z3bazv.memprof.1()
+; IR: call ptr @_Z3barv.memprof.1()
+; IR-LABEL: @_Z3foov.memprof.1()
+; IR: call ptr @_Z3bazv.memprof.1()
+
+; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" }
+; IR: attributes #[[COLD]] = { builtin allocsize(0) "memprof"="cold" }
+
+; STATS: 2 memprof-context-disambiguation - Number of profiled callees found via tail calls
+; STATS: 4 memprof-context-disambiguation - Aggregate depth of profiled callees found via tail calls
+; STATS: 2 memprof-context-disambiguation - Maximum depth of profiled callees found via tail calls
+
+attributes #0 = { noinline }
+attributes #1 = { nobuiltin allocsize(0) }
+attributes #2 = { builtin allocsize(0) }
+
+!0 = !{!1, !3, !8}
+!1 = !{!2, !"notcold"}
+!2 = !{i64 3186456655321080972, i64 8632435727821051414}
+!3 = !{!4, !"cold"}
+!4 = !{i64 3186456655321080972, i64 -3421689549917153178}
+!5 = !{i64 3186456655321080972}
+!6 = !{i64 8632435727821051414}
+!7 = !{i64 -3421689549917153178}
+!8 = !{!9, !"notcold"}
+!9 = !{i64 3186456655321080972, i64 1}
+!10 = !{i64 1}
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/tailcall-aliased-location2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/tailcall-aliased-location2.ll
new file mode 100644
index 00000000000000..1e76243fe0f48b
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/tailcall-aliased-location2.ll
@@ -0,0 +1,100 @@
+;; Test to ensure a call to a different callee but with the same debug info
+;; (and therefore callsite metadata) as a subsequent tail call in the alloc
+;; context does not cause missing or incorrect cloning. This test is otherwise
+;; the same as tailcall.ll.
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:  -stats %s -S 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=IR
+
+source_filename = "tailcall-aliased-location2.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = dso_local global [2 x ptr] [ptr @_Z2a1v, ptr @_Z2a2v], align 16
+
+declare void @_Z2a1v() #0
+
+declare void @_Z2a2v() #0
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3barv()
+define ptr @_Z3barv() local_unnamed_addr #0 {
+entry:
+  ; IR: call ptr @_Znam(i64 10) #[[NOTCOLD:[0-9]+]]
+  %call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !5
+  ret ptr %call
+}
+
+; Function Attrs: nobuiltin allocsize(0)
+declare ptr @_Znam(i64) #1
+declare void @blah()
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3bazv()
+define ptr @_Z3bazv() #0 {
+entry:
+  ; IR: call ptr @_Z3barv()
+  %call = tail call ptr @_Z3barv()
+  ret ptr %call
+}
+
+; Function Attrs: noinline
+; IR-LABEL: @_Z3foov()
+define ptr @_Z3foov() #0 {
+entry:
+  ; IR: call ptr @_Z3bazv()
+  %call = tail call ptr @_Z3bazv()
+  ret ptr %call
+}
+
+; Function Attrs: noinline
+; IR-LABEL: @main()
+define i32 @main() #0 {
+  ;; The first call to foo is part of the non-cold context, and should use
+  ;; the original functions, which ultimately call the original allocation
+  ;; decorated with a "notcold" attribute.
+  ; IR: call ptr @_Z3foov()
+  %call = tail call ptr @_Z3foov(), !callsite !6
+  ;; Subsequent call to another callee but with the same debug location / callsite id
+  call void @blah(), !callsite !6
+  ;; The second call to foo is part of a cold context, and should call the
+  ;; cloned functions.
+  ; IR: call ptr @_Z3foov.memprof.1()
+  %call1 = tail call ptr @_Z3foov(), !callsite !7
+  %2 = load ptr, ptr @a, align 16
+  call void %2(), !callsite !10
+  ret i32 0
+}
+
+; IR-LABEL: @_Z3barv.memprof.1()
+; IR: call ptr @_Znam(i64 10) #[[COLD:[0-9]+]]
+; IR-LABEL: @_Z3bazv.memprof.1()
+; IR: call ptr @_Z3barv.memprof.1()
+; IR-LABEL: @_Z3foov.memprof.1()
+; IR: call ptr @_Z3bazv.memprof.1()
+
+; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" }
+; IR: attributes #[[COLD]] = { builtin allocsize(0) "memprof"="cold" }
+
+; STATS: 2 memprof-context-disambiguation - Number of profiled callees found via tail calls
+; STATS: 4 memprof-context-disambiguation - Aggregate depth of profiled callees found via tail calls
+; STATS: 2 memprof-context-disambiguation - Maximum depth of profiled callees found via tail calls
+
+attributes #0 = { noinline }
+attributes #1 = { nobuiltin allocsize(0) }
+attributes #2 = { builtin allocsize(0) }
+
+!0 = !{!1, !3, !8}
+!1 = !{!2, !"notcold"}
+!2 = !{i64 3186456655321080972, i64 8632435727821051414}
+!3 = !{!4, !"cold"}
+!4 = !{i64 3186456655321080972, i64 -3421689549917153178}
+!5 = !{i64 3186456655321080972}
+!6 = !{i64 8632435727821051414}
+!7 = !{i64 -3421689549917153178}
+!8 = !{!9, !"notcold"}
+!9 = !{i64 3186456655321080972, i64 1}
+!10 = !{i64 1}

>From a9f19fd273c00edb8131289e2e15fa6edb7e7bad Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Fri, 30 Aug 2024 16:39:46 -0700
Subject: [PATCH 3/3] Address comments

---
 .../IPO/MemProfContextDisambiguation.cpp         | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 4f980ebc0dea59..4723d6e101c416 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -1906,10 +1906,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy,
     // Check all node callees and see if in the same function.
     // We need to check all of the calls recorded in this Node, because in some
     // cases we may have had multiple calls with the same debug info calling
-    // different callees. Here we will prune any that don't match all callee
-    // nodes.
-    std::vector<CallInfo> AllCalls = Node->MatchingCalls;
-    AllCalls.insert(AllCalls.begin(), Node->Call);
+    // different callees. This can happen, for example, when an object is
+    // constructed in the parameter list - the destructor call of the object has
+    // the same debug info (line/col) as the call the object was passed to.
+    // Here we will prune any that don't match all callee nodes.
+    std::vector<CallInfo> AllCalls;
+    AllCalls.reserve(Node->MatchingCalls.size() + 1);
+    AllCalls.push_back(Node->Call);
+    AllCalls.insert(AllCalls.end(), Node->MatchingCalls.begin(), Node->MatchingCalls.end());
     auto It = AllCalls.begin();
     // Iterate through the calls until we find the first that matches.
     for (; It != AllCalls.end(); ++It) {
@@ -2206,12 +2210,12 @@ bool ModuleCallsiteContextGraph::calleeMatchesFunc(
 
 bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
                                             Instruction *Call2) {
-  auto *CB1 = dyn_cast<CallBase>(Call1);
+  auto *CB1 = cast<CallBase>(Call1);
   if (!CB1->getCalledOperand() || CB1->isIndirectCall())
     return false;
   auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
   auto *CalleeFunc1 = dyn_cast<Function>(CalleeVal1);
-  auto *CB2 = dyn_cast<CallBase>(Call2);
+  auto *CB2 = cast<CallBase>(Call2);
   if (!CB2->getCalledOperand() || CB2->isIndirectCall())
     return false;
   auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();


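As a rough standalone sketch of the reworked handling in the first hunk above (invented
types and helper names, not the pass's actual data structures or API): gather the
primary call plus its MatchingCalls, walk them until one matches the callee, make that
the primary call, and keep only the remaining calls that also match.

#include <algorithm>
#include <vector>

struct SimpleCall { int CalleeId; }; // stand-in for CallInfo in this sketch

// Hypothetical helper mirroring the select-then-prune pattern: pick the first
// call whose callee matches WantedCallee and drop the others from the list.
static bool selectMatchingCalls(SimpleCall &Primary,
                                std::vector<SimpleCall> &MatchingCalls,
                                int WantedCallee) {
  std::vector<SimpleCall> AllCalls;
  AllCalls.reserve(MatchingCalls.size() + 1);
  AllCalls.push_back(Primary);
  AllCalls.insert(AllCalls.end(), MatchingCalls.begin(), MatchingCalls.end());

  // Iterate through the calls until we find the first that matches.
  auto It = std::find_if(AllCalls.begin(), AllCalls.end(),
                         [&](const SimpleCall &C) {
                           return C.CalleeId == WantedCallee;
                         });
  if (It == AllCalls.end())
    return false; // no call on this node matches the callee

  Primary = *It; // the first matching call becomes the node's primary call
  MatchingCalls.clear();
  for (++It; It != AllCalls.end(); ++It)
    if (It->CalleeId == WantedCallee) // prune calls targeting other callees
      MatchingCalls.push_back(*It);
  return true;
}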

More information about the llvm-commits mailing list