[llvm] a104e27 - Restore "[MemProf] Context disambiguation cloning pass [patch 2/3]"

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 21 19:40:22 PDT 2023


Author: Teresa Johnson
Date: 2023-04-21T19:38:46-07:00
New Revision: a104e27030587507a711cef0e2b0ddb447fe68fe

URL: https://github.com/llvm/llvm-project/commit/a104e27030587507a711cef0e2b0ddb447fe68fe
DIFF: https://github.com/llvm/llvm-project/commit/a104e27030587507a711cef0e2b0ddb447fe68fe.diff

LOG: Restore "[MemProf] Context disambiguation cloning pass [patch 2/3]"

This restores d0649a6ad8be778abf7569f502148d577f8bc6f1 (reverted in
commit 03bf59d275a16815dc5a2e3f279815554f7cd0ca), with fixes for bot
failures. Confirmed that gcc, which reproduced both failures, now
builds it fine.

Differential Revision: https://reviews.llvm.org/D140949

Added: 
    

Modified: 
    llvm/include/llvm/IR/ModuleSummaryIndex.h
    llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
    llvm/test/ThinLTO/X86/memprof-basic.ll
    llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
    llvm/test/ThinLTO/X86/memprof-indirectcall.ll
    llvm/test/ThinLTO/X86/memprof-inlined.ll
    llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
    llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
    llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
    llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 39f44791921aa..c540fa5b4ba14 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -341,7 +341,12 @@ inline raw_ostream &operator<<(raw_ostream &OS, const CallsiteInfo &SNI) {
 // Values should be powers of two so that they can be ORed, in particular to
 // track allocations that have different behavior with different calling
 // contexts.
-enum class AllocationType : uint8_t { None = 0, NotCold = 1, Cold = 2 };
+enum class AllocationType : uint8_t {
+  None = 0,
+  NotCold = 1,
+  Cold = 2,
+  All = 3 // This should always be set to the OR of all values.
+};
 
 /// Summary of a single MIB in a memprof metadata on allocations.
 struct MIBInfo {

diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index f9ad2d30d07b9..e6798e18982c9 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -67,22 +67,6 @@ static cl::opt<bool>
     VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden,
                 cl::desc("Perform frequent verification checks on nodes."));
 
-inline bool hasSingleAllocType(uint8_t AllocTypes) {
-  switch (AllocTypes) {
-  case (uint8_t)AllocationType::Cold:
-  case (uint8_t)AllocationType::NotCold:
-    return true;
-    break;
-  case (uint8_t)AllocationType::None:
-    assert(false);
-    break;
-  default:
-    return false;
-    break;
-  }
-  llvm_unreachable("invalid alloc type");
-}
-
 /// CRTP base for graphs built from either IR or ThinLTO summary index.
 ///
 /// The graph represents the call contexts in all memprof metadata on allocation
@@ -107,6 +91,10 @@ class CallsiteContextGraph {
   /// Main entry point to perform analysis and transformations on graph.
   bool process();
 
+  /// Perform cloning on the graph necessary to uniquely identify the allocation
+  /// behavior of an allocation based on its context.
+  void identifyClones();
+
   void dump() const;
   void print(raw_ostream &OS) const;
 
@@ -214,16 +202,15 @@ class CallsiteContextGraph {
     ContextNode(bool IsAllocation, CallInfo C)
         : IsAllocation(IsAllocation), Call(C) {}
 
-    std::unique_ptr<ContextNode> clone() {
-      auto Clone = std::make_unique<ContextNode>(IsAllocation, Call);
+    void addClone(ContextNode *Clone) {
       if (CloneOf) {
-        CloneOf->Clones.push_back(Clone.get());
+        CloneOf->Clones.push_back(Clone);
         Clone->CloneOf = CloneOf;
       } else {
-        Clones.push_back(Clone.get());
+        Clones.push_back(Clone);
+        assert(!Clone->CloneOf);
         Clone->CloneOf = this;
       }
-      return Clone;
     }
 
     ContextNode *getOrigNode() {
@@ -292,6 +279,10 @@ class CallsiteContextGraph {
     }
   };
 
+  /// Helper to remove callee edges that have allocation type None (due to not
+  /// carrying any context ids) after transformations.
+  void removeNoneTypeCalleeEdges(ContextNode *Node);
+
 protected:
   /// Get a list of nodes corresponding to the stack ids in the given callsite
   /// context.
@@ -403,6 +394,40 @@ class CallsiteContextGraph {
   /// unioning their recorded alloc types.
   uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds);
 
+  /// Returns the allocation type of the intersection of the contexts of two
+  /// nodes (based on their provided context id sets), optimized for the case
+  /// when Node1Ids is smaller than Node2Ids.
+  uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids,
+                                  const DenseSet<uint32_t> &Node2Ids);
+
+  /// Returns the allocation type of the intersection of the contexts of two
+  /// nodes (based on their provided context id sets).
+  uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids,
+                              const DenseSet<uint32_t> &Node2Ids);
+
+  /// Create a clone of Edge's callee and move Edge to that new callee node,
+  /// performing the necessary context id and allocation type updates.
+  /// If callee's caller edge iterator is supplied, it is updated when removing
+  /// the edge from that list.
+  ContextNode *
+  moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
+                           EdgeIter *CallerEdgeI = nullptr);
+
+  /// Change the callee of Edge to existing callee clone NewCallee, performing
+  /// the necessary context id and allocation type updates.
+  /// If callee's caller edge iterator is supplied, it is updated when removing
+  /// the edge from that list.
+  void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
+                                     ContextNode *NewCallee,
+                                     EdgeIter *CallerEdgeI = nullptr,
+                                     bool NewClone = false);
+
+  /// Recursively perform cloning on the graph for the given Node and its
+  /// callers, in order to uniquely identify the allocation behavior of an
+  /// allocation given its context.
+  void identifyClones(ContextNode *Node,
+                      DenseSet<const ContextNode *> &Visited);
+
   /// Map from each context ID to the AllocationType assigned to that context.
   std::map<uint32_t, AllocationType> ContextIdToAllocationType;
 
@@ -543,6 +568,28 @@ AllocationType allocTypeToUse(uint8_t AllocTypes) {
     return (AllocationType)AllocTypes;
 }
 
+// Helper to check if the alloc types for all edges recorded in the
+// InAllocTypes vector match the alloc types for all edges in the Edges
+// vector.
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+bool allocTypesMatch(
+    const std::vector<uint8_t> &InAllocTypes,
+    const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
+        &Edges) {
+  return std::equal(
+      InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(),
+      [](const uint8_t &l,
+         const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
+        // Can share if one of the edges is None type - don't
+        // care about the type along that edge as it doesn't
+        // exist for those context ids.
+        if (l == (uint8_t)AllocationType::None ||
+            r->AllocTypes == (uint8_t)AllocationType::None)
+          return true;
+        return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
+      });
+}
+
 } // end anonymous namespace
 
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
@@ -607,6 +654,20 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
   Caller->CalleeEdges.push_back(Edge);
 }
 
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<
+    DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
+  for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) {
+    auto Edge = *EI;
+    if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
+      assert(Edge->ContextIds.empty());
+      Edge->Callee->eraseCallerEdge(Edge.get());
+      EI = Node->CalleeEdges.erase(EI);
+    } else
+      ++EI;
+  }
+}
+
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
 CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
@@ -666,6 +727,33 @@ uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
   return AllocType;
 }
 
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+uint8_t
+CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
+    const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) {
+  uint8_t BothTypes =
+      (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
+  uint8_t AllocType = (uint8_t)AllocationType::None;
+  for (auto Id : Node1Ids) {
+    if (!Node2Ids.count(Id))
+      continue;
+    AllocType |= (uint8_t)ContextIdToAllocationType[Id];
+    // Bail early if alloc type reached both, no further refinement.
+    if (AllocType == BothTypes)
+      return AllocType;
+  }
+  return AllocType;
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
+    const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) {
+  if (Node1Ids.size() < Node2Ids.size())
+    return intersectAllocTypesImpl(Node1Ids, Node2Ids);
+  else
+    return intersectAllocTypesImpl(Node2Ids, Node1Ids);
+}
+
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
 CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
@@ -1121,7 +1209,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
       // not fully matching stack contexts. To do this, subtract any context ids
       // found in caller nodes of the last node found above.
       if (Ids.back() != getLastStackId(Call)) {
-        for (const auto &PE : LastNode->CallerEdges) {
+        for (const auto &PE : CurNode->CallerEdges) {
           set_subtract(StackSequenceContextIds, PE->getContextIds());
           if (StackSequenceContextIds.empty())
             break;
@@ -1572,7 +1660,8 @@ static void checkEdge(
 }
 
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
-static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node) {
+static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
+                      bool CheckEdges = false) {
   if (Node->isRemoved())
     return;
   // Node's context ids should be the union of both its callee and caller edge
@@ -1584,6 +1673,8 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node) {
     DenseSet<uint32_t> CallerEdgeContextIds(FirstEdge->ContextIds);
     for (; EI != Node->CallerEdges.end(); EI++) {
       const auto &Edge = *EI;
+      if (CheckEdges)
+        checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
       set_union(CallerEdgeContextIds, Edge->ContextIds);
     }
     // Node can have more context ids than callers if some contexts terminate at
@@ -1598,6 +1689,8 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node) {
     DenseSet<uint32_t> CalleeEdgeContextIds(FirstEdge->ContextIds);
     for (; EI != Node->CalleeEdges.end(); EI++) {
       const auto &Edge = *EI;
+      if (CheckEdges)
+        checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
       set_union(CalleeEdgeContextIds, Edge->ContextIds);
     }
     assert(Node->ContextIds == CalleeEdgeContextIds);
@@ -1760,6 +1853,276 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
              DotFilePathPrefix + "ccg." + Label + ".dot");
 }
 
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
+CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
+    const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI) {
+  ContextNode *Node = Edge->Callee;
+  NodeOwner.push_back(
+      std::make_unique<ContextNode>(Node->IsAllocation, Node->Call));
+  ContextNode *Clone = NodeOwner.back().get();
+  Node->addClone(Clone);
+  assert(NodeToCallingFunc.count(Node));
+  NodeToCallingFunc[Clone] = NodeToCallingFunc[Node];
+  moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true);
+  return Clone;
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
+    moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
+                                  ContextNode *NewCallee, EdgeIter *CallerEdgeI,
+                                  bool NewClone) {
+  // NewCallee and Edge's current callee must be clones of the same original
+  // node (Edge's current callee may be the original node too).
+  assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
+  auto &EdgeContextIds = Edge->getContextIds();
+  ContextNode *OldCallee = Edge->Callee;
+  if (CallerEdgeI)
+    *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI);
+  else
+    OldCallee->eraseCallerEdge(Edge.get());
+  Edge->Callee = NewCallee;
+  NewCallee->CallerEdges.push_back(Edge);
+  // Don't need to update Edge's context ids since we are simply reconnecting
+  // it.
+  set_subtract(OldCallee->ContextIds, EdgeContextIds);
+  NewCallee->ContextIds.insert(EdgeContextIds.begin(), EdgeContextIds.end());
+  NewCallee->AllocTypes |= Edge->AllocTypes;
+  OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds);
+  // OldCallee alloc type should be None iff its context id set is now empty.
+  assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
+         OldCallee->ContextIds.empty());
+  // Now walk the old callee node's callee edges and move Edge's context ids
+  // over to the corresponding edge into the clone (which is created here if
+  // this is a newly created clone).
+  for (auto &OldCalleeEdge : OldCallee->CalleeEdges) {
+    // The context ids moving to the new callee are the subset of this edge's
+    // context ids and the context ids on the caller edge being moved.
+    DenseSet<uint32_t> EdgeContextIdsToMove =
+        set_intersection(OldCalleeEdge->getContextIds(), EdgeContextIds);
+    set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
+    OldCalleeEdge->AllocTypes =
+        computeAllocType(OldCalleeEdge->getContextIds());
+    if (!NewClone) {
+      // Update context ids / alloc type on corresponding edge to NewCallee.
+      // There is a chance this may not exist if we are reusing an existing
+      // clone, specifically during function assignment, where we would have
+      // removed none type edges after creating the clone. If we can't find
+      // a corresponding edge there, fall through to the cloning below.
+      if (auto *NewCalleeEdge =
+              NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
+        NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
+                                              EdgeContextIdsToMove.end());
+        NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
+        continue;
+      }
+    }
+    auto NewEdge = std::make_shared<ContextEdge>(
+        OldCalleeEdge->Callee, NewCallee,
+        computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
+    NewCallee->CalleeEdges.push_back(NewEdge);
+    NewEdge->Callee->CallerEdges.push_back(NewEdge);
+  }
+  if (VerifyCCG) {
+    checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee);
+    checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee);
+    for (const auto &OldCalleeEdge : OldCallee->CalleeEdges)
+      checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee);
+    for (const auto &NewCalleeEdge : NewCallee->CalleeEdges)
+      checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee);
+  }
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
+  DenseSet<const ContextNode *> Visited;
+  for (auto &Entry : AllocationCallToContextNodeMap)
+    identifyClones(Entry.second, Visited);
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
+    ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
+  if (VerifyNodes)
+    checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/true);
+  assert(!Node->CloneOf);
+
+  // If Node has a null call, then either it wasn't found in the module (regular
+  // LTO) or summary index (ThinLTO), or there were other conditions blocking
+  // cloning (e.g. recursion, calls multiple targets, etc).
+  // Do this here so that we don't try to recursively clone callers below, which
+  // isn't useful at least for this node.
+  if (!Node->hasCall())
+    return;
+
+#ifndef NDEBUG
+  auto Insert =
+#endif
+      Visited.insert(Node);
+  // We should not have visited this node yet.
+  assert(Insert.second);
+  // The recursive call to identifyClones may delete the current edge from the
+  // CallerEdges vector. Make a copy and iterate on that, simpler than passing
+  // in an iterator and having recursive call erase from it. Other edges may
+  // also get removed during the recursion, which will have null Callee and
+  // Caller pointers (and are deleted later), so we skip those below.
+  {
+    auto CallerEdges = Node->CallerEdges;
+    for (auto &Edge : CallerEdges) {
+      // Skip any that have been removed by an earlier recursive call.
+      if (Edge->Callee == nullptr && Edge->Caller == nullptr) {
+        assert(!std::count(Node->CallerEdges.begin(), Node->CallerEdges.end(),
+                           Edge));
+        continue;
+      }
+      // Ignore any caller we previously visited via another edge.
+      if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) {
+        identifyClones(Edge->Caller, Visited);
+      }
+    }
+  }
+
+  // Check if we reached an unambiguous call or have only a single caller.
+  if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
+    return;
+
+  // We need to clone.
+
+  // Try to keep the original version as alloc type NotCold. This will make
+  // cases with indirect calls or any other situation with an unknown call to
+  // the original function get the default behavior. We do this by sorting the
+  // CallerEdges of the Node we will clone by alloc type.
+  //
+  // Give NotCold edge the lowest sort priority so those edges are at the end of
+  // the caller edges vector, and stay on the original version (since the below
+  // code clones greedily until it finds all remaining edges have the same type
+  // and leaves the remaining ones on the original Node).
+  //
+  // We shouldn't actually have any None type edges, so the sorting priority for
+  // that is arbitrary, and we assert in that case below.
+  constexpr unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4,
+                                                   /*Cold*/ 1,
+                                                   /*NotColdCold*/ 2};
+  assert(std::size(AllocTypeCloningPriority) ==
+         (std::size_t)AllocationType::All + 1);
+  std::stable_sort(Node->CallerEdges.begin(), Node->CallerEdges.end(),
+                   [&](const std::shared_ptr<ContextEdge> &A,
+                       const std::shared_ptr<ContextEdge> &B) {
+                     assert(A->AllocTypes != (uint8_t)AllocationType::None &&
+                            B->AllocTypes != (uint8_t)AllocationType::None);
+                     if (A->AllocTypes == B->AllocTypes)
+                       // Use the first context id for each edge as a
+                       // tie-breaker.
+                       return *A->ContextIds.begin() < *B->ContextIds.begin();
+                     return AllocTypeCloningPriority[A->AllocTypes] <
+                            AllocTypeCloningPriority[B->AllocTypes];
+                   });
+
+  assert(Node->AllocTypes != (uint8_t)AllocationType::None);
+
+  // Iterate until we find no more opportunities for disambiguating the alloc
+  // types via cloning. In most cases this loop will terminate once the Node
+  // has a single allocation type, in which case no more cloning is needed.
+  // We need to be able to remove Edge from CallerEdges, so need to adjust
+  // iterator inside the loop.
+  for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) {
+    auto CallerEdge = *EI;
+
+    // See if cloning the prior caller edge left this node with a single alloc
+    // type or a single caller. In that case no more cloning of Node is needed.
+    if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
+      break;
+
+    // Compute the node callee edge alloc types corresponding to the context ids
+    // for this caller edge.
+    std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
+    CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size());
+    for (auto &CalleeEdge : Node->CalleeEdges)
+      CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
+          CalleeEdge->getContextIds(), CallerEdge->getContextIds()));
+
+    // Don't clone if doing so will not disambiguate any alloc types amongst
+    // caller edges (including the callee edges that would be cloned).
+    // Otherwise we will simply move all edges to the clone.
+    //
+    // First check if by cloning we will disambiguate the caller allocation
+    // type from node's allocation type. Query allocTypeToUse so that we don't
+    // bother cloning to distinguish NotCold+Cold from NotCold. Note that
+    // neither of these should be None type.
+    //
+    // Then check if by cloning node at least one of the callee edges will be
+    // disambiguated by splitting out different context ids.
+    assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
+    assert(Node->AllocTypes != (uint8_t)AllocationType::None);
+    if (allocTypeToUse(CallerEdge->AllocTypes) ==
+            allocTypeToUse(Node->AllocTypes) &&
+        allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
+            CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) {
+      ++EI;
+      continue;
+    }
+
+    // First see if we can use an existing clone. Check each clone and its
+    // callee edges for matching alloc types.
+    ContextNode *Clone = nullptr;
+    for (auto *CurClone : Node->Clones) {
+      if (allocTypeToUse(CurClone->AllocTypes) !=
+          allocTypeToUse(CallerEdge->AllocTypes))
+        continue;
+
+      if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
+              CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges))
+        continue;
+      Clone = CurClone;
+      break;
+    }
+
+    // The edge iterator is adjusted when we move the CallerEdge to the clone.
+    if (Clone)
+      moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI);
+    else
+      Clone = moveEdgeToNewCalleeClone(CallerEdge, &EI);
+
+    assert(EI == Node->CallerEdges.end() ||
+           Node->AllocTypes != (uint8_t)AllocationType::None);
+    // Sanity check that no alloc types on clone or its edges are None.
+    assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
+    assert(llvm::none_of(
+        Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
+          return E->AllocTypes == (uint8_t)AllocationType::None;
+        }));
+  }
+
+  // Cloning may have resulted in some cloned callee edges with type None,
+  // because they aren't carrying any contexts. Remove those edges.
+  for (auto *Clone : Node->Clones) {
+    removeNoneTypeCalleeEdges(Clone);
+    if (VerifyNodes)
+      checkNode<DerivedCCG, FuncTy, CallTy>(Clone, /*CheckEdges=*/true);
+  }
+  // We should still have some context ids on the original Node.
+  assert(!Node->ContextIds.empty());
+
+  // Remove any callee edges that ended up with alloc type None after creating
+  // clones and updating callee edges.
+  removeNoneTypeCalleeEdges(Node);
+
+  // Sanity check that no alloc types on node or edges are None.
+  assert(Node->AllocTypes != (uint8_t)AllocationType::None);
+  assert(llvm::none_of(Node->CalleeEdges,
+                       [&](const std::shared_ptr<ContextEdge> &E) {
+                         return E->AllocTypes == (uint8_t)AllocationType::None;
+                       }));
+  assert(llvm::none_of(Node->CallerEdges,
+                       [&](const std::shared_ptr<ContextEdge> &E) {
+                         return E->AllocTypes == (uint8_t)AllocationType::None;
+                       }));
+
+  if (VerifyNodes)
+    checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/true);
+}
+
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
   if (DumpCCG) {
@@ -1773,6 +2136,19 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
     check();
   }
 
+  identifyClones();
+
+  if (VerifyCCG) {
+    check();
+  }
+
+  if (DumpCCG) {
+    dbgs() << "CCG after cloning:\n";
+    dbgs() << *this;
+  }
+  if (ExportToDot)
+    exportToDot("cloned");
+
   return false;
 }
 

diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll
index d8c78d270f277..4d11244b2b42b 100644
--- a/llvm/test/ThinLTO/X86/memprof-basic.ll
+++ b/llvm/test/ThinLTO/X86/memprof-basic.ll
@@ -42,6 +42,8 @@
 ; RUN:	-o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
+;; We should have cloned bar, baz, and foo, for the cold memory allocation.
+; RUN:	cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
 source_filename = "memprof-basic.ll"
@@ -142,6 +144,88 @@ uselistorder ptr @_Z3foov, { 1, 0 }
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
 ; DUMP: 	CallerEdges:
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[BAR]]
+; DUMP: 	Versions: 1 MIB:
+; DUMP:                 AllocType 1 StackIds: 2, 3, 0
+; DUMP:                 AllocType 2 StackIds: 2, 3, 1
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[BAR2:0x[a-z0-9]+]]
+
+; DUMP: Node [[BAZ]]
+; DUMP: 	Callee: 9832687305761716512 (_Z3barv) Clones: 0 StackIds: 2    (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[BAZ2:0x[a-z0-9]+]]
+
+; DUMP: Node [[FOO]]
+; DUMP: 	Callee: 5878270615442837395 (_Z3bazv) Clones: 0 StackIds: 3    (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[FOO2:0x[a-z0-9]+]]
+
+; DUMP: Node [[MAIN1]]
+; DUMP: 	Callee: 6731117468105397038 (_Z3foov) Clones: 0 StackIds: 0     (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: 	Callee: 6731117468105397038 (_Z3foov) Clones: 0 StackIds: 1     (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[FOO2]]
+; DUMP: 	Callee: 5878270615442837395 (_Z3bazv) Clones: 0 StackIds: 3    (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[FOO]]
+
+; DUMP: Node [[BAZ2]]
+; DUMP: 	Callee: 9832687305761716512 (_Z3barv) Clones: 0 StackIds: 2    (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[BAZ]]
+
+; DUMP: Node [[BAR2]]
+; DUMP: 	Versions: 1 MIB:
+; DUMP:                 AllocType 1 StackIds: 2, 3, 0
+; DUMP:                 AllocType 2 StackIds: 2, 3, 1
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[BAR]]
+
 
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
@@ -155,3 +239,22 @@ uselistorder ptr @_Z3foov, { 1, 0 }
 ; DOT: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
 ; DOT: 	Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
 ; DOT: }
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"];
+; DOTCLONED: 	Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
+; DOTCLONED: 	Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
+; DOTCLONED: 	Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3bazv}"];
+; DOTCLONED: 	Node[[FOO2]] -> Node[[BAZ2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAZ2]] [shape=record,tooltip="N[[BAZ2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3bazv -\> _Z3barv}"];
+; DOTCLONED: 	Node[[BAZ2]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"];
+; DOTCLONED: }

diff --git a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
index 772b319e0715e..3b297dd96cede 100644
--- a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
+++ b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
@@ -64,6 +64,8 @@
 
 ; RUN:  cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
 ; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
+;; We should clone D once for the cold allocations via C.
+; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
 source_filename = "duplicate-context-ids.ll"
@@ -205,6 +207,67 @@ declare i32 @sleep()
 ; DUMP: 	CallerEdges:
 
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[D]]
+; DUMP:         Versions: 1 MIB:
+; DUMP:                 AllocType 2 StackIds: 0
+; DUMP:                 AllocType 1 StackIds: 1
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
+; DUMP:         Clones: [[D2:0x[a-z0-9]+]]
+
+; DUMP: Node [[F]]
+; DUMP:         Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 1       (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[C2]]
+; DUMP:         Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0       (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 3
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[B]]
+; DUMP:         Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 2    (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[E]]
+; DUMP:         Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 3    (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[D2]]
+; DUMP:         Versions: 1 MIB:
+; DUMP:                 AllocType 2 StackIds: 0
+; DUMP:                 AllocType 1 StackIds: 1
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 1 3 4
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
+; DUMP:         Clone of [[D]]
+
+
 ; DOTPRE: digraph "prestackupdate" {
 ; DOTPRE: 	label="prestackupdate";
 ; DOTPRE: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
@@ -227,3 +290,18 @@ declare i32 @sleep()
 ; DOTPOST:	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
 ; DOTPOST:	Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
 ; DOTPOST:}
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
+; DOTCLONED: 	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
+; DOTCLONED: 	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[C]] -> Node[[D2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="cyan"];
+; DOTCLONED: 	Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[B]] -> Node[[D2]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTCLONED: 	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[E]] -> Node[[D2]][tooltip="ContextIds: 1",fillcolor="cyan"];
+; DOTCLONED: 	Node[[D2]] [shape=record,tooltip="N[[D2]] ContextIds: 1 3 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
+; DOTCLONED: }

diff  --git a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll
index 30c8bd27f37b7..9cf209271ce4d 100644
--- a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll
+++ b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll
@@ -64,6 +64,9 @@
 ; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
+;; We should only create a single clone of foo, for the direct call
+;; from main allocating cold memory.
+; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
 source_filename = "indirectcall.ll"
@@ -240,6 +243,121 @@ uselistorder ptr @_Z3foov, { 3, 2, 1, 0 }
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6
 ; DUMP: 	CallerEdges:
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[FOO]]
+; DUMP:         Versions: 1 MIB:
+; DUMP:                 AllocType 1 StackIds: 6, 8, 4
+; DUMP:                 AllocType 2 StackIds: 6, 8, 5
+; DUMP:                 AllocType 1 StackIds: 0
+; DUMP:                 AllocType 2 StackIds: 7, 8, 2
+; DUMP:                 AllocType 1 StackIds: 7, 8, 3
+; DUMP:                 AllocType 2 StackIds: 1
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2 3 4 5
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP:		Clones: [[FOO2:0x[a-z0-9]+]]
+
+; DUMP: Node [[AX]]
+; DUMP: 	Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 6    (clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2
+
+; DUMP: Node [[BAR]]
+; DUMP: 	null Call
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2 4 5
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 		Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5
+
+; DUMP: Node [[MAIN3]]
+; DUMP: 	Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 4   (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN4]]
+; DUMP: 	Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 5   (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN1]]
+; DUMP: 	Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 0    (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 3
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[BX]]
+; DUMP: 	Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 7    (clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 4 5
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5
+
+; DUMP: Node [[MAIN5]]
+; DUMP: 	Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 2   (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN6]]
+; DUMP: 	Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 3   (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 5
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: 	Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 1    (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 6
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[FOO2]]
+; DUMP:         Versions: 1 MIB:
+; DUMP:                 AllocType 1 StackIds: 6, 8, 4
+; DUMP:                 AllocType 2 StackIds: 6, 8, 5
+; DUMP:                 AllocType 1 StackIds: 0
+; DUMP:                 AllocType 2 StackIds: 7, 8, 2
+; DUMP:                 AllocType 1 StackIds: 7, 8, 3
+; DUMP:                 AllocType 2 StackIds: 1
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 6
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6
+; DUMP:		Clone of [[FOO]]
+
 
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
@@ -264,3 +382,29 @@ uselistorder ptr @_Z3foov, { 3, 2, 1, 0 }
 ; DOT: 	Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
 ; DOT: 	Node[[MAIN6]] -> Node[[FOO]][tooltip="ContextIds: 6",fillcolor="cyan"];
 ; DOT: }
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[FOO2:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2 3 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> alloc}"];
+; DOTCLONED: 	Node[[AX:0x[a-z0-9]+]] [shape=record,tooltip="N[[AX]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 8256774051149711748\n_ZN1A1xEv -\> _Z3foov}"];
+; DOTCLONED: 	Node[[AX]] -> Node[[FOO2]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13626499562959447861\nnull call (external)}"];
+; DOTCLONED: 	Node[[BAR]] -> Node[[AX]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[BAR]] -> Node[[BX:0x[a-z0-9]+]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 748269490701775343\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12699492813229484831\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[BAR]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[MAIN3:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN3]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN3]] -> Node[[FOO2]][tooltip="ContextIds: 3",fillcolor="brown1"];
+; DOTCLONED: 	Node[[BX]] [shape=record,tooltip="N[[BX]] ContextIds: 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13614864978754796978\n_ZN1B1xEv -\> _Z3foov}"];
+; DOTCLONED: 	Node[[BX]] -> Node[[FOO2]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[MAIN4:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN4]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 6792096022461663180\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN4]] -> Node[[BAR]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTCLONED: 	Node[[MAIN5:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN5]] ContextIds: 5",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 15737101490731057601\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN5]] -> Node[[BAR]][tooltip="ContextIds: 5",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN6]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 6",fillcolor="cyan"];
+; DOTCLONED: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 6",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3foov -\> alloc}"];
+; DOTCLONED: }

diff  --git a/llvm/test/ThinLTO/X86/memprof-inlined.ll b/llvm/test/ThinLTO/X86/memprof-inlined.ll
index 89cd878e99fb4..7a2304f7202b4 100644
--- a/llvm/test/ThinLTO/X86/memprof-inlined.ll
+++ b/llvm/test/ThinLTO/X86/memprof-inlined.ll
@@ -51,6 +51,9 @@
 ; RUN:	-o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
+;; We should create clones for foo and bar for the call from main to allocate
+;; cold memory.
+; RUN:	cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
 source_filename = "inlined.ll"
@@ -168,6 +171,91 @@ declare i32 @sleep()
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[BAZ]]
+; DUMP:         Versions: 1 MIB:
+; DUMP:                 AllocType 1 StackIds: 1, 2
+; DUMP:                 AllocType 2 StackIds: 1, 3
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 1 2
+
+; DUMP: Node [[FOO2]]
+; DUMP: 	null Call
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+
+; DUMP: Node [[MAIN1]]
+; DUMP:         Callee: 2229562716906371625 (_Z3foov) Clones: 0 StackIds: 2     (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1 3
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP:         Callee: 2229562716906371625 (_Z3foov) Clones: 0 StackIds: 3     (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 		Edge from Callee [[FOO3:0x[a-z0-9]+]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[BAR]]
+; DUMP:         Versions: 1 MIB:
+; DUMP:                 AllocType 1 StackIds: 0, 1, 2
+; DUMP:                 AllocType 2 StackIds: 0, 1, 3
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 3
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 3
+; DUMP:         Clones: [[BAR2:0x[a-z0-9]+]]
+
+; DUMP: Node [[FOO]]
+; DUMP:         Callee: 16064618363798697104 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 3
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 3
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP:         Clones: [[FOO3]]
+
+; DUMP: Node [[FOO3]]
+; DUMP:         Callee: 16064618363798697104 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 4
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO3]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
+; DUMP:         Clone of [[FOO]]
+
+; DUMP: Node [[BAR2]]
+; DUMP:         Versions: 1 MIB:
+; DUMP:                 AllocType 1 StackIds: 0, 1, 2
+; DUMP:                 AllocType 2 StackIds: 0, 1, 3
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 4
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 4
+; DUMP:         Clone of [[BAR]]
+
 
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
@@ -184,3 +272,23 @@ declare i32 @sleep()
 ; DOT: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
 ; DOT: 	Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"];
 ; DOT: }
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3bazv -\> alloc}"];
+; DOTCLONED: 	Node[[FOO2:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"];
+; DOTCLONED: 	Node[[FOO2]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO2]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO3:0x[a-z0-9]+]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3barv -\> alloc}"];
+; DOTCLONED: 	Node[[FOO]] [shape=record,tooltip="N[[FOO]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
+; DOTCLONED: 	Node[[FOO]] -> Node[[BAR]][tooltip="ContextIds: 3",fillcolor="brown1"];
+; DOTCLONED: 	Node[[FOO3]] [shape=record,tooltip="N[[FOO3]] ContextIds: 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
+; DOTCLONED: 	Node[[FOO3]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"];
+; DOTCLONED: }

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
index 539d88a815ed1..7a48d66af47e0 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
@@ -37,6 +37,8 @@
 ; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
+;; We should have cloned bar, baz, and foo, for the cold memory allocation.
+; RUN:	cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -143,6 +145,82 @@ attributes #6 = { builtin }
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
 ; DUMP: 	CallerEdges:
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[BAR:0x[a-z0-9]+]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[BAR2:0x[a-z0-9]+]]
+
+; DUMP: Node [[BAZ]]
+; DUMP: 	  %call = call noundef ptr @_Z3barv()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[BAZ2:0x[a-z0-9]+]]
+
+; DUMP: Node [[FOO]]
+; DUMP: 	  %call = call noundef ptr @_Z3bazv()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[FOO2:0x[a-z0-9]+]]
+
+; DUMP: Node [[MAIN1]]
+; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: 	  %call1 = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[FOO2]]
+; DUMP: 	  %call = call noundef ptr @_Z3bazv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[FOO]]
+
+; DUMP: Node [[BAZ2]]
+; DUMP: 	  %call = call noundef ptr @_Z3barv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[BAZ]]
+
+; DUMP: Node [[BAR2]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[BAR]]
+
 
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
@@ -156,3 +234,22 @@ attributes #6 = { builtin }
 ; DOT: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
 ; DOT: 	Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
 ; DOT: }
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOTCLONED: 	Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
+; DOTCLONED: 	Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
+; DOTCLONED: 	Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3bazv}"];
+; DOTCLONED: 	Node[[FOO2]] -> Node[[BAZ2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAZ2]] [shape=record,tooltip="N[[BAZ2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3bazv -\> _Z3barv}"];
+; DOTCLONED: 	Node[[BAZ2]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOTCLONED: }

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
index c5ed97f182a98..aa5f539a61832 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
@@ -59,6 +59,8 @@
 
 ; RUN:  cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
 ; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
+;; We should clone D once for the cold allocations via C.
+; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -207,6 +209,60 @@ attributes #6 = { builtin }
 ; DUMP: 		Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
 ; DUMP: 	CallerEdges:
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[D]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
+; DUMP:         Clones: [[D2:0x[a-z0-9]+]]
+
+; DUMP: Node [[F]]
+; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[C2]]
+; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 3
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[B]]
+; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[E]]
+; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[D2]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 1 3 4
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
+; DUMP:         Clone of [[D]]
+
 
 ; DOTPRE: digraph "prestackupdate" {
 ; DOTPRE: 	label="prestackupdate";
@@ -230,3 +286,18 @@ attributes #6 = { builtin }
 ; DOTPOST:	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
 ; DOTPOST:	Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
 ; DOTPOST:}
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
+; DOTCLONED: 	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
+; DOTCLONED: 	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[C]] -> Node[[D2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="cyan"];
+; DOTCLONED: 	Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[B]] -> Node[[D2]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTCLONED: 	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[E]] -> Node[[D2]][tooltip="ContextIds: 1",fillcolor="cyan"];
+; DOTCLONED: 	Node[[D2]] [shape=record,tooltip="N[[D2]] ContextIds: 1 3 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
+; DOTCLONED: }

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
index 9ebf219dd37a0..e66ec0d76fe8b 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
@@ -57,6 +57,9 @@
 ; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
+;; We should only create a single clone of foo, for the direct call
+;; from main allocating cold memory.
+; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
@@ -235,6 +238,107 @@ attributes #7 = { builtin }
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6
 ; DUMP: 	CallerEdges:
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[FOO]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2 3 4 5
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP:		Clones: [[FOO2:0x[a-z0-9]+]]
+
+; DUMP: Node [[AX]]
+; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2
+
+; DUMP: Node [[BAR]]
+; DUMP: 	null Call
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2 4 5
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 		Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5
+
+; DUMP: Node [[MAIN3]]
+; DUMP: 	  %call4 = call noundef ptr @_Z3barP1A(ptr noundef %a)	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN4]]
+; DUMP: 	  %call5 = call noundef ptr @_Z3barP1A(ptr noundef %a)	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN1]]
+; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 3
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[BX]]
+; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 4 5
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5
+
+; DUMP: Node [[MAIN5]]
+; DUMP: 	  %call2 = call noundef ptr @_Z3barP1A(ptr noundef %b)	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN6]]
+; DUMP: 	  %call3 = call noundef ptr @_Z3barP1A(ptr noundef %b)	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 5
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: 	  %call1 = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 6
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[FOO2]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 6
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6
+; DUMP:		Clone of [[FOO]]
+
 
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
@@ -259,3 +363,29 @@ attributes #7 = { builtin }
 ; DOT: 	Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
 ; DOT: 	Node[[MAIN6]] -> Node[[FOO]][tooltip="ContextIds: 6",fillcolor="cyan"];
 ; DOT: }
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2 3 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> _Znam}"];
+; DOTCLONED: 	Node[[AX:0x[a-z0-9]+]] [shape=record,tooltip="N[[AX]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 8256774051149711748\n_ZN1A1xEv -\> _Z3foov}"];
+; DOTCLONED: 	Node[[AX]] -> Node[[FOO]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13626499562959447861\nnull call (external)}"];
+; DOTCLONED: 	Node[[BAR]] -> Node[[AX]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[BAR]] -> Node[[BX:0x[a-z0-9]+]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 748269490701775343\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12699492813229484831\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[BAR]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[MAIN3:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN3]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN3]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"];
+; DOTCLONED: 	Node[[BX]] [shape=record,tooltip="N[[BX]] ContextIds: 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13614864978754796978\n_ZN1B1xEv -\> _Z3foov}"];
+; DOTCLONED: 	Node[[BX]] -> Node[[FOO]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[MAIN4:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN4]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 6792096022461663180\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN4]] -> Node[[BAR]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTCLONED: 	Node[[MAIN5:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN5]] ContextIds: 5",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 15737101490731057601\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN5]] -> Node[[BAR]][tooltip="ContextIds: 5",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN6]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 6",fillcolor="cyan"];
+; DOTCLONED: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 6",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3foov -\> _Znam}"];
+; DOTCLONED: }

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
index 59f135ca06627..a2fa703e04e7b 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
@@ -46,6 +46,9 @@
 ; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
+;; We should create clones for foo and bar for the call from main to allocate
+;; cold memory.
+; RUN:	cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
@@ -171,6 +174,82 @@ attributes #7 = { builtin }
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[BAR]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
+; DUMP:         Clones: [[BAR2:0x[a-z0-9]+]]
+
+; DUMP: Node [[FOO2]]
+; DUMP: 	null Call
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 3 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
+
+; DUMP: Node [[MAIN1]]
+; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1 3
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: 	  %call1 = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
+; DUMP: 		Edge from Callee [[FOO3:0x[a-z0-9]+]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[BAZ]]
+; DUMP: 	  %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 3 4
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
+
+; DUMP: Node [[FOO]]
+; DUMP: 	  %call.i = call noundef ptr @_Z3barv()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP:         Clones: [[FOO3]]
+
+; DUMP: Node [[FOO3]]
+; DUMP: 	  %call.i = call noundef ptr @_Z3barv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO3]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP:         Clone of [[FOO]]
+
+; DUMP: Node [[BAR2]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 2
+; DUMP:         Clone of [[BAR]]
+
 
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
@@ -187,3 +266,23 @@ attributes #7 = { builtin }
 ; DOT: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
 ; DOT: 	Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
 ; DOT: }
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOTCLONED: 	Node[[FOO2:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO2]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"];
+; DOTCLONED: 	Node[[FOO2]] -> Node[[BAZ:0x[a-z0-9]+]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO2]][tooltip="ContextIds: 3",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO:0x[a-z0-9]+]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO3:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAZ]] [shape=record,tooltip="N[[BAZ]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3bazv -\> _Znam}"];
+; DOTCLONED: 	Node[[FOO]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
+; DOTCLONED: 	Node[[FOO]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[FOO3]] [shape=record,tooltip="N[[FOO3]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
+; DOTCLONED: 	Node[[FOO3]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOTCLONED: }


        


More information about the llvm-commits mailing list