[llvm] d0649a6 - [MemProf] Context disambiguation cloning pass [patch 2/3]

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 21 14:31:57 PDT 2023


Author: Teresa Johnson
Date: 2023-04-21T14:31:44-07:00
New Revision: d0649a6ad8be778abf7569f502148d577f8bc6f1

URL: https://github.com/llvm/llvm-project/commit/d0649a6ad8be778abf7569f502148d577f8bc6f1
DIFF: https://github.com/llvm/llvm-project/commit/d0649a6ad8be778abf7569f502148d577f8bc6f1.diff

LOG: [MemProf] Context disambiguation cloning pass [patch 2/3]

Performs cloning on the CallsiteContextGraph (not on the IR or summary
index), in order to uniquely identify the allocation behavior of an
allocation call given its context. In order to do this, the graph is
recursively traversed starting from the allocation nodes, until we
identify a point where the allocation behavior is unambiguous (the edges
have a single allocation type). Nodes are then cloned as we unwind the
recursion. We try to perform the minimal amount of cloning required to
disambiguate the contexts.

The follow-on patch will contain the support for applying the cloning to
the IR.

Depends on D140908 and D145836.

Differential Revision: https://reviews.llvm.org/D140949

Added: 
    

Modified: 
    llvm/include/llvm/IR/ModuleSummaryIndex.h
    llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
    llvm/test/ThinLTO/X86/memprof-basic.ll
    llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
    llvm/test/ThinLTO/X86/memprof-indirectcall.ll
    llvm/test/ThinLTO/X86/memprof-inlined.ll
    llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
    llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
    llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
    llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 39f44791921aa..c540fa5b4ba14 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -341,7 +341,12 @@ inline raw_ostream &operator<<(raw_ostream &OS, const CallsiteInfo &SNI) {
 // Values should be powers of two so that they can be ORed, in particular to
 // track allocations that have different behavior with different calling
 // contexts.
-enum class AllocationType : uint8_t { None = 0, NotCold = 1, Cold = 2 };
+enum class AllocationType : uint8_t {
+  None = 0,
+  NotCold = 1,
+  Cold = 2,
+  All = 3 // This should always be set to the OR of all values.
+};
 
 /// Summary of a single MIB in a memprof metadata on allocations.
 struct MIBInfo {

diff  --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index f9ad2d30d07b9..3fbf47d2852c6 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -67,22 +67,6 @@ static cl::opt<bool>
     VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden,
                 cl::desc("Perform frequent verification checks on nodes."));
 
-inline bool hasSingleAllocType(uint8_t AllocTypes) {
-  switch (AllocTypes) {
-  case (uint8_t)AllocationType::Cold:
-  case (uint8_t)AllocationType::NotCold:
-    return true;
-    break;
-  case (uint8_t)AllocationType::None:
-    assert(false);
-    break;
-  default:
-    return false;
-    break;
-  }
-  llvm_unreachable("invalid alloc type");
-}
-
 /// CRTP base for graphs built from either IR or ThinLTO summary index.
 ///
 /// The graph represents the call contexts in all memprof metadata on allocation
@@ -107,6 +91,10 @@ class CallsiteContextGraph {
   /// Main entry point to perform analysis and transformations on graph.
   bool process();
 
+  /// Perform cloning on the graph necessary to uniquely identify the allocation
+  /// behavior of an allocation based on its context.
+  void identifyClones();
+
   void dump() const;
   void print(raw_ostream &OS) const;
 
@@ -214,16 +202,15 @@ class CallsiteContextGraph {
     ContextNode(bool IsAllocation, CallInfo C)
         : IsAllocation(IsAllocation), Call(C) {}
 
-    std::unique_ptr<ContextNode> clone() {
-      auto Clone = std::make_unique<ContextNode>(IsAllocation, Call);
+    void addClone(ContextNode *Clone) {
       if (CloneOf) {
-        CloneOf->Clones.push_back(Clone.get());
+        CloneOf->Clones.push_back(Clone);
         Clone->CloneOf = CloneOf;
       } else {
-        Clones.push_back(Clone.get());
+        Clones.push_back(Clone);
+        assert(!Clone->CloneOf);
         Clone->CloneOf = this;
       }
-      return Clone;
     }
 
     ContextNode *getOrigNode() {
@@ -292,6 +279,10 @@ class CallsiteContextGraph {
     }
   };
 
+  /// Helper to remove callee edges that have allocation type None (due to not
+  /// carrying any context ids) after transformations.
+  void removeNoneTypeCalleeEdges(ContextNode *Node);
+
 protected:
   /// Get a list of nodes corresponding to the stack ids in the given callsite
   /// context.
@@ -403,6 +394,40 @@ class CallsiteContextGraph {
   /// unioning their recorded alloc types.
   uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds);
 
+  /// Returns the allocation type of the intersection of the contexts of two
+  /// nodes (based on their provided context id sets), optimized for the case
+  /// when Node1Ids is smaller than Node2Ids.
+  uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids,
+                                  const DenseSet<uint32_t> &Node2Ids);
+
+  /// Returns the allocation type of the intersection of the contexts of two
+  /// nodes (based on their provided context id sets).
+  uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids,
+                              const DenseSet<uint32_t> &Node2Ids);
+
+  /// Create a clone of Edge's callee and move Edge to that new callee node,
+  /// performing the necessary context id and allocation type updates.
+  /// If callee's caller edge iterator is supplied, it is updated when removing
+  /// the edge from that list.
+  ContextNode *
+  moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
+                           EdgeIter *CallerEdgeI = nullptr);
+
+  /// Change the callee of Edge to existing callee clone NewCallee, performing
+  /// the necessary context id and allocation type updates.
+  /// If callee's caller edge iterator is supplied, it is updated when removing
+  /// the edge from that list.
+  void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
+                                     ContextNode *NewCallee,
+                                     EdgeIter *CallerEdgeI = nullptr,
+                                     bool NewClone = false);
+
+  /// Recursively perform cloning on the graph for the given Node and its
+  /// callers, in order to uniquely identify the allocation behavior of an
+  /// allocation given its context.
+  void identifyClones(ContextNode *Node,
+                      DenseSet<const ContextNode *> &Visited);
+
   /// Map from each context ID to the AllocationType assigned to that context.
   std::map<uint32_t, AllocationType> ContextIdToAllocationType;
 
@@ -543,6 +568,28 @@ AllocationType allocTypeToUse(uint8_t AllocTypes) {
     return (AllocationType)AllocTypes;
 }
 
+// Helper to check if the alloc types for all edges recorded in the
+// InAllocTypes vector match the alloc types for all edges in the Edges
+// vector.
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+bool allocTypesMatch(
+    const std::vector<uint8_t> &InAllocTypes,
+    const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
+        &Edges) {
+  return std::equal(
+      InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(),
+      [](const uint8_t &l,
+         const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
+        // Can share if one of the edges is None type - don't
+        // care about the type along that edge as it doesn't
+        // exist for those context ids.
+        if (l == (uint8_t)AllocationType::None ||
+            r->AllocTypes == (uint8_t)AllocationType::None)
+          return true;
+        return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
+      });
+}
+
 } // end anonymous namespace
 
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
@@ -607,6 +654,20 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
   Caller->CalleeEdges.push_back(Edge);
 }
 
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<
+    DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
+  for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) {
+    auto Edge = *EI;
+    if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
+      assert(Edge->ContextIds.empty());
+      Edge->Callee->eraseCallerEdge(Edge.get());
+      EI = Node->CalleeEdges.erase(EI);
+    } else
+      ++EI;
+  }
+}
+
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
 CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
@@ -666,6 +727,33 @@ uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
   return AllocType;
 }
 
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+uint8_t
+CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
+    const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) {
+  uint8_t BothTypes =
+      (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
+  uint8_t AllocType = (uint8_t)AllocationType::None;
+  for (auto Id : Node1Ids) {
+    if (!Node2Ids.count(Id))
+      continue;
+    AllocType |= (uint8_t)ContextIdToAllocationType[Id];
+    // Bail early if alloc type reached both, no further refinement.
+    if (AllocType == BothTypes)
+      return AllocType;
+  }
+  return AllocType;
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
+    const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) {
+  if (Node1Ids.size() < Node2Ids.size())
+    return intersectAllocTypesImpl(Node1Ids, Node2Ids);
+  else
+    return intersectAllocTypesImpl(Node2Ids, Node1Ids);
+}
+
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
 CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
@@ -1121,7 +1209,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
       // not fully matching stack contexts. To do this, subtract any context ids
       // found in caller nodes of the last node found above.
       if (Ids.back() != getLastStackId(Call)) {
-        for (const auto &PE : LastNode->CallerEdges) {
+        for (const auto &PE : CurNode->CallerEdges) {
           set_subtract(StackSequenceContextIds, PE->getContextIds());
           if (StackSequenceContextIds.empty())
             break;
@@ -1572,7 +1660,8 @@ static void checkEdge(
 }
 
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
-static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node) {
+static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
+                      bool CheckEdges = false) {
   if (Node->isRemoved())
     return;
   // Node's context ids should be the union of both its callee and caller edge
@@ -1584,6 +1673,8 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node) {
     DenseSet<uint32_t> CallerEdgeContextIds(FirstEdge->ContextIds);
     for (; EI != Node->CallerEdges.end(); EI++) {
       const auto &Edge = *EI;
+      if (CheckEdges)
+        checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
       set_union(CallerEdgeContextIds, Edge->ContextIds);
     }
     // Node can have more context ids than callers if some contexts terminate at
@@ -1598,6 +1689,8 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node) {
     DenseSet<uint32_t> CalleeEdgeContextIds(FirstEdge->ContextIds);
     for (; EI != Node->CalleeEdges.end(); EI++) {
       const auto &Edge = *EI;
+      if (CheckEdges)
+        checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
       set_union(CalleeEdgeContextIds, Edge->ContextIds);
     }
     assert(Node->ContextIds == CalleeEdgeContextIds);
@@ -1760,6 +1853,276 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
              DotFilePathPrefix + "ccg." + Label + ".dot");
 }
 
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+ContextNode<DerivedCCG, FuncTy, CallTy> *
+CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
+    const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI) {
+  ContextNode *Node = Edge->Callee;
+  NodeOwner.push_back(
+      std::make_unique<ContextNode>(Node->IsAllocation, Node->Call));
+  ContextNode *Clone = NodeOwner.back().get();
+  Node->addClone(Clone);
+  assert(NodeToCallingFunc.count(Node));
+  NodeToCallingFunc[Clone] = NodeToCallingFunc[Node];
+  moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true);
+  return Clone;
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
+    moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
+                                  ContextNode *NewCallee, EdgeIter *CallerEdgeI,
+                                  bool NewClone) {
+  // NewCallee and Edge's current callee must be clones of the same original
+  // node (Edge's current callee may be the original node too).
+  assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
+  auto &EdgeContextIds = Edge->getContextIds();
+  ContextNode *OldCallee = Edge->Callee;
+  if (CallerEdgeI)
+    *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI);
+  else
+    OldCallee->eraseCallerEdge(Edge.get());
+  Edge->Callee = NewCallee;
+  NewCallee->CallerEdges.push_back(Edge);
+  // Don't need to update Edge's context ids since we are simply reconnecting
+  // it.
+  set_subtract(OldCallee->ContextIds, EdgeContextIds);
+  NewCallee->ContextIds.insert(EdgeContextIds.begin(), EdgeContextIds.end());
+  NewCallee->AllocTypes |= Edge->AllocTypes;
+  OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds);
+  // OldCallee alloc type should be None iff its context id set is now empty.
+  assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
+         OldCallee->ContextIds.empty());
+  // Now walk the old callee node's callee edges and move Edge's context ids
+  // over to the corresponding edge into the clone (which is created here if
+  // this is a newly created clone).
+  for (auto &OldCalleeEdge : OldCallee->CalleeEdges) {
+    // The context ids moving to the new callee are the subset of this edge's
+    // context ids and the context ids on the caller edge being moved.
+    DenseSet<uint32_t> EdgeContextIdsToMove =
+        set_intersection(OldCalleeEdge->getContextIds(), EdgeContextIds);
+    set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
+    OldCalleeEdge->AllocTypes =
+        computeAllocType(OldCalleeEdge->getContextIds());
+    if (!NewClone) {
+      // Update context ids / alloc type on corresponding edge to NewCallee.
+      // There is a chance this may not exist if we are reusing an existing
+      // clone, specifically during function assignment, where we would have
+      // removed none type edges after creating the clone. If we can't find
+      // a corresponding edge there, fall through to the cloning below.
+      if (auto *NewCalleeEdge =
+              NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
+        NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
+                                              EdgeContextIdsToMove.end());
+        NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
+        continue;
+      }
+    }
+    auto NewEdge = std::make_shared<ContextEdge>(
+        OldCalleeEdge->Callee, NewCallee,
+        computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
+    NewCallee->CalleeEdges.push_back(NewEdge);
+    NewEdge->Callee->CallerEdges.push_back(NewEdge);
+  }
+  if (VerifyCCG) {
+    checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee);
+    checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee);
+    for (const auto &OldCalleeEdge : OldCallee->CalleeEdges)
+      checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee);
+    for (const auto &NewCalleeEdge : NewCallee->CalleeEdges)
+      checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee);
+  }
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
+  DenseSet<const ContextNode *> Visited;
+  for (auto &Entry : AllocationCallToContextNodeMap)
+    identifyClones(Entry.second, Visited);
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
+    ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
+  if (VerifyNodes)
+    checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/true);
+  assert(!Node->CloneOf);
+
+  // If Node has a null call, then either it wasn't found in the module (regular
+  // LTO) or summary index (ThinLTO), or there were other conditions blocking
+  // cloning (e.g. recursion, calls multiple targets, etc).
+  // Do this here so that we don't try to recursively clone callers below, which
+  // isn't useful at least for this node.
+  if (!Node->hasCall())
+    return;
+
+#ifndef NDEBUG
+  auto Insert =
+#endif
+      Visited.insert(Node);
+  // We should not have visited this node yet.
+  assert(Insert.second);
+  // The recursive call to identifyClones may delete the current edge from the
+  // CallerEdges vector. Make a copy and iterate on that, simpler than passing
+  // in an iterator and having recursive call erase from it. Other edges may
+  // also get removed during the recursion, which will have null Callee and
+  // Caller pointers (and are deleted later), so we skip those below.
+  {
+    auto CallerEdges = Node->CallerEdges;
+    for (auto &Edge : CallerEdges) {
+      // Skip any that have been removed by an earlier recursive call.
+      if (Edge->Callee == nullptr && Edge->Caller == nullptr) {
+        assert(!std::count(Node->CallerEdges.begin(), Node->CallerEdges.end(),
+                           Edge));
+        continue;
+      }
+      // Ignore any caller we previously visited via another edge.
+      if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) {
+        identifyClones(Edge->Caller, Visited);
+      }
+    }
+  }
+
+  // Check if we reached an unambiguous call or have only a single caller.
+  if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
+    return;
+
+  // We need to clone.
+
+  // Try to keep the original version as alloc type NotCold. This will make
+  // cases with indirect calls or any other situation with an unknown call to
+  // the original function get the default behavior. We do this by sorting the
+  // CallerEdges of the Node we will clone by alloc type.
+  //
+  // Give NotCold edge the lowest sort priority so those edges are at the end of
+  // the caller edges vector, and stay on the original version (since the below
+  // code clones greedily until it finds all remaining edges have the same type
+  // and leaves the remaining ones on the original Node).
+  //
+  // We shouldn't actually have any None type edges, so the sorting priority for
+  // that is arbitrary, and we assert in that case below.
+  constexpr unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4,
+                                                   /*Cold*/ 1,
+                                                   /*NotColdCold*/ 2};
+  assert(std::size(AllocTypeCloningPriority) ==
+         (std::size_t)AllocationType::All + 1);
+  std::stable_sort(Node->CallerEdges.begin(), Node->CallerEdges.end(),
+                   [](const std::shared_ptr<ContextEdge> &A,
+                      const std::shared_ptr<ContextEdge> &B) {
+                     assert(A->AllocTypes != (uint8_t)AllocationType::None &&
+                            B->AllocTypes != (uint8_t)AllocationType::None);
+                     if (A->AllocTypes == B->AllocTypes)
+                       // Use the first context id for each edge as a
+                       // tie-breaker.
+                       return *A->ContextIds.begin() < *B->ContextIds.begin();
+                     return AllocTypeCloningPriority[A->AllocTypes] <
+                            AllocTypeCloningPriority[B->AllocTypes];
+                   });
+
+  assert(Node->AllocTypes != (uint8_t)AllocationType::None);
+
+  // Iterate until we find no more opportunities for disambiguating the alloc
+  // types via cloning. In most cases this loop will terminate once the Node
+  // has a single allocation type, in which case no more cloning is needed.
+  // We need to be able to remove Edge from CallerEdges, so need to adjust
+  // iterator inside the loop.
+  for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) {
+    auto CallerEdge = *EI;
+
+    // See if cloning the prior caller edge left this node with a single alloc
+    // type or a single caller. In that case no more cloning of Node is needed.
+    if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
+      break;
+
+    // Compute the node callee edge alloc types corresponding to the context ids
+    // for this caller edge.
+    std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
+    CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size());
+    for (auto &CalleeEdge : Node->CalleeEdges)
+      CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
+          CalleeEdge->getContextIds(), CallerEdge->getContextIds()));
+
+    // Don't clone if doing so will not disambiguate any alloc types amongst
+    // caller edges (including the callee edges that would be cloned).
+    // Otherwise we will simply move all edges to the clone.
+    //
+    // First check if by cloning we will disambiguate the caller allocation
+    // type from node's allocation type. Query allocTypeToUse so that we don't
+    // bother cloning to distinguish NotCold+Cold from NotCold. Note that
+    // neither of these should be None type.
+    //
+    // Then check if by cloning node at least one of the callee edges will be
+    // disambiguated by splitting out different context ids.
+    assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
+    assert(Node->AllocTypes != (uint8_t)AllocationType::None);
+    if (allocTypeToUse(CallerEdge->AllocTypes) ==
+            allocTypeToUse(Node->AllocTypes) &&
+        allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
+            CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) {
+      ++EI;
+      continue;
+    }
+
+    // First see if we can use an existing clone. Check each clone and its
+    // callee edges for matching alloc types.
+    ContextNode *Clone = nullptr;
+    for (auto *CurClone : Node->Clones) {
+      if (allocTypeToUse(CurClone->AllocTypes) !=
+          allocTypeToUse(CallerEdge->AllocTypes))
+        continue;
+
+      if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
+              CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges))
+        continue;
+      Clone = CurClone;
+      break;
+    }
+
+    // The edge iterator is adjusted when we move the CallerEdge to the clone.
+    if (Clone)
+      moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI);
+    else
+      Clone = moveEdgeToNewCalleeClone(CallerEdge, &EI);
+
+    assert(EI == Node->CallerEdges.end() ||
+           Node->AllocTypes != (uint8_t)AllocationType::None);
+    // Sanity check that no alloc types on clone or its edges are None.
+    assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
+    assert(llvm::none_of(
+        Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
+          return E->AllocTypes == (uint8_t)AllocationType::None;
+        }));
+  }
+
+  // Cloning may have resulted in some cloned callee edges with type None,
+  // because they aren't carrying any contexts. Remove those edges.
+  for (auto *Clone : Node->Clones) {
+    removeNoneTypeCalleeEdges(Clone);
+    if (VerifyNodes)
+      checkNode<DerivedCCG, FuncTy, CallTy>(Clone, /*CheckEdges=*/true);
+  }
+  // We should still have some context ids on the original Node.
+  assert(!Node->ContextIds.empty());
+
+  // Remove any callee edges that ended up with alloc type None after creating
+  // clones and updating callee edges.
+  removeNoneTypeCalleeEdges(Node);
+
+  // Sanity check that no alloc types on node or edges are None.
+  assert(Node->AllocTypes != (uint8_t)AllocationType::None);
+  assert(llvm::none_of(Node->CalleeEdges,
+                       [&](const std::shared_ptr<ContextEdge> &E) {
+                         return E->AllocTypes == (uint8_t)AllocationType::None;
+                       }));
+  assert(llvm::none_of(Node->CallerEdges,
+                       [&](const std::shared_ptr<ContextEdge> &E) {
+                         return E->AllocTypes == (uint8_t)AllocationType::None;
+                       }));
+
+  if (VerifyNodes)
+    checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/true);
+}
+
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
   if (DumpCCG) {
@@ -1773,6 +2136,19 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
     check();
   }
 
+  identifyClones();
+
+  if (VerifyCCG) {
+    check();
+  }
+
+  if (DumpCCG) {
+    dbgs() << "CCG after cloning:\n";
+    dbgs() << *this;
+  }
+  if (ExportToDot)
+    exportToDot("cloned");
+
   return false;
 }
 

diff  --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll
index d8c78d270f277..4d11244b2b42b 100644
--- a/llvm/test/ThinLTO/X86/memprof-basic.ll
+++ b/llvm/test/ThinLTO/X86/memprof-basic.ll
@@ -42,6 +42,8 @@
 ; RUN:	-o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
+;; We should have cloned bar, baz, and foo, for the cold memory allocation.
+; RUN:	cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
 source_filename = "memprof-basic.ll"
@@ -142,6 +144,88 @@ uselistorder ptr @_Z3foov, { 1, 0 }
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
 ; DUMP: 	CallerEdges:
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[BAR]]
+; DUMP: 	Versions: 1 MIB:
+; DUMP:                 AllocType 1 StackIds: 2, 3, 0
+; DUMP:                 AllocType 2 StackIds: 2, 3, 1
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[BAR2:0x[a-z0-9]+]]
+
+; DUMP: Node [[BAZ]]
+; DUMP: 	Callee: 9832687305761716512 (_Z3barv) Clones: 0 StackIds: 2    (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[BAZ2:0x[a-z0-9]+]]
+
+; DUMP: Node [[FOO]]
+; DUMP: 	Callee: 5878270615442837395 (_Z3bazv) Clones: 0 StackIds: 3    (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[FOO2:0x[a-z0-9]+]]
+
+; DUMP: Node [[MAIN1]]
+; DUMP: 	Callee: 6731117468105397038 (_Z3foov) Clones: 0 StackIds: 0     (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: 	Callee: 6731117468105397038 (_Z3foov) Clones: 0 StackIds: 1     (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[FOO2]]
+; DUMP: 	Callee: 5878270615442837395 (_Z3bazv) Clones: 0 StackIds: 3    (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[FOO]]
+
+; DUMP: Node [[BAZ2]]
+; DUMP: 	Callee: 9832687305761716512 (_Z3barv) Clones: 0 StackIds: 2    (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[BAZ]]
+
+; DUMP: Node [[BAR2]]
+; DUMP: 	Versions: 1 MIB:
+; DUMP:                 AllocType 1 StackIds: 2, 3, 0
+; DUMP:                 AllocType 2 StackIds: 2, 3, 1
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[BAR]]
+
 
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
@@ -155,3 +239,22 @@ uselistorder ptr @_Z3foov, { 1, 0 }
 ; DOT: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
 ; DOT: 	Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
 ; DOT: }
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"];
+; DOTCLONED: 	Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
+; DOTCLONED: 	Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
+; DOTCLONED: 	Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3bazv}"];
+; DOTCLONED: 	Node[[FOO2]] -> Node[[BAZ2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAZ2]] [shape=record,tooltip="N[[BAZ2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3bazv -\> _Z3barv}"];
+; DOTCLONED: 	Node[[BAZ2]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"];
+; DOTCLONED: }

diff  --git a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
index 772b319e0715e..3b297dd96cede 100644
--- a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
+++ b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
@@ -64,6 +64,8 @@
 
 ; RUN:  cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
 ; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
+;; We should clone D once for the cold allocations via C.
+; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
 source_filename = "duplicate-context-ids.ll"
@@ -205,6 +207,67 @@ declare i32 @sleep()
 ; DUMP: 	CallerEdges:
 
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[D]]
+; DUMP:         Versions: 1 MIB:
+; DUMP:                 AllocType 2 StackIds: 0
+; DUMP:                 AllocType 1 StackIds: 1
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
+; DUMP:         Clones: [[D2:0x[a-z0-9]+]]
+
+; DUMP: Node [[F]]
+; DUMP:         Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 1       (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[C2]]
+; DUMP:         Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0       (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 3
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[B]]
+; DUMP:         Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 2    (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[E]]
+; DUMP:         Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 3    (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[D2]]
+; DUMP:         Versions: 1 MIB:
+; DUMP:                 AllocType 2 StackIds: 0
+; DUMP:                 AllocType 1 StackIds: 1
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 1 3 4
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
+; DUMP:         Clone of [[D]]
+
+
 ; DOTPRE: digraph "prestackupdate" {
 ; DOTPRE: 	label="prestackupdate";
 ; DOTPRE: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
@@ -227,3 +290,18 @@ declare i32 @sleep()
 ; DOTPOST:	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
 ; DOTPOST:	Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
 ; DOTPOST:}
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
+; DOTCLONED: 	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
+; DOTCLONED: 	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[C]] -> Node[[D2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="cyan"];
+; DOTCLONED: 	Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[B]] -> Node[[D2]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTCLONED: 	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[E]] -> Node[[D2]][tooltip="ContextIds: 1",fillcolor="cyan"];
+; DOTCLONED: 	Node[[D2]] [shape=record,tooltip="N[[D2]] ContextIds: 1 3 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
+; DOTCLONED: }

diff  --git a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll
index 30c8bd27f37b7..9cf209271ce4d 100644
--- a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll
+++ b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll
@@ -64,6 +64,9 @@
 ; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
+;; We should only create a single clone of foo, for the direct call
+;; from main allocating cold memory.
+; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
 source_filename = "indirectcall.ll"
@@ -240,6 +243,121 @@ uselistorder ptr @_Z3foov, { 3, 2, 1, 0 }
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6
 ; DUMP: 	CallerEdges:
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[FOO]]
+; DUMP:         Versions: 1 MIB:
+; DUMP:                 AllocType 1 StackIds: 6, 8, 4
+; DUMP:                 AllocType 2 StackIds: 6, 8, 5
+; DUMP:                 AllocType 1 StackIds: 0
+; DUMP:                 AllocType 2 StackIds: 7, 8, 2
+; DUMP:                 AllocType 1 StackIds: 7, 8, 3
+; DUMP:                 AllocType 2 StackIds: 1
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2 3 4 5
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP:		Clones: [[FOO2:0x[a-z0-9]+]]
+
+; DUMP: Node [[AX]]
+; DUMP: 	Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 6    (clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2
+
+; DUMP: Node [[BAR]]
+; DUMP: 	null Call
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2 4 5
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 		Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5
+
+; DUMP: Node [[MAIN3]]
+; DUMP: 	Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 4   (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN4]]
+; DUMP: 	Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 5   (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN1]]
+; DUMP: 	Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 0    (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 3
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[BX]]
+; DUMP: 	Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 7    (clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 4 5
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5
+
+; DUMP: Node [[MAIN5]]
+; DUMP: 	Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 2   (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN6]]
+; DUMP: 	Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 3   (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 5
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: 	Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 1    (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 6
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[FOO2]]
+; DUMP:         Versions: 1 MIB:
+; DUMP:                 AllocType 1 StackIds: 6, 8, 4
+; DUMP:                 AllocType 2 StackIds: 6, 8, 5
+; DUMP:                 AllocType 1 StackIds: 0
+; DUMP:                 AllocType 2 StackIds: 7, 8, 2
+; DUMP:                 AllocType 1 StackIds: 7, 8, 3
+; DUMP:                 AllocType 2 StackIds: 1
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 6
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6
+; DUMP:		Clone of [[FOO]]
+
 
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
@@ -264,3 +382,29 @@ uselistorder ptr @_Z3foov, { 3, 2, 1, 0 }
 ; DOT: 	Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
 ; DOT: 	Node[[MAIN6]] -> Node[[FOO]][tooltip="ContextIds: 6",fillcolor="cyan"];
 ; DOT: }
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2 3 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> alloc}"];
+; DOTCLONED: 	Node[[AX:0x[a-z0-9]+]] [shape=record,tooltip="N[[AX]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 8256774051149711748\n_ZN1A1xEv -\> _Z3foov}"];
+; DOTCLONED: 	Node[[AX]] -> Node[[FOO]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13626499562959447861\nnull call (external)}"];
+; DOTCLONED: 	Node[[BAR]] -> Node[[AX]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[BAR]] -> Node[[BX:0x[a-z0-9]+]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 748269490701775343\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12699492813229484831\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[BAR]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[MAIN3:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN3]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN3]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"];
+; DOTCLONED: 	Node[[BX]] [shape=record,tooltip="N[[BX]] ContextIds: 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13614864978754796978\n_ZN1B1xEv -\> _Z3foov}"];
+; DOTCLONED: 	Node[[BX]] -> Node[[FOO]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[MAIN4:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN4]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 6792096022461663180\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN4]] -> Node[[BAR]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTCLONED: 	Node[[MAIN5:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN5]] ContextIds: 5",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 15737101490731057601\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN5]] -> Node[[BAR]][tooltip="ContextIds: 5",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN6]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 6",fillcolor="cyan"];
+; DOTCLONED: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 6",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3foov -\> alloc}"];
+; DOTCLONED: }

diff  --git a/llvm/test/ThinLTO/X86/memprof-inlined.ll b/llvm/test/ThinLTO/X86/memprof-inlined.ll
index 89cd878e99fb4..7a2304f7202b4 100644
--- a/llvm/test/ThinLTO/X86/memprof-inlined.ll
+++ b/llvm/test/ThinLTO/X86/memprof-inlined.ll
@@ -51,6 +51,9 @@
 ; RUN:	-o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
+;; We should create clones for foo and bar for the call from main to allocate
+;; cold memory.
+; RUN:	cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
 source_filename = "inlined.ll"
@@ -168,6 +171,91 @@ declare i32 @sleep()
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[BAZ]]
+; DUMP:         Versions: 1 MIB:
+; DUMP:                 AllocType 1 StackIds: 1, 2
+; DUMP:                 AllocType 2 StackIds: 1, 3
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 1 2
+
+; DUMP: Node [[FOO2]]
+; DUMP: 	null Call
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+
+; DUMP: Node [[MAIN1]]
+; DUMP:         Callee: 2229562716906371625 (_Z3foov) Clones: 0 StackIds: 2     (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1 3
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP:         Callee: 2229562716906371625 (_Z3foov) Clones: 0 StackIds: 3     (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 		Edge from Callee [[FOO3:0x[a-z0-9]+]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[BAR]]
+; DUMP:         Versions: 1 MIB:
+; DUMP:                 AllocType 1 StackIds: 0, 1, 2
+; DUMP:                 AllocType 2 StackIds: 0, 1, 3
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 3
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 3
+; DUMP:         Clones: [[BAR2:0x[a-z0-9]+]]
+
+; DUMP: Node [[FOO]]
+; DUMP:         Callee: 16064618363798697104 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 3
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 3
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP:         Clones: [[FOO3]]
+
+; DUMP: Node [[FOO3]]
+; DUMP:         Callee: 16064618363798697104 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 4
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO3]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
+; DUMP:         Clone of [[FOO]]
+
+; DUMP: Node [[BAR2]]
+; DUMP:         Versions: 1 MIB:
+; DUMP:                 AllocType 1 StackIds: 0, 1, 2
+; DUMP:                 AllocType 2 StackIds: 0, 1, 3
+; DUMP:         (clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 4
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 4
+; DUMP:         Clone of [[BAR]]
+
 
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
@@ -184,3 +272,23 @@ declare i32 @sleep()
 ; DOT: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
 ; DOT: 	Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"];
 ; DOT: }
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3bazv -\> alloc}"];
+; DOTCLONED: 	Node[[FOO2:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"];
+; DOTCLONED: 	Node[[FOO2]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO2]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO3:0x[a-z0-9]+]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3barv -\> alloc}"];
+; DOTCLONED: 	Node[[FOO]] [shape=record,tooltip="N[[FOO]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
+; DOTCLONED: 	Node[[FOO]] -> Node[[BAR]][tooltip="ContextIds: 3",fillcolor="brown1"];
+; DOTCLONED: 	Node[[FOO3]] [shape=record,tooltip="N[[FOO3]] ContextIds: 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
+; DOTCLONED: 	Node[[FOO3]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"];
+; DOTCLONED: }

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
index 539d88a815ed1..7a48d66af47e0 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
@@ -37,6 +37,8 @@
 ; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
+;; We should have cloned bar, baz, and foo, for the cold memory allocation.
+; RUN:	cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -143,6 +145,82 @@ attributes #6 = { builtin }
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
 ; DUMP: 	CallerEdges:
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[BAR:0x[a-z0-9]+]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[BAR2:0x[a-z0-9]+]]
+
+; DUMP: Node [[BAZ]]
+; DUMP: 	  %call = call noundef ptr @_Z3barv()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[BAZ2:0x[a-z0-9]+]]
+
+; DUMP: Node [[FOO]]
+; DUMP: 	  %call = call noundef ptr @_Z3bazv()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP:		Clones: [[FOO2:0x[a-z0-9]+]]
+
+; DUMP: Node [[MAIN1]]
+; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: 	  %call1 = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[FOO2]]
+; DUMP: 	  %call = call noundef ptr @_Z3bazv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[FOO]]
+
+; DUMP: Node [[BAZ2]]
+; DUMP: 	  %call = call noundef ptr @_Z3barv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[BAZ]]
+
+; DUMP: Node [[BAR2]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
+; DUMP:		Clone of [[BAR]]
+
 
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
@@ -156,3 +234,22 @@ attributes #6 = { builtin }
 ; DOT: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
 ; DOT: 	Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
 ; DOT: }
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOTCLONED: 	Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
+; DOTCLONED: 	Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
+; DOTCLONED: 	Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3bazv}"];
+; DOTCLONED: 	Node[[FOO2]] -> Node[[BAZ2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAZ2]] [shape=record,tooltip="N[[BAZ2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3bazv -\> _Z3barv}"];
+; DOTCLONED: 	Node[[BAZ2]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOTCLONED: }

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
index c5ed97f182a98..aa5f539a61832 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
@@ -59,6 +59,8 @@
 
 ; RUN:  cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
 ; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
+;; We should clone D once for the cold allocations via C.
+; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -207,6 +209,60 @@ attributes #6 = { builtin }
 ; DUMP: 		Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
 ; DUMP: 	CallerEdges:
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[D]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
+; DUMP:         Clones: [[D2:0x[a-z0-9]+]]
+
+; DUMP: Node [[F]]
+; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[C2]]
+; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 3
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[B]]
+; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[E]]
+; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[D2]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 1 3 4
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
+; DUMP: 		Edge from Callee [[D2]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
+; DUMP:         Clone of [[D]]
+
 
 ; DOTPRE: digraph "prestackupdate" {
 ; DOTPRE: 	label="prestackupdate";
@@ -230,3 +286,18 @@ attributes #6 = { builtin }
 ; DOTPOST:	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
 ; DOTPOST:	Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
 ; DOTPOST:}
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
+; DOTCLONED: 	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
+; DOTCLONED: 	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[C]] -> Node[[D2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="cyan"];
+; DOTCLONED: 	Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[B]] -> Node[[D2]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTCLONED: 	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
+; DOTCLONED: 	Node[[E]] -> Node[[D2]][tooltip="ContextIds: 1",fillcolor="cyan"];
+; DOTCLONED: 	Node[[D2]] [shape=record,tooltip="N[[D2]] ContextIds: 1 3 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
+; DOTCLONED: }

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
index 9ebf219dd37a0..e66ec0d76fe8b 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
@@ -57,6 +57,9 @@
 ; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
+;; We should only create a single clone of foo, for the direct call
+;; from main allocating cold memory.
+; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
@@ -235,6 +238,107 @@ attributes #7 = { builtin }
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6
 ; DUMP: 	CallerEdges:
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[FOO]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2 3 4 5
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP:		Clones: [[FOO2:0x[a-z0-9]+]]
+
+; DUMP: Node [[AX]]
+; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2
+
+; DUMP: Node [[BAR]]
+; DUMP: 	null Call
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 1 2 4 5
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: 		Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5
+
+; DUMP: Node [[MAIN3]]
+; DUMP: 	  %call4 = call noundef ptr @_Z3barP1A(ptr noundef %a)	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN4]]
+; DUMP: 	  %call5 = call noundef ptr @_Z3barP1A(ptr noundef %a)	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN1]]
+; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 3
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[BX]]
+; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 4 5
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5
+
+; DUMP: Node [[MAIN5]]
+; DUMP: 	  %call2 = call noundef ptr @_Z3barP1A(ptr noundef %b)	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN6]]
+; DUMP: 	  %call3 = call noundef ptr @_Z3barP1A(ptr noundef %b)	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 5
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: 	  %call1 = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 6
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[FOO2]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 6
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6
+; DUMP:		Clone of [[FOO]]
+
 
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
@@ -259,3 +363,29 @@ attributes #7 = { builtin }
 ; DOT: 	Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
 ; DOT: 	Node[[MAIN6]] -> Node[[FOO]][tooltip="ContextIds: 6",fillcolor="cyan"];
 ; DOT: }
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2 3 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> _Znam}"];
+; DOTCLONED: 	Node[[AX:0x[a-z0-9]+]] [shape=record,tooltip="N[[AX]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 8256774051149711748\n_ZN1A1xEv -\> _Z3foov}"];
+; DOTCLONED: 	Node[[AX]] -> Node[[FOO]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13626499562959447861\nnull call (external)}"];
+; DOTCLONED: 	Node[[BAR]] -> Node[[AX]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[BAR]] -> Node[[BX:0x[a-z0-9]+]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 748269490701775343\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12699492813229484831\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[BAR]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[MAIN3:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN3]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN3]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"];
+; DOTCLONED: 	Node[[BX]] [shape=record,tooltip="N[[BX]] ContextIds: 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13614864978754796978\n_ZN1B1xEv -\> _Z3foov}"];
+; DOTCLONED: 	Node[[BX]] -> Node[[FOO]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[MAIN4:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN4]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 6792096022461663180\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN4]] -> Node[[BAR]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTCLONED: 	Node[[MAIN5:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN5]] ContextIds: 5",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 15737101490731057601\nmain -\> _Z3barP1A}"];
+; DOTCLONED: 	Node[[MAIN5]] -> Node[[BAR]][tooltip="ContextIds: 5",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN6]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 6",fillcolor="cyan"];
+; DOTCLONED: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 6",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3foov -\> _Znam}"];
+; DOTCLONED: }

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
index 59f135ca06627..a2fa703e04e7b 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
@@ -46,6 +46,9 @@
 ; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
+;; We should create clones for foo and bar for the call from main to allocate
+;; cold memory.
+; RUN:	cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
@@ -171,6 +174,82 @@ attributes #7 = { builtin }
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
 ; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
 
+; DUMP: CCG after cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[BAR]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
+; DUMP:         Clones: [[BAR2:0x[a-z0-9]+]]
+
+; DUMP: Node [[FOO2]]
+; DUMP: 	null Call
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 3 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
+
+; DUMP: Node [[MAIN1]]
+; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1 3
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: 	  %call1 = call noundef ptr @_Z3foov()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2 4
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
+; DUMP: 		Edge from Callee [[FOO3:0x[a-z0-9]+]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+
+; DUMP: Node [[BAZ]]
+; DUMP: 	  %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
+; DUMP: 	AllocTypes: NotColdCold
+; DUMP: 	ContextIds: 3 4
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
+
+; DUMP: Node [[FOO]]
+; DUMP: 	  %call.i = call noundef ptr @_Z3barv()	(clone 0)
+; DUMP: 	AllocTypes: NotCold
+; DUMP: 	ContextIds: 1
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP:         Clones: [[FOO3]]
+
+; DUMP: Node [[FOO3]]
+; DUMP: 	  %call.i = call noundef ptr @_Z3barv()	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 2
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[FOO3]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP:         Clone of [[FOO]]
+
+; DUMP: Node [[BAR2]]
+; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
+; DUMP: 	AllocTypes: Cold
+; DUMP: 	ContextIds: 2
+; DUMP: 	CalleeEdges:
+; DUMP: 	CallerEdges:
+; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 2
+; DUMP:         Clone of [[BAR]]
+
 
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
@@ -187,3 +266,23 @@ attributes #7 = { builtin }
 ; DOT: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
 ; DOT: 	Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
 ; DOT: }
+
+
+; DOTCLONED: digraph "cloned" {
+; DOTCLONED: 	label="cloned";
+; DOTCLONED: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOTCLONED: 	Node[[FOO2:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO2]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"];
+; DOTCLONED: 	Node[[FOO2]] -> Node[[BAZ:0x[a-z0-9]+]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"];
+; DOTCLONED: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO2]][tooltip="ContextIds: 3",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO:0x[a-z0-9]+]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO3:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAZ]] [shape=record,tooltip="N[[BAZ]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3bazv -\> _Znam}"];
+; DOTCLONED: 	Node[[FOO]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
+; DOTCLONED: 	Node[[FOO]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOTCLONED: 	Node[[FOO3]] [shape=record,tooltip="N[[FOO3]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
+; DOTCLONED: 	Node[[FOO3]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOTCLONED: 	Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOTCLONED: }


        


More information about the llvm-commits mailing list