[llvm] 883dbb9 - Revert "[MemProf] Context disambiguation cloning pass [patch 1a/3]"

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 22 07:44:51 PDT 2023


Author: Nikita Popov
Date: 2023-03-22T15:43:46+01:00
New Revision: 883dbb9c86be87593a58ef10b070b3a0564c7fee

URL: https://github.com/llvm/llvm-project/commit/883dbb9c86be87593a58ef10b070b3a0564c7fee
DIFF: https://github.com/llvm/llvm-project/commit/883dbb9c86be87593a58ef10b070b3a0564c7fee.diff

LOG: Revert "[MemProf] Context disambiguation cloning pass [patch 1a/3]"

This reverts commit d6ad4f01c3dafcab335bca66dac6e36d9eac8421.

Fails to build on at least gcc 12.2:

/home/npopov/repos/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp:482:1: error: no declaration matches ‘ContextNode<DerivedCCG, FuncTy, CallTy>* CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(const CallInfo&)’
  482 | CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
      | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/npopov/repos/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp:393:16: note: candidate is: ‘CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode* CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(const CallInfo&)’
  393 |   ContextNode *getNodeForInst(const CallInfo &C);
      |                ^~~~~~~~~~~~~~
/home/npopov/repos/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp:99:7: note: ‘class CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>’ defined here
   99 | class CallsiteContextGraph {
      |       ^~~~~~~~~~~~~~~~~~~~

Added: 
    llvm/test/ThinLTO/X86/memprof-summary.ll

Modified: 
    llvm/lib/Passes/PassBuilder.cpp
    llvm/lib/Passes/PassBuilderPipelines.cpp
    llvm/lib/Passes/PassRegistry.def
    llvm/lib/Transforms/IPO/CMakeLists.txt

Removed: 
    llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
    llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
    llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
    llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
    llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
    llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
    llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
    llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll
    llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
deleted file mode 100644
index 56e56ed67f7df..0000000000000
--- a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//==- MemProfContextDisambiguation.h - Context Disambiguation ----*- C++ -*-==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Implements support for context disambiguation of allocation calls for profile
-// guided heap optimization using memprof metadata. See implementation file for
-// details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
-#define LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringSet.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/PassManager.h"
-
-namespace llvm {
-class Module;
-
-class MemProfContextDisambiguation
-    : public PassInfoMixin<MemProfContextDisambiguation> {
-  /// Run the context disambiguator on \p M, returns true if any changes made.
-  bool processModule(Module &M);
-
-public:
-  MemProfContextDisambiguation() {}
-
-  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
-};
-} // end namespace llvm
-
-#endif // LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H

diff  --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index a04f8bbaa5dc0..89d2e6a4b2d1a 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -117,7 +117,6 @@
 #include "llvm/Transforms/IPO/Internalize.h"
 #include "llvm/Transforms/IPO/LoopExtractor.h"
 #include "llvm/Transforms/IPO/LowerTypeTests.h"
-#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
 #include "llvm/Transforms/IPO/MergeFunctions.h"
 #include "llvm/Transforms/IPO/ModuleInliner.h"
 #include "llvm/Transforms/IPO/OpenMPOpt.h"

diff  --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index aaabe23049288..1d386139d9e6c 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -57,7 +57,6 @@
 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
 #include "llvm/Transforms/IPO/Inliner.h"
 #include "llvm/Transforms/IPO/LowerTypeTests.h"
-#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
 #include "llvm/Transforms/IPO/MergeFunctions.h"
 #include "llvm/Transforms/IPO/ModuleInliner.h"
 #include "llvm/Transforms/IPO/OpenMPOpt.h"
@@ -272,10 +271,6 @@ static cl::opt<AttributorRunOption> AttributorRun(
                clEnumValN(AttributorRunOption::NONE, "none",
                           "disable attributor runs")));
 
-cl::opt<bool> EnableMemProfContextDisambiguation(
-    "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
-    cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));
-
 PipelineTuningOptions::PipelineTuningOptions() {
   LoopInterleaving = true;
   LoopVectorization = true;
@@ -1714,12 +1709,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
       InlineContext{ThinOrFullLTOPhase::FullLTOPostLink,
                     InlinePass::CGSCCInliner}));
 
-  // Perform context disambiguation after inlining, since that would reduce the
-  // amount of additional cloning required to distinguish the allocation
-  // contexts.
-  if (EnableMemProfContextDisambiguation)
-    MPM.addPass(MemProfContextDisambiguation());
-
   // Optimize globals again after we ran the inliner.
   MPM.addPass(GlobalOptPass());
 

diff  --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 82592a1ee9b55..04d648580a8c5 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -87,7 +87,6 @@ MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
 MODULE_PASS("no-op-module", NoOpModulePass())
 MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass())
 MODULE_PASS("partial-inliner", PartialInlinerPass())
-MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation())
 MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion())
 MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen())
 MODULE_PASS("pgo-instr-use", PGOInstrumentationUse())

diff  --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index e03aff0f65d7a..063a9a60d0cb5 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -27,7 +27,6 @@ add_llvm_component_library(LLVMipo
   Internalize.cpp
   LoopExtractor.cpp
   LowerTypeTests.cpp
-  MemProfContextDisambiguation.cpp
   MergeFunctions.cpp
   ModuleInliner.cpp
   OpenMPOpt.cpp

diff  --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
deleted file mode 100644
index fc8b12df67822..0000000000000
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ /dev/null
@@ -1,1583 +0,0 @@
-//==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements support for context disambiguation of allocation
-// calls for profile guided heap optimization. Specifically, it uses Memprof
-// profiles which indicate context specific allocation behavior (currently
-// distinguishing cold vs hot memory allocations). Cloning is performed to
-// expose the cold allocation call contexts, and the allocation calls are
-// subsequently annotated with an attribute for later transformation.
-//
-// The transformations can be performed either directly on IR (regular LTO), or
-// (eventually) on a ThinLTO index (later applied to the IR during the ThinLTO
-// backend). Both types of LTO operate on a the same base graph representation,
-// which uses CRTP to support either IR or Index formats.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/MemoryProfileInfo.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO.h"
-#include <sstream>
-#include <vector>
-using namespace llvm;
-using namespace llvm::memprof;
-
-#define DEBUG_TYPE "memprof-context-disambiguation"
-
-static cl::opt<std::string> DotFilePathPrefix(
-    "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
-    cl::value_desc("filename"),
-    cl::desc("Specify the path prefix of the MemProf dot files."));
-
-static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false),
-                                 cl::Hidden,
-                                 cl::desc("Export graph to dot files."));
-
-static cl::opt<bool>
-    DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden,
-            cl::desc("Dump CallingContextGraph to stdout after each stage."));
-
-static cl::opt<bool>
-    VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden,
-              cl::desc("Perform verification checks on CallingContextGraph."));
-
-static cl::opt<bool>
-    VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden,
-                cl::desc("Perform frequent verification checks on nodes."));
-
-inline bool hasSingleAllocType(uint8_t AllocTypes) {
-  switch (AllocTypes) {
-  case (uint8_t)AllocationType::Cold:
-  case (uint8_t)AllocationType::NotCold:
-    return true;
-    break;
-  case (uint8_t)AllocationType::None:
-    assert(false);
-    break;
-  default:
-    return false;
-    break;
-  }
-  llvm_unreachable("invalid alloc type");
-}
-
-/// CRTP base for graphs built from either IR or ThinLTO summary index.
-///
-/// The graph represents the call contexts in all memprof metadata on allocation
-/// calls, with nodes for the allocations themselves, as well as for the calls
-/// in each context. The graph is initially built from the allocation memprof
-/// metadata (or summary) MIBs. It is then updated to match calls with callsite
-/// metadata onto the nodes, updating it to reflect any inlining performed on
-/// those calls.
-///
-/// Each MIB (representing an allocation's call context with allocation
-/// behavior) is assigned a unique context id during the graph build. The edges
-/// and nodes in the graph are decorated with the context ids they carry. This
-/// is used to correctly update the graph when cloning is performed so that we
-/// can uniquify the context for a single (possibly cloned) allocation.
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-class CallsiteContextGraph {
-public:
-  CallsiteContextGraph() = default;
-  CallsiteContextGraph(const CallsiteContextGraph &) = default;
-  CallsiteContextGraph(CallsiteContextGraph &&) = default;
-
-  /// Main entry point to perform analysis and transformations on graph.
-  bool process();
-
-  void dump() const;
-  void print(raw_ostream &OS) const;
-
-  friend raw_ostream &operator<<(raw_ostream &OS,
-                                 const CallsiteContextGraph &CCG) {
-    CCG.print(OS);
-    return OS;
-  }
-
-  friend struct GraphTraits<
-      const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
-  friend struct DOTGraphTraits<
-      const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
-
-  void exportToDot(std::string Label) const;
-
-  /// Represents a function clone via FuncTy pointer and clone number pair.
-  struct FuncInfo final
-      : public std::pair<FuncTy *, unsigned /*Clone number*/> {
-    using Base = std::pair<FuncTy *, unsigned>;
-    FuncInfo(const Base &B) : Base(B) {}
-    FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {}
-    explicit operator bool() const { return this->first != nullptr; }
-    FuncTy *func() const { return this->first; }
-    unsigned cloneNo() const { return this->second; }
-  };
-
-  /// Represents a callsite clone via CallTy and clone number pair.
-  struct CallInfo final : public std::pair<CallTy, unsigned /*Clone number*/> {
-    using Base = std::pair<CallTy, unsigned>;
-    CallInfo(const Base &B) : Base(B) {}
-    CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0)
-        : Base(Call, CloneNo) {}
-    explicit operator bool() const { return (bool)this->first; }
-    CallTy call() const { return this->first; }
-    unsigned cloneNo() const { return this->second; }
-    void setCloneNo(unsigned N) { this->second = N; }
-    void print(raw_ostream &OS) const {
-      if (!operator bool()) {
-        assert(!cloneNo());
-        OS << "null Call";
-        return;
-      }
-      call()->print(OS);
-      OS << "\t(clone " << cloneNo() << ")";
-    }
-    void dump() const {
-      print(dbgs());
-      dbgs() << "\n";
-    }
-    friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) {
-      Call.print(OS);
-      return OS;
-    }
-  };
-
-  struct ContextEdge;
-
-  /// Node in the Callsite Context Graph
-  struct ContextNode {
-    // Keep this for now since in the IR case where we have an Instruction* it
-    // is not as immediately discoverable. Used for printing richer information
-    // when dumping graph.
-    bool IsAllocation;
-
-    // Keeps track of when the Call was reset to null because there was
-    // recursion.
-    bool Recursive = false;
-
-    // The corresponding allocation or interior call.
-    CallInfo Call;
-
-    // For alloc nodes this is a unique id assigned when constructed, and for
-    // callsite stack nodes it is the original stack id when the node is
-    // constructed from the memprof MIB metadata on the alloc nodes. Note that
-    // this is only used when matching callsite metadata onto the stack nodes
-    // created when processing the allocation memprof MIBs, and for labeling
-    // nodes in the dot graph. Therefore we don't bother to assign a value for
-    // clones.
-    uint64_t OrigStackOrAllocId = 0;
-
-    // This will be formed by ORing together the AllocationType enum values
-    // for contexts including this node.
-    uint8_t AllocTypes = 0;
-
-    // Edges to all callees in the profiled call stacks.
-    // TODO: Should this be a map (from Callee node) for more efficient lookup?
-    std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
-
-    // Edges to all callers in the profiled call stacks.
-    // TODO: Should this be a map (from Caller node) for more efficient lookup?
-    std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
-
-    // The set of IDs for contexts including this node.
-    DenseSet<uint32_t> ContextIds;
-
-    // List of clones of this ContextNode, initially empty.
-    std::vector<ContextNode *> Clones;
-
-    // If a clone, points to the original uncloned node.
-    ContextNode *CloneOf = nullptr;
-
-    ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {}
-
-    ContextNode(bool IsAllocation, CallInfo C)
-        : IsAllocation(IsAllocation), Call(C) {}
-
-    std::unique_ptr<ContextNode> clone() {
-      auto Clone = std::make_unique<ContextNode>(IsAllocation, Call);
-      if (CloneOf) {
-        CloneOf->Clones.push_back(Clone.get());
-        Clone->CloneOf = CloneOf;
-      } else {
-        Clones.push_back(Clone.get());
-        Clone->CloneOf = this;
-      }
-      return Clone;
-    }
-
-    ContextNode *getOrigNode() {
-      if (!CloneOf)
-        return this;
-      return CloneOf;
-    }
-
-    void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType,
-                               unsigned int ContextId);
-
-    ContextEdge *findEdgeFromCallee(const ContextNode *Callee);
-    ContextEdge *findEdgeFromCaller(const ContextNode *Caller);
-    void eraseCalleeEdge(const ContextEdge *Edge);
-    void eraseCallerEdge(const ContextEdge *Edge);
-
-    void setCall(CallInfo C) { Call = C; }
-
-    bool hasCall() const { return (bool)Call.call(); }
-
-    void printCall(raw_ostream &OS) const { Call.print(OS); }
-
-    // True if this node was effectively removed from the graph, in which case
-    // its context id set, caller edges, and callee edges should all be empty.
-    bool isRemoved() const {
-      assert(ContextIds.empty() ==
-             (CalleeEdges.empty() && CallerEdges.empty()));
-      return ContextIds.empty();
-    }
-
-    void dump() const;
-    void print(raw_ostream &OS) const;
-
-    friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) {
-      Node.print(OS);
-      return OS;
-    }
-  };
-
-  /// Edge in the Callsite Context Graph from a ContextNode N to a caller or
-  /// callee.
-  struct ContextEdge {
-    ContextNode *Callee;
-    ContextNode *Caller;
-
-    // This will be formed by ORing together the AllocationType enum values
-    // for contexts including this edge.
-    uint8_t AllocTypes = 0;
-
-    // The set of IDs for contexts including this edge.
-    DenseSet<uint32_t> ContextIds;
-
-    ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType,
-                DenseSet<uint32_t> ContextIds)
-        : Callee(Callee), Caller(Caller), AllocTypes(AllocType),
-          ContextIds(ContextIds) {}
-
-    DenseSet<uint32_t> &getContextIds() { return ContextIds; }
-
-    void dump() const;
-    void print(raw_ostream &OS) const;
-
-    friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) {
-      Edge.print(OS);
-      return OS;
-    }
-  };
-
-protected:
-  /// Get a list of nodes corresponding to the stack ids in the given callsite
-  /// context.
-  template <class NodeT, class IteratorT>
-  std::vector<uint64_t>
-  getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext);
-
-  /// Adds nodes for the given allocation and any stack ids on its memprof MIB
-  /// metadata (or summary).
-  ContextNode *addAllocNode(CallInfo Call, const FuncTy *F);
-
-  /// Adds nodes for the given MIB stack ids.
-  template <class NodeT, class IteratorT>
-  void addStackNodesForMIB(ContextNode *AllocNode,
-                           CallStack<NodeT, IteratorT> &StackContext,
-                           CallStack<NodeT, IteratorT> &CallsiteContext,
-                           AllocationType AllocType);
-
-  /// Matches all callsite metadata (or summary) to the nodes created for
-  /// allocation memprof MIB metadata, synthesizing new nodes to reflect any
-  /// inlining performed on those callsite instructions.
-  void updateStackNodes();
-
-  /// Update graph to conservatively handle any callsite stack nodes that target
-  /// multiple different callee target functions.
-  void handleCallsitesWithMultipleTargets();
-
-  /// Save lists of calls with MemProf metadata in each function, for faster
-  /// iteration.
-  std::vector<std::pair<FuncTy *, std::vector<CallInfo>>>
-      FuncToCallsWithMetadata;
-
-  /// Map from callsite node to the enclosing caller function.
-  std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
-
-private:
-  using EdgeIter = typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
-
-  using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>,
-                                     const FuncTy *, DenseSet<uint32_t>>;
-
-  /// Assigns the given Node to calls at or inlined into the location with
-  /// the Node's stack id, after post order traversing and processing its
-  /// caller nodes. Uses the call information recorded in the given
-  /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences
-  /// as needed. Called by updateStackNodes which sets up the given
-  /// StackIdToMatchingCalls map.
-  void assignStackNodesPostOrder(
-      ContextNode *Node, DenseSet<const ContextNode *> &Visited,
-      DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls);
-
-  /// Duplicates the given set of context ids, updating the provided
-  /// map from each original id with the newly generated context ids,
-  /// and returning the new duplicated id set.
-  DenseSet<uint32_t> duplicateContextIds(
-      const DenseSet<uint32_t> &StackSequenceContextIds,
-      DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
-
-  /// Propagates all duplicated context ids across the graph.
-  void propagateDuplicateContextIds(
-      const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
-
-  /// Connect the NewNode to OrigNode's callees if TowardsCallee is true,
-  /// else to its callers. Also updates OrigNode's edges to remove any context
-  /// ids moved to the newly created edge.
-  void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
-                      bool TowardsCallee);
-
-  /// Get the stack id corresponding to the given Id or Index (for IR this will
-  /// return itself, for a summary index this will return the id recorded in the
-  /// index for that stack id index value).
-  uint64_t getStackId(uint64_t IdOrIndex) const {
-    return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex);
-  }
-
-  /// Returns true if the given call targets the given function.
-  bool calleeMatchesFunc(CallTy Call, const FuncTy *Func) {
-    return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(Call, Func);
-  }
-
-  /// Get a list of nodes corresponding to the stack ids in the given
-  /// callsite's context.
-  std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
-    return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall(
-        Call);
-  }
-
-  /// Get the last stack id in the context for callsite.
-  uint64_t getLastStackId(CallTy Call) {
-    return static_cast<DerivedCCG *>(this)->getLastStackId(Call);
-  }
-
-  /// Gets a label to use in the dot graph for the given call clone in the given
-  /// function.
-  std::string getLabel(const FuncTy *Func, const CallTy Call,
-                       unsigned CloneNo) const {
-    return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo);
-  }
-
-  /// Helpers to find the node corresponding to the given call or stackid.
-  ContextNode *getNodeForInst(const CallInfo &C);
-  ContextNode *getNodeForAlloc(const CallInfo &C);
-  ContextNode *getNodeForStackId(uint64_t StackId);
-
-  /// Removes the node information recorded for the given call.
-  void unsetNodeForInst(const CallInfo &C);
-
-  /// Computes the alloc type corresponding to the given context ids, by
-  /// unioning their recorded alloc types.
-  uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds);
-
-  /// Map from each context ID to the AllocationType assigned to that context.
-  std::map<uint32_t, AllocationType> ContextIdToAllocationType;
-
-  /// Identifies the context node created for a stack id when adding the MIB
-  /// contexts to the graph. This is used to locate the context nodes when
-  /// trying to assign the corresponding callsites with those stack ids to these
-  /// nodes.
-  std::map<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;
-
-  /// Maps to track the calls to their corresponding nodes in the graph.
-  std::map<const CallInfo, ContextNode *> AllocationCallToContextNodeMap;
-  std::map<const CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;
-
-  /// Owner of all ContextNode unique_ptrs.
-  std::vector<std::unique_ptr<ContextNode>> NodeOwner;
-
-  /// Perform sanity checks on graph when requested.
-  void check() const;
-
-  /// Keeps track of the last unique context id assigned.
-  unsigned int LastContextId = 0;
-};
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-using ContextNode =
-    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-using ContextEdge =
-    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-using FuncInfo =
-    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-using CallInfo =
-    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
-
-/// CRTP derived class for graphs built from IR (regular LTO).
-class ModuleCallsiteContextGraph
-    : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
-                                  Instruction *> {
-public:
-  ModuleCallsiteContextGraph(Module &M);
-
-private:
-  friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
-                              Instruction *>;
-
-  uint64_t getStackId(uint64_t IdOrIndex) const;
-  bool calleeMatchesFunc(Instruction *Call, const Function *Func);
-  uint64_t getLastStackId(Instruction *Call);
-  std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call);
-  std::string getLabel(const Function *Func, const Instruction *Call,
-                       unsigned CloneNo) const;
-
-  const Module &Mod;
-};
-
-namespace {
-
-struct FieldSeparator {
-  bool Skip = true;
-  const char *Sep;
-
-  FieldSeparator(const char *Sep = ", ") : Sep(Sep) {}
-};
-
-raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
-  if (FS.Skip) {
-    FS.Skip = false;
-    return OS;
-  }
-  return OS << FS.Sep;
-}
-
-} // end anonymous namespace
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-ContextNode<DerivedCCG, FuncTy, CallTy> *
-CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
-    const CallInfo &C) {
-  ContextNode *Node = getNodeForAlloc(C);
-  if (Node)
-    return Node;
-
-  auto NonAllocCallNode = NonAllocationCallToContextNodeMap.find(C);
-  if (NonAllocCallNode != NonAllocationCallToContextNodeMap.end()) {
-    return NonAllocCallNode->second;
-  }
-  return nullptr;
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-ContextNode<DerivedCCG, FuncTy, CallTy> *
-CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
-    const CallInfo &C) {
-  auto AllocCallNode = AllocationCallToContextNodeMap.find(C);
-  if (AllocCallNode != AllocationCallToContextNodeMap.end()) {
-    return AllocCallNode->second;
-  }
-  return nullptr;
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-ContextNode<DerivedCCG, FuncTy, CallTy> *
-CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
-    uint64_t StackId) {
-  auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
-  if (StackEntryNode != StackEntryIdToContextNodeMap.end())
-    return StackEntryNode->second;
-  return nullptr;
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::unsetNodeForInst(
-    const CallInfo &C) {
-  AllocationCallToContextNodeMap.erase(C) ||
-      NonAllocationCallToContextNodeMap.erase(C);
-  assert(!AllocationCallToContextNodeMap.count(C) &&
-         !NonAllocationCallToContextNodeMap.count(C));
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
-    addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType,
-                          unsigned int ContextId) {
-  for (auto &Edge : CallerEdges) {
-    if (Edge->Caller == Caller) {
-      Edge->AllocTypes |= (uint8_t)AllocType;
-      Edge->getContextIds().insert(ContextId);
-      return;
-    }
-  }
-  std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
-      this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId}));
-  CallerEdges.push_back(Edge);
-  Caller->CalleeEdges.push_back(Edge);
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-ContextEdge<DerivedCCG, FuncTy, CallTy> *
-CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
-    findEdgeFromCallee(const ContextNode *Callee) {
-  for (const auto &Edge : CalleeEdges)
-    if (Edge->Callee == Callee)
-      return Edge.get();
-  return nullptr;
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-ContextEdge<DerivedCCG, FuncTy, CallTy> *
-CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
-    findEdgeFromCaller(const ContextNode *Caller) {
-  for (const auto &Edge : CallerEdges)
-    if (Edge->Caller == Caller)
-      return Edge.get();
-  return nullptr;
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
-    eraseCalleeEdge(const ContextEdge *Edge) {
-  auto EI =
-      std::find_if(CalleeEdges.begin(), CalleeEdges.end(),
-                   [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) {
-                     return CalleeEdge.get() == Edge;
-                   });
-  assert(EI != CalleeEdges.end());
-  CalleeEdges.erase(EI);
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
-    eraseCallerEdge(const ContextEdge *Edge) {
-  auto EI =
-      std::find_if(CallerEdges.begin(), CallerEdges.end(),
-                   [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) {
-                     return CallerEdge.get() == Edge;
-                   });
-  assert(EI != CallerEdges.end());
-  CallerEdges.erase(EI);
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
-    DenseSet<uint32_t> &ContextIds) {
-  uint8_t BothTypes =
-      (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
-  uint8_t AllocType = (uint8_t)AllocationType::None;
-  for (auto Id : ContextIds) {
-    AllocType |= (uint8_t)ContextIdToAllocationType[Id];
-    // Bail early if alloc type reached both, no further refinement.
-    if (AllocType == BothTypes)
-      return AllocType;
-  }
-  return AllocType;
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-ContextNode<DerivedCCG, FuncTy, CallTy> *
-CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
-    CallInfo Call, const FuncTy *F) {
-  assert(!getNodeForAlloc(Call));
-  NodeOwner.push_back(
-      std::make_unique<ContextNode>(/*IsAllocation=*/true, Call));
-  ContextNode *AllocNode = NodeOwner.back().get();
-  AllocationCallToContextNodeMap[Call] = AllocNode;
-  NodeToCallingFunc[AllocNode] = F;
-  // Use LastContextId as a uniq id for MIB allocation nodes.
-  AllocNode->OrigStackOrAllocId = LastContextId;
-  // Alloc type should be updated as we add in the MIBs. We should assert
-  // afterwards that it is not still None.
-  AllocNode->AllocTypes = (uint8_t)AllocationType::None;
-
-  return AllocNode;
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-template <class NodeT, class IteratorT>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
-    ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
-    CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType) {
-  ContextIdToAllocationType[++LastContextId] = AllocType;
-
-  // Update alloc type and context ids for this MIB.
-  AllocNode->AllocTypes |= (uint8_t)AllocType;
-  AllocNode->ContextIds.insert(LastContextId);
-
-  // Now add or update nodes for each stack id in alloc's context.
-  // Later when processing the stack ids on non-alloc callsites we will adjust
-  // for any inlining in the context.
-  ContextNode *PrevNode = AllocNode;
-  // Look for recursion (direct recursion should have been collapsed by
-  // module summary analysis, here we should just be detecting mutual
-  // recursion). Mark these nodes so we don't try to clone.
-  SmallSet<uint64_t, 8> StackIdSet;
-  // Skip any on the allocation call (inlining).
-  for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext);
-       ContextIter != StackContext.end(); ++ContextIter) {
-    auto StackId = getStackId(*ContextIter);
-    ContextNode *StackNode = getNodeForStackId(StackId);
-    if (!StackNode) {
-      NodeOwner.push_back(
-          std::make_unique<ContextNode>(/*IsAllocation=*/false));
-      StackNode = NodeOwner.back().get();
-      StackEntryIdToContextNodeMap[StackId] = StackNode;
-      StackNode->OrigStackOrAllocId = StackId;
-    }
-    auto Ins = StackIdSet.insert(StackId);
-    if (!Ins.second)
-      StackNode->Recursive = true;
-    StackNode->ContextIds.insert(LastContextId);
-    StackNode->AllocTypes |= (uint8_t)AllocType;
-    PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
-    PrevNode = StackNode;
-  }
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-DenseSet<uint32_t>
-CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
-    const DenseSet<uint32_t> &StackSequenceContextIds,
-    DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
-  DenseSet<uint32_t> NewContextIds;
-  for (auto OldId : StackSequenceContextIds) {
-    NewContextIds.insert(++LastContextId);
-    OldToNewContextIds[OldId].insert(LastContextId);
-    assert(ContextIdToAllocationType.count(OldId));
-    // The new context has the same allocation type as original.
-    ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
-  }
-  return NewContextIds;
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
-    propagateDuplicateContextIds(
-        const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
-  // Build a set of duplicated context ids corresponding to the input id set.
-  auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) {
-    DenseSet<uint32_t> NewIds;
-    for (auto Id : ContextIds)
-      if (auto NewId = OldToNewContextIds.find(Id);
-          NewId != OldToNewContextIds.end())
-        NewIds.insert(NewId->second.begin(), NewId->second.end());
-    return NewIds;
-  };
-
-  // Recursively update context ids sets along caller edges.
-  auto UpdateCallers = [&](ContextNode *Node,
-                           DenseSet<const ContextEdge *> &Visited,
-                           auto &&UpdateCallers) -> void {
-    for (auto Edge : Node->CallerEdges) {
-      auto Inserted = Visited.insert(Edge.get());
-      if (!Inserted.second)
-        continue;
-      ContextNode *NextNode = Edge->Caller;
-      DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds());
-      // Only need to recursively iterate to NextNode via this caller edge if
-      // it resulted in any added ids to NextNode.
-      if (!NewIdsToAdd.empty()) {
-        Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
-        NextNode->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
-        UpdateCallers(NextNode, Visited, UpdateCallers);
-      }
-    }
-  };
-
-  DenseSet<const ContextEdge *> Visited;
-  for (auto &Entry : AllocationCallToContextNodeMap) {
-    auto *Node = Entry.second;
-    // Update ids on the allocation nodes before calling the recursive
-    // update along caller edges, since this simplifies the logic during
-    // that traversal.
-    DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Node->ContextIds);
-    Node->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
-    UpdateCallers(Node, Visited, UpdateCallers);
-  }
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
-    ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee) {
-  // Make a copy of the context ids, since this will be adjusted below as they
-  // are moved.
-  DenseSet<uint32_t> RemainingContextIds = NewNode->ContextIds;
-  auto &OrigEdges =
-      TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
-  // Increment iterator in loop so that we can remove edges as needed.
-  for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
-    auto Edge = *EI;
-    // Remove any matching context ids from Edge, return set that were found and
-    // removed, these are the new edge's context ids. Also update the remaining
-    // (not found ids).
-    DenseSet<uint32_t> NewEdgeContextIds, NotFoundContextIds;
-    set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
-                 NotFoundContextIds);
-    RemainingContextIds.swap(NotFoundContextIds);
-    // If no matching context ids for this edge, skip it.
-    if (NewEdgeContextIds.empty()) {
-      ++EI;
-      continue;
-    }
-    if (TowardsCallee) {
-      auto NewEdge = std::make_shared<ContextEdge>(
-          Edge->Callee, NewNode, computeAllocType(NewEdgeContextIds),
-          NewEdgeContextIds);
-      NewNode->CalleeEdges.push_back(NewEdge);
-      NewEdge->Callee->CallerEdges.push_back(NewEdge);
-    } else {
-      auto NewEdge = std::make_shared<ContextEdge>(
-          NewNode, Edge->Caller, computeAllocType(NewEdgeContextIds),
-          NewEdgeContextIds);
-      NewNode->CallerEdges.push_back(NewEdge);
-      NewEdge->Caller->CalleeEdges.push_back(NewEdge);
-    }
-    // Remove old edge if context ids empty.
-    if (Edge->getContextIds().empty()) {
-      if (TowardsCallee) {
-        Edge->Callee->eraseCallerEdge(Edge.get());
-        EI = OrigNode->CalleeEdges.erase(EI);
-      } else {
-        Edge->Caller->eraseCalleeEdge(Edge.get());
-        EI = OrigNode->CallerEdges.erase(EI);
-      }
-      continue;
-    }
-    ++EI;
-  }
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
-    assignStackNodesPostOrder(ContextNode *Node,
-                              DenseSet<const ContextNode *> &Visited,
-                              DenseMap<uint64_t, std::vector<CallContextInfo>>
-                                  &StackIdToMatchingCalls) {
-  auto Inserted = Visited.insert(Node);
-  if (!Inserted.second)
-    return;
-  // Post order traversal. Iterate over a copy since we may add nodes and
-  // therefore new callers during the recursive call, invalidating any
-  // iterator over the original edge vector. We don't need to process these
-  // new nodes as they were already processed on creation.
-  auto CallerEdges = Node->CallerEdges;
-  for (auto &Edge : CallerEdges) {
-    // Skip any that have been removed during the recursion.
-    if (!Edge)
-      continue;
-    assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls);
-  }
-
-  // If this node's stack id is in the map, update the graph to contain new
-  // nodes representing any inlining at interior callsites. Note we move the
-  // associated context ids over to the new nodes.
-
-  // Ignore this node if it is for an allocation or we didn't record any
-  // stack id lists ending at it.
-  if (Node->IsAllocation ||
-      !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId))
-    return;
-
-  auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId];
-  // Handle the simple case first. A single call with a single stack id.
-  // In this case there is no need to create any new context nodes, simply
-  // assign the context node for stack id to this Call.
-  if (Calls.size() == 1) {
-    auto &[Call, Ids, Func, SavedContextIds] = Calls[0];
-    if (Ids.size() == 1) {
-      assert(SavedContextIds.empty());
-      // It should be this Node
-      assert(Node == getNodeForStackId(Ids[0]));
-      if (Node->Recursive)
-        return;
-      Node->setCall(Call);
-      NonAllocationCallToContextNodeMap[Call] = Node;
-      NodeToCallingFunc[Node] = Func;
-      return;
-    }
-  }
-
-  // Find the node for the last stack id, which should be the same
-  // across all calls recorded for this id, and is this node's id.
-  uint64_t LastId = Node->OrigStackOrAllocId;
-  ContextNode *LastNode = getNodeForStackId(LastId);
-  // We should only have kept stack ids that had nodes.
-  assert(LastNode);
-
-  for (unsigned I = 0; I < Calls.size(); I++) {
-    auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
-    // Skip any for which we didn't assign any ids, these don't get a node in
-    // the graph.
-    if (SavedContextIds.empty())
-      continue;
-
-    assert(LastId == Ids.back());
-
-    ContextNode *FirstNode = getNodeForStackId(Ids[0]);
-    assert(FirstNode);
-
-    // Recompute the context ids for this stack id sequence (the
-    // intersection of the context ids of the corresponding nodes).
-    // Start with the ids we saved in the map for this call, which could be
-    // duplicated context ids. We have to recompute as we might have overlap
-    // overlap between the saved context ids for different last nodes, and
-    // removed them already during the post order traversal.
-    set_intersect(SavedContextIds, FirstNode->ContextIds);
-    ContextNode *PrevNode = nullptr;
-    for (auto Id : Ids) {
-      ContextNode *CurNode = getNodeForStackId(Id);
-      // We should only have kept stack ids that had nodes and weren't
-      // recursive.
-      assert(CurNode);
-      assert(!CurNode->Recursive);
-      if (!PrevNode) {
-        PrevNode = CurNode;
-        continue;
-      }
-      auto *Edge = CurNode->findEdgeFromCallee(PrevNode);
-      if (!Edge) {
-        SavedContextIds.clear();
-        break;
-      }
-      PrevNode = CurNode;
-      set_intersect(SavedContextIds, Edge->getContextIds());
-
-      // If we now have no context ids for clone, skip this call.
-      if (SavedContextIds.empty())
-        break;
-    }
-    if (SavedContextIds.empty())
-      continue;
-
-    // Create new context node.
-    NodeOwner.push_back(
-        std::make_unique<ContextNode>(/*IsAllocation=*/false, Call));
-    ContextNode *NewNode = NodeOwner.back().get();
-    NodeToCallingFunc[NewNode] = Func;
-    NonAllocationCallToContextNodeMap[Call] = NewNode;
-    NewNode->ContextIds = SavedContextIds;
-    NewNode->AllocTypes = computeAllocType(NewNode->ContextIds);
-
-    // Connect to callees of innermost stack frame in inlined call chain.
-    // This updates context ids for FirstNode's callee's to reflect those
-    // moved to NewNode.
-    connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true);
-
-    // Connect to callers of outermost stack frame in inlined call chain.
-    // This updates context ids for FirstNode's caller's to reflect those
-    // moved to NewNode.
-    connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false);
-
-    // Now we need to remove context ids from edges/nodes between First and
-    // Last Node.
-    PrevNode = nullptr;
-    for (auto Id : Ids) {
-      ContextNode *CurNode = getNodeForStackId(Id);
-      // We should only have kept stack ids that had nodes.
-      assert(CurNode);
-
-      // Remove the context ids moved to NewNode from CurNode, and the
-      // edge from the prior node.
-      set_subtract(CurNode->ContextIds, NewNode->ContextIds);
-      if (PrevNode) {
-        auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
-        assert(PrevEdge);
-        set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds);
-        if (PrevEdge->getContextIds().empty()) {
-          PrevNode->eraseCallerEdge(PrevEdge);
-          CurNode->eraseCalleeEdge(PrevEdge);
-        }
-      }
-      PrevNode = CurNode;
-    }
-  }
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
-  // Map of stack id to all calls with that as the last (outermost caller)
-  // callsite id that has a context node (some might not due to pruning
-  // performed during matching of the allocation profile contexts).
-  // The CallContextInfo contains the Call and a list of its stack ids with
-  // ContextNodes, the function containing Call, and the set of context ids
-  // the analysis will eventually identify for use in any new node created
-  // for that callsite.
-  DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
-  for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
-    for (auto &Call : CallsWithMetadata) {
-      // Ignore allocations, already handled.
-      if (AllocationCallToContextNodeMap.count(Call))
-        continue;
-      auto StackIdsWithContextNodes =
-          getStackIdsWithContextNodesForCall(Call.call());
-      // If there were no nodes created for MIBs on allocs (maybe this was in
-      // the unambiguous part of the MIB stack that was pruned), ignore.
-      if (StackIdsWithContextNodes.empty())
-        continue;
-      // Otherwise, record this Call along with the list of ids for the last
-      // (outermost caller) stack id with a node.
-      StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
-          {Call.call(), StackIdsWithContextNodes, Func, {}});
-    }
-  }
-
-  // First make a pass through all stack ids that correspond to a call,
-  // as identified in the above loop. Compute the context ids corresponding to
-  // each of these calls when they correspond to multiple stack ids due to
-  // due to inlining. Perform any duplication of context ids required when
-  // there is more than one call with the same stack ids. Their (possibly newly
-  // duplicated) context ids are saved in the StackIdToMatchingCalls map.
-  DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
-  for (auto &It : StackIdToMatchingCalls) {
-    auto &Calls = It.getSecond();
-    // Skip single calls with a single stack id. These don't need a new node.
-    if (Calls.size() == 1) {
-      auto &Ids = std::get<1>(Calls[0]);
-      if (Ids.size() == 1)
-        continue;
-    }
-    // In order to do the best and maximal matching of inlined calls to context
-    // node sequences we will sort the vectors of stack ids in descending order
-    // of length, and within each length, lexicographically by stack id. The
-    // latter is so that we can specially handle calls that have identical stack
-    // id sequences (either due to cloning or artificially because of the MIB
-    // context pruning).
-    std::sort(Calls.begin(), Calls.end(),
-              [](const CallContextInfo &A, const CallContextInfo &B) {
-                auto &IdsA = std::get<1>(A);
-                auto &IdsB = std::get<1>(B);
-                return IdsA.size() > IdsB.size() ||
-                       (IdsA.size() == IdsB.size() && IdsA < IdsB);
-              });
-
-    // Find the node for the last stack id, which should be the same
-    // across all calls recorded for this id, and is the id for this
-    // entry in the StackIdToMatchingCalls map.
-    uint64_t LastId = It.getFirst();
-    ContextNode *LastNode = getNodeForStackId(LastId);
-    // We should only have kept stack ids that had nodes.
-    assert(LastNode);
-
-    if (LastNode->Recursive)
-      continue;
-
-    // Initialize the context ids with the last node's. We will subsequently
-    // refine the context ids by computing the intersection along all edges.
-    DenseSet<uint32_t> LastNodeContextIds = LastNode->ContextIds;
-    assert(!LastNodeContextIds.empty());
-
-    for (unsigned I = 0; I < Calls.size(); I++) {
-      auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
-      assert(SavedContextIds.empty());
-      assert(LastId == Ids.back());
-
-      // First compute the context ids for this stack id sequence (the
-      // intersection of the context ids of the corresponding nodes).
-      // Start with the remaining saved ids for the last node.
-      assert(!LastNodeContextIds.empty());
-      DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
-
-      ContextNode *PrevNode = LastNode;
-      ContextNode *CurNode = LastNode;
-      bool Skip = false;
-
-      // Iterate backwards through the stack Ids, starting after the last Id
-      // in the list, which was handled once outside for all Calls.
-      for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
-        auto Id = *IdIter;
-        CurNode = getNodeForStackId(Id);
-        // We should only have kept stack ids that had nodes.
-        assert(CurNode);
-
-        if (CurNode->Recursive) {
-          Skip = true;
-          break;
-        }
-
-        auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
-        // If there is no edge then the nodes belong to different MIB contexts,
-        // and we should skip this inlined context sequence. For example, this
-        // particular inlined context may include stack ids A->B, and we may
-        // indeed have nodes for both A and B, but it is possible that they were
-        // never profiled in sequence in a single MIB for any allocation (i.e.
-        // we might have profiled an allocation that involves the callsite A,
-        // but through a different one of its callee callsites, and we might
-        // have profiled an allocation that involves callsite B, but reached
-        // from a different caller callsite).
-        if (!Edge) {
-          Skip = true;
-          break;
-        }
-        PrevNode = CurNode;
-
-        // Update the context ids, which is the intersection of the ids along
-        // all edges in the sequence.
-        set_intersect(StackSequenceContextIds, Edge->getContextIds());
-
-        // If we now have no context ids for clone, skip this call.
-        if (StackSequenceContextIds.empty()) {
-          Skip = true;
-          break;
-        }
-      }
-      if (Skip)
-        continue;
-
-      // If some of this call's stack ids did not have corresponding nodes (due
-      // to pruning), don't include any context ids for contexts that extend
-      // beyond these nodes. Otherwise we would be matching part of unrelated /
-      // not fully matching stack contexts. To do this, subtract any context ids
-      // found in caller nodes of the last node found above.
-      if (Ids.back() != getLastStackId(Call)) {
-        for (auto PE : LastNode->CallerEdges) {
-          set_subtract(StackSequenceContextIds, PE->getContextIds());
-          if (StackSequenceContextIds.empty())
-            break;
-        }
-        // If we now have no context ids for clone, skip this call.
-        if (StackSequenceContextIds.empty())
-          continue;
-      }
-
-      // Check if the next set of stack ids is the same (since the Calls vector
-      // of tuples is sorted by the stack ids we can just look at the next one).
-      bool DuplicateContextIds = false;
-      if (I + 1 < Calls.size()) {
-        auto NextIds = std::get<1>(Calls[I + 1]);
-        DuplicateContextIds = Ids == NextIds;
-      }
-
-      // If we don't have duplicate context ids, then we can assign all the
-      // context ids computed for the original node sequence to this call.
-      // If there are duplicate calls with the same stack ids then we synthesize
-      // new context ids that are duplicates of the originals. These are
-      // assigned to SavedContextIds, which is a reference into the map entry
-      // for this call, allowing us to access these ids later on.
-      OldToNewContextIds.reserve(OldToNewContextIds.size() +
-                                 StackSequenceContextIds.size());
-      SavedContextIds =
-          DuplicateContextIds
-              ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
-              : StackSequenceContextIds;
-      assert(!SavedContextIds.empty());
-
-      if (!DuplicateContextIds) {
-        // Update saved last node's context ids to remove those that are
-        // assigned to other calls, so that it is ready for the next call at
-        // this stack id.
-        set_subtract(LastNodeContextIds, StackSequenceContextIds);
-        if (LastNodeContextIds.empty())
-          break;
-      }
-    }
-  }
-
-  // Propagate the duplicate context ids over the graph.
-  propagateDuplicateContextIds(OldToNewContextIds);
-
-  if (VerifyCCG)
-    check();
-
-  // Now perform a post-order traversal over the graph, starting with the
-  // allocation nodes, essentially processing nodes from callers to callees.
-  // For any that contains an id in the map, update the graph to contain new
-  // nodes representing any inlining at interior callsites. Note we move the
-  // associated context ids over to the new nodes.
-  DenseSet<const ContextNode *> Visited;
-  for (auto &Entry : AllocationCallToContextNodeMap)
-    assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls);
-}
-
-uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) {
-  CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
-      Call->getMetadata(LLVMContext::MD_callsite));
-  return CallsiteContext.back();
-}
-
-std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
-                                                 const Instruction *Call,
-                                                 unsigned CloneNo) const {
-  return (Twine(Call->getFunction()->getName()) + " -> " +
-          cast<CallBase>(Call)->getCalledFunction()->getName())
-      .str();
-}
-
-std::vector<uint64_t>
-ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
-    Instruction *Call) {
-  CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
-      Call->getMetadata(LLVMContext::MD_callsite));
-  return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
-      CallsiteContext);
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-template <class NodeT, class IteratorT>
-std::vector<uint64_t>
-CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
-    CallStack<NodeT, IteratorT> &CallsiteContext) {
-  std::vector<uint64_t> StackIds;
-  for (auto IdOrIndex : CallsiteContext) {
-    auto StackId = getStackId(IdOrIndex);
-    ContextNode *Node = getNodeForStackId(StackId);
-    if (!Node)
-      break;
-    StackIds.push_back(StackId);
-  }
-  return StackIds;
-}
-
-ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(Module &M) : Mod(M) {
-  for (auto &F : M) {
-    std::vector<CallInfo> CallsWithMetadata;
-    for (auto &BB : F) {
-      for (auto &I : BB) {
-        if (!isa<CallBase>(I))
-          continue;
-        if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
-          CallsWithMetadata.push_back(&I);
-          auto *AllocNode = addAllocNode(&I, &F);
-          auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
-          assert(CallsiteMD);
-          CallStack<MDNode, MDNode::op_iterator> CallsiteContext(CallsiteMD);
-          // Add all of the MIBs and their stack nodes.
-          for (auto &MDOp : MemProfMD->operands()) {
-            auto *MIBMD = cast<const MDNode>(MDOp);
-            MDNode *StackNode = getMIBStackNode(MIBMD);
-            assert(StackNode);
-            CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
-            addStackNodesForMIB<MDNode, MDNode::op_iterator>(
-                AllocNode, StackContext, CallsiteContext,
-                getMIBAllocType(MIBMD));
-          }
-          assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
-          // Memprof and callsite metadata on memory allocations no longer
-          // needed.
-          I.setMetadata(LLVMContext::MD_memprof, nullptr);
-          I.setMetadata(LLVMContext::MD_callsite, nullptr);
-        }
-        // For callsite metadata, add to list for this function for later use.
-        else if (I.getMetadata(LLVMContext::MD_callsite))
-          CallsWithMetadata.push_back(&I);
-      }
-    }
-    if (!CallsWithMetadata.empty())
-      FuncToCallsWithMetadata.push_back({&F, CallsWithMetadata});
-  }
-
-  if (DumpCCG) {
-    dbgs() << "CCG before updating call stack chains:\n";
-    dbgs() << *this;
-  }
-
-  if (ExportToDot)
-    exportToDot("prestackupdate");
-
-  updateStackNodes();
-
-  handleCallsitesWithMultipleTargets();
-
-  // Strip off remaining callsite metadata, no longer needed.
-  for (auto &FuncEntry : FuncToCallsWithMetadata)
-    for (auto &Call : FuncEntry.second)
-      Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr);
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy,
-                          CallTy>::handleCallsitesWithMultipleTargets() {
-  // Look for and workaround callsites that call multiple functions.
-  // This can happen for indirect calls, which needs better handling, and in
-  // more rare cases (e.g. macro expansion).
-  // TODO: To fix this for indirect calls we will want to perform speculative
-  // devirtualization using either the normal PGO info with ICP, or using the
-  // information in the profiled MemProf contexts. We can do this prior to
-  // this transformation for regular LTO, and for ThinLTO we can simulate that
-  // effect in the summary and perform the actual speculative devirtualization
-  // while cloning in the ThinLTO backend.
-  for (auto Entry = NonAllocationCallToContextNodeMap.begin();
-       Entry != NonAllocationCallToContextNodeMap.end();) {
-    auto *Node = Entry->second;
-    assert(Node->Clones.empty());
-    // Check all node callees and see if in the same function.
-    bool Removed = false;
-    auto Call = Node->Call.call();
-    for (auto &Edge : Node->CalleeEdges) {
-      if (!Edge->Callee->hasCall())
-        continue;
-      assert(NodeToCallingFunc.count(Edge->Callee));
-      // Check if the called function matches that of the callee node.
-      if (calleeMatchesFunc(Call, NodeToCallingFunc[Edge->Callee]))
-        continue;
-      // Work around by setting Node to have a null call, so it gets
-      // skipped during cloning. Otherwise assignFunctions will assert
-      // because its data structures are not designed to handle this case.
-      Entry = NonAllocationCallToContextNodeMap.erase(Entry);
-      Node->setCall(CallInfo());
-      Removed = true;
-      break;
-    }
-    if (!Removed)
-      Entry++;
-  }
-}
-
-uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
-  // In the Module (IR) case this is already the Id.
-  return IdOrIndex;
-}
-
-bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call,
-                                                   const Function *Func) {
-  auto *CB = dyn_cast<CallBase>(Call);
-  if (!CB->getCalledOperand())
-    return false;
-  auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
-  auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
-  if (CalleeFunc == Func)
-    return true;
-  auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
-  return Alias && Alias->getAliasee() == Func;
-}
-
-static std::string getAllocTypeString(uint8_t AllocTypes) {
-  if (!AllocTypes)
-    return "None";
-  std::string Str;
-  if (AllocTypes & (uint8_t)AllocationType::NotCold)
-    Str += "NotCold";
-  if (AllocTypes & (uint8_t)AllocationType::Cold)
-    Str += "Cold";
-  return Str;
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
-    const {
-  print(dbgs());
-  dbgs() << "\n";
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
-    raw_ostream &OS) const {
-  OS << "Node " << this << "\n";
-  OS << "\t";
-  printCall(OS);
-  if (Recursive)
-    OS << " (recursive)";
-  OS << "\n";
-  OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n";
-  OS << "\tContextIds:";
-  std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
-  std::sort(SortedIds.begin(), SortedIds.end());
-  for (auto Id : SortedIds)
-    OS << " " << Id;
-  OS << "\n";
-  OS << "\tCalleeEdges:\n";
-  for (auto &Edge : CalleeEdges)
-    OS << "\t\t" << *Edge << "\n";
-  OS << "\tCallerEdges:\n";
-  for (auto &Edge : CallerEdges)
-    OS << "\t\t" << *Edge << "\n";
-  if (!Clones.empty()) {
-    OS << "\tClones: ";
-    FieldSeparator FS;
-    for (auto *Clone : Clones)
-      OS << FS << Clone;
-    OS << "\n";
-  } else if (CloneOf) {
-    OS << "\tClone of " << CloneOf << "\n";
-  }
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
-    const {
-  print(dbgs());
-  dbgs() << "\n";
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
-    raw_ostream &OS) const {
-  OS << "Edge from Callee " << Callee << " to Caller: " << Caller
-     << " AllocTypes: " << getAllocTypeString(AllocTypes);
-  OS << " ContextIds:";
-  std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
-  std::sort(SortedIds.begin(), SortedIds.end());
-  for (auto Id : SortedIds)
-    OS << " " << Id;
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const {
-  print(dbgs());
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
-    raw_ostream &OS) const {
-  OS << "Callsite Context Graph:\n";
-  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
-  for (const auto Node : nodes<GraphType>(this)) {
-    if (Node->isRemoved())
-      continue;
-    Node->print(OS);
-    OS << "\n";
-  }
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-static void checkEdge(
-    const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
-  // Confirm that alloc type is not None and that we have at least one context
-  // id.
-  assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
-  assert(!Edge->ContextIds.empty());
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node) {
-  if (Node->isRemoved())
-    return;
-  // Node's context ids should be the union of both its callee and caller edge
-  // context ids.
-  if (Node->CallerEdges.size()) {
-    auto EI = Node->CallerEdges.begin();
-    auto &FirstEdge = *EI;
-    EI++;
-    DenseSet<uint32_t> CallerEdgeContextIds(FirstEdge->ContextIds);
-    for (; EI != Node->CallerEdges.end(); EI++) {
-      const auto &Edge = *EI;
-      set_union(CallerEdgeContextIds, Edge->ContextIds);
-    }
-    // Node can have more context ids than callers if some contexts terminate at
-    // node and some are longer.
-    assert(Node->ContextIds == CallerEdgeContextIds ||
-           set_is_subset(CallerEdgeContextIds, Node->ContextIds));
-  }
-  if (Node->CalleeEdges.size()) {
-    auto EI = Node->CalleeEdges.begin();
-    auto &FirstEdge = *EI;
-    EI++;
-    DenseSet<uint32_t> CalleeEdgeContextIds(FirstEdge->ContextIds);
-    for (; EI != Node->CalleeEdges.end(); EI++) {
-      const auto &Edge = *EI;
-      set_union(CalleeEdgeContextIds, Edge->ContextIds);
-    }
-    assert(Node->ContextIds == CalleeEdgeContextIds);
-  }
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
-  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
-  for (const auto Node : nodes<GraphType>(this)) {
-    checkNode<DerivedCCG, FuncTy, CallTy>(Node);
-    for (auto &Edge : Node->CallerEdges)
-      checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
-  }
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> {
-  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
-  using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *;
-
-  using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
-  static NodeRef getNode(const NodePtrTy &P) { return P.get(); }
-
-  using nodes_iterator =
-      mapped_iterator<typename std::vector<NodePtrTy>::const_iterator,
-                      decltype(&getNode)>;
-
-  static nodes_iterator nodes_begin(GraphType G) {
-    return nodes_iterator(G->NodeOwner.begin(), &getNode);
-  }
-
-  static nodes_iterator nodes_end(GraphType G) {
-    return nodes_iterator(G->NodeOwner.end(), &getNode);
-  }
-
-  static NodeRef getEntryNode(GraphType G) {
-    return G->NodeOwner.begin()->get();
-  }
-
-  using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
-  static const ContextNode<DerivedCCG, FuncTy, CallTy> *
-  GetCallee(const EdgePtrTy &P) {
-    return P->Callee;
-  }
-
-  using ChildIteratorType =
-      mapped_iterator<typename std::vector<std::shared_ptr<ContextEdge<
-                          DerivedCCG, FuncTy, CallTy>>>::const_iterator,
-                      decltype(&GetCallee)>;
-
-  static ChildIteratorType child_begin(NodeRef N) {
-    return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee);
-  }
-
-  static ChildIteratorType child_end(NodeRef N) {
-    return ChildIteratorType(N->CalleeEdges.end(), &GetCallee);
-  }
-};
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>
-    : public DefaultDOTGraphTraits {
-  DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
-
-  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
-  using GTraits = GraphTraits<GraphType>;
-  using NodeRef = typename GTraits::NodeRef;
-  using ChildIteratorType = typename GTraits::ChildIteratorType;
-
-  static std::string getNodeLabel(NodeRef Node, GraphType G) {
-    std::string LabelString =
-        (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") +
-         Twine(Node->OrigStackOrAllocId))
-            .str();
-    LabelString += "\n";
-    if (Node->hasCall()) {
-      auto Func = G->NodeToCallingFunc.find(Node);
-      assert(Func != G->NodeToCallingFunc.end());
-      LabelString +=
-          G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
-    } else {
-      LabelString += "null call";
-      if (Node->Recursive)
-        LabelString += " (recursive)";
-      else
-        LabelString += " (external)";
-    }
-    return LabelString;
-  }
-
-  static std::string getNodeAttributes(NodeRef Node, GraphType) {
-    std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " +
-                                   getContextIds(Node->ContextIds) + "\"")
-                                      .str();
-    AttributeString +=
-        (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str();
-    AttributeString += ",style=\"filled\"";
-    if (Node->CloneOf) {
-      AttributeString += ",color=\"blue\"";
-      AttributeString += ",style=\"filled,bold,dashed\"";
-    } else
-      AttributeString += ",style=\"filled\"";
-    return AttributeString;
-  }
-
-  static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter,
-                                       GraphType) {
-    auto &Edge = *(ChildIter.getCurrent());
-    return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" +
-            Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"")
-        .str();
-  }
-
-  // Since the NodeOwners list includes nodes that are no longer connected to
-  // the graph, skip them here.
-  static bool isNodeHidden(NodeRef Node, GraphType) {
-    return Node->isRemoved();
-  }
-
-private:
-  static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) {
-    std::string IdString = "ContextIds:";
-    if (ContextIds.size() < 100) {
-      std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
-      std::sort(SortedIds.begin(), SortedIds.end());
-      for (auto Id : SortedIds)
-        IdString += (" " + Twine(Id)).str();
-    } else {
-      IdString += (" (" + Twine(ContextIds.size()) + " ids)").str();
-    }
-    return IdString;
-  }
-
-  static std::string getColor(uint8_t AllocTypes) {
-    if (AllocTypes == (uint8_t)AllocationType::NotCold)
-      // Color "brown1" actually looks like a lighter red.
-      return "brown1";
-    if (AllocTypes == (uint8_t)AllocationType::Cold)
-      return "cyan";
-    if (AllocTypes ==
-        ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
-      // Lighter purple.
-      return "mediumorchid1";
-    return "gray";
-  }
-
-  static std::string getNodeId(NodeRef Node) {
-    std::stringstream SStream;
-    SStream << std::hex << "N0x" << (unsigned long long)Node;
-    std::string Result = SStream.str();
-    return Result;
-  }
-};
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
-    std::string Label) const {
-  WriteGraph(this, "", false, Label,
-             DotFilePathPrefix + "ccg." + Label + ".dot");
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
-  if (DumpCCG) {
-    dbgs() << "CCG before cloning:\n";
-    dbgs() << *this;
-  }
-  if (ExportToDot)
-    exportToDot("postbuild");
-
-  if (VerifyCCG) {
-    check();
-  }
-
-  return false;
-}
-
-bool MemProfContextDisambiguation::processModule(Module &M) {
-  bool Changed = false;
-
-  ModuleCallsiteContextGraph CCG(M);
-  Changed = CCG.process();
-
-  return Changed;
-}
-
-PreservedAnalyses MemProfContextDisambiguation::run(Module &M,
-                                                    ModuleAnalysisManager &AM) {
-  if (!processModule(M))
-    return PreservedAnalyses::all();
-  return PreservedAnalyses::none();
-}

diff  --git a/llvm/test/ThinLTO/X86/memprof-summary.ll b/llvm/test/ThinLTO/X86/memprof-summary.ll
new file mode 100644
index 0000000000000..597cd44c030e7
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-summary.ll
@@ -0,0 +1,184 @@
+;; Check memprof summaries (per module, combined index, and distributed indexes)
+
+; RUN: split-file %s %t
+; RUN: opt -module-summary %t/a.ll -o %ta.bc
+; RUN: opt -module-summary %t/b.ll -o %tb.bc
+
+; RUN: llvm-dis -o - %ta.bc | FileCheck %s --check-prefix=PRELINKDISA
+; PRELINKDISA: gv: (name: "main", {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438)))))) ; guid = 15822663052811949562
+
+; RUN: llvm-dis -o - %tb.bc | FileCheck %s --check-prefix=PRELINKDISB
+; PRELINKDISB: ^[[PLBAR:[0-9]+]] = gv: (name: "_Z3barv", {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438)))))))) ; guid = 4555904644815367798
+; PRELINKDISB: ^[[PLFOO:[0-9]+]] = gv: (name: "_Z3foov", {{.*}} callsites: ((callee: ^[[PLBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848)))))) ; guid = 9191153033785521275
+; PRELINKDISB: ^[[PLBAZ]] = gv: (name: "_Z3bazv", {{.*}} callsites: ((callee: ^[[PLBAR]], clones: (0), stackIds: (12481870273128938184)))))) ; guid = 15176620447596392000
+
+; RUN: llvm-bcanalyzer -dump %ta.bc | FileCheck %s --check-prefix=PRELINKBCANA
+; PRELINKBCANA: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178/>
+
+; RUN: llvm-bcanalyzer -dump %tb.bc | FileCheck %s --check-prefix=PRELINKBCANB
+; PRELINKBCANB: <STACK_IDS abbrevid=4 op0=-5964873800580613432 op1=2732490490862098848 op2=8632435727821051414 op3=-3421689549917153178/>
+
+; RUN: llvm-lto2 run %ta.bc %tb.bc -o %t -save-temps \
+; RUN:     -thinlto-distributed-indexes \
+; RUN:     -r=%ta.bc,main,plx \
+; RUN:     -r=%ta.bc,_Z3foov, \
+; RUN:     -r=%ta.bc,free, \
+; RUN:     -r=%ta.bc,sleep, \
+; RUN:     -r=%tb.bc,_Z3foov,pl \
+; RUN:     -r=%tb.bc,_Znam, \
+; RUN:     -r=%tb.bc,_Z3bazv,pl
+
+; RUN: llvm-dis -o - %t.index.bc | FileCheck %s --check-prefix=COMBINEDDIS
+; COMBINEDDIS: ^[[COMBBAR:[0-9]+]] = gv: (guid: 4555904644815367798, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438))))))))
+; COMBINEDDIS: ^[[COMBFOO:[0-9]+]] = gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^[[COMBBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848))))))
+; COMBINEDDIS: ^[[COMBBAZ]] = gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^[[COMBBAR]], clones: (0), stackIds: (12481870273128938184))))))
+; COMBINEDDIS: ^[[COMBMAIN:[0-9]+]] = gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^[[COMBFOO]], clones: (0), stackIds: (8632435727821051414)), (callee: ^[[COMBFOO]], clones: (0), stackIds: (15025054523792398438))))))
+
+; RUN: llvm-bcanalyzer -dump %t.index.bc | FileCheck %s --check-prefix=COMBINEDBCAN
+; COMBINEDBCAN: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178 op2=-5964873800580613432 op3=2732490490862098848/>
+
+; RUN: llvm-dis -o - %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISA
+; DISTRIBUTEDDISA: gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: null, clones: (0), stackIds: (2732490490862098848))))))
+; DISTRIBUTEDDISA: gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438))))))
+
+; RUN: llvm-dis -o - %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISB
+; DISTRIBUTEDDISB: ^[[DISTRBAR:[0-9]+]] = gv: (guid: 4555904644815367798, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438))))))))
+; DISTRIBUTEDDISB: ^[[DISTRFOO:[0-9]+]] = gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^[[DISTRBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848))))))
+; DISTRIBUTEDDISB: ^[[DISTRBAZ]] = gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^[[DISTRBAR]], clones: (0), stackIds: (12481870273128938184))))))
+
+; RUN: llvm-bcanalyzer -dump %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANA
+; DISTRIBUTEDBCANA: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178 op2=2732490490862098848/>
+
+; RUN: llvm-bcanalyzer -dump %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANB
+; DISTRIBUTEDBCANB: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178 op2=-5964873800580613432 op3=2732490490862098848/>
+
+;--- a.ll
+; ModuleID = 'a.cc'
+source_filename = "a.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: mustprogress norecurse uwtable
+define dso_local noundef i32 @main(i32 noundef %argc, ptr nocapture noundef readnone %argv) local_unnamed_addr #0 !dbg !39 {
+entry:
+  %call = call noundef ptr @_Z3foov(), !dbg !42, !callsite !43
+  %call1 = call noundef ptr @_Z3foov(), !dbg !44, !callsite !45
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call, i8 0, i64 10, i1 false), !dbg !46
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call1, i8 0, i64 10, i1 false), !dbg !47
+  call void @free(ptr noundef %call) #4, !dbg !48
+  %call2 = call i32 @sleep(i32 noundef 10), !dbg !49
+  call void @free(ptr noundef %call1) #4, !dbg !50
+  ret i32 0, !dbg !51
+}
+
+declare !dbg !52 noundef ptr @_Z3foov() local_unnamed_addr #1
+
+; Function Attrs: argmemonly mustprogress nocallback nofree nounwind willreturn writeonly
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2
+
+; Function Attrs: inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free")
+declare void @free(ptr allocptr nocapture noundef) local_unnamed_addr #3
+
+declare !dbg !53 i32 @sleep(i32 noundef) local_unnamed_addr #1
+
+attributes #0 = { mustprogress norecurse uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { argmemonly mustprogress nocallback nofree nounwind willreturn writeonly }
+attributes #3 = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") "alloc-family"="malloc" "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #4 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git at github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "a.cc", directory: ".", checksumkind: CSK_MD5, checksum: "ebabd56909271a1d4a7cac81c10624d5")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{i32 7, !"frame-pointer", i32 2}
+!39 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
+!40 = !DISubroutineType(types: !41)
+!41 = !{}
+!42 = !DILocation(line: 6, column: 13, scope: !39)
+!43 = !{i64 8632435727821051414}
+!44 = !DILocation(line: 7, column: 13, scope: !39)
+!45 = !{i64 -3421689549917153178}
+!46 = !DILocation(line: 8, column: 3, scope: !39)
+!47 = !DILocation(line: 9, column: 3, scope: !39)
+!48 = !DILocation(line: 10, column: 3, scope: !39)
+!49 = !DILocation(line: 11, column: 3, scope: !39)
+!50 = !DILocation(line: 12, column: 3, scope: !39)
+!51 = !DILocation(line: 13, column: 3, scope: !39)
+!52 = !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41)
+!53 = !DISubprogram(name: "sleep", scope: !54, file: !54, line: 453, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41)
+!54 = !DIFile(filename: "include/unistd.h", directory: "/usr", checksumkind: CSK_MD5, checksum: "ee8f41a17f563f029d0e930ad871815a")
+
+;--- b.ll
+; ModuleID = 'b.cc'
+source_filename = "b.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: mustprogress noinline uwtable
+define internal noalias noundef nonnull ptr @_Z3barv() local_unnamed_addr #0 !dbg !39 {
+entry:
+  %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #2, !dbg !42, !memprof !43, !callsite !48
+  ret ptr %call, !dbg !49
+}
+
+; Function Attrs: nobuiltin allocsize(0)
+declare noundef nonnull ptr @_Znam(i64 noundef) local_unnamed_addr #1
+
+; Function Attrs: mustprogress noinline uwtable
+define dso_local noalias noundef nonnull ptr @_Z3bazv() local_unnamed_addr #0 !dbg !50 {
+entry:
+  %call = call noundef ptr @_Z3barv(), !dbg !51, !callsite !52
+  ret ptr %call, !dbg !53
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local noalias noundef nonnull ptr @_Z3foov() local_unnamed_addr #3 !dbg !54 {
+entry:
+  %call = call noundef ptr @_Z3bazv(), !dbg !55, !callsite !56
+  ret ptr %call, !dbg !57
+}
+
+attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nobuiltin allocsize(0) "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { builtin allocsize(0) }
+attributes #3 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git at github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "b.cc", directory: ".", checksumkind: CSK_MD5, checksum: "335f81d275af57725cfc9ffc7be49bc2")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{i32 7, !"frame-pointer", i32 2}
+!39 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, type: !40, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
+!40 = !DISubroutineType(types: !41)
+!41 = !{}
+!42 = !DILocation(line: 2, column: 10, scope: !39)
+!43 = !{!44, !46}
+!44 = !{!45, !"notcold"}
+!45 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
+!46 = !{!47, !"cold"}
+!47 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
+!48 = !{i64 9086428284934609951}
+!49 = !DILocation(line: 2, column: 3, scope: !39)
+!50 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
+!51 = !DILocation(line: 6, column: 10, scope: !50)
+!52 = !{i64 -5964873800580613432}
+!53 = !DILocation(line: 6, column: 3, scope: !50)
+!54 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 9, type: !40, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
+!55 = !DILocation(line: 10, column: 10, scope: !54)
+!56 = !{i64 2732490490862098848}
+!57 = !DILocation(line: 10, column: 3, scope: !54)

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
deleted file mode 100644
index 539d88a815ed1..0000000000000
--- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
+++ /dev/null
@@ -1,158 +0,0 @@
-;; Test callsite context graph generation for simple call graph with
-;; two memprof contexts and no inlining.
-;;
-;; Original code looks like:
-;;
-;; char *bar() {
-;;   return new char[10];
-;; }
-;;
-;; char *baz() {
-;;   return bar();
-;; }
-;;
-;; char *foo() {
-;;   return baz();
-;; }
-;;
-;; int main(int argc, char **argv) {
-;;   char *x = foo();
-;;   char *y = foo();
-;;   memset(x, 0, 10);
-;;   memset(y, 0, 10);
-;;   delete[] x;
-;;   sleep(10);
-;;   delete[] y;
-;;   return 0;
-;; }
-;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
-;; memory freed after sleep(10) results in cold lifetimes.
-;;
-;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
-
-; RUN: opt -passes=memprof-context-disambiguation \
-; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
-; RUN:	-memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
-; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=DUMP
-
-; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
-
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define i32 @main() #0 {
-entry:
-  %call = call noundef ptr @_Z3foov(), !callsite !0
-  %call1 = call noundef ptr @_Z3foov(), !callsite !1
-  ret i32 0
-}
-
-; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
-declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
-
-; Function Attrs: nobuiltin
-declare void @_ZdaPv() #2
-
-define internal ptr @_Z3barv() #3 {
-entry:
-  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7
-  ret ptr null
-}
-
-declare ptr @_Znam(i64)
-
-define internal ptr @_Z3bazv() #4 {
-entry:
-  %call = call noundef ptr @_Z3barv(), !callsite !8
-  ret ptr null
-}
-
-; Function Attrs: noinline
-define internal ptr @_Z3foov() #5 {
-entry:
-  %call = call noundef ptr @_Z3bazv(), !callsite !9
-  ret ptr null
-}
-
-; uselistorder directives
-uselistorder ptr @_Z3foov, { 1, 0 }
-
-attributes #0 = { "tune-cpu"="generic" }
-attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) }
-attributes #2 = { nobuiltin }
-attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
-attributes #4 = { "stack-protector-buffer-size"="8" }
-attributes #5 = { noinline }
-attributes #6 = { builtin }
-
-!0 = !{i64 8632435727821051414}
-!1 = !{i64 -3421689549917153178}
-!2 = !{!3, !5}
-!3 = !{!4, !"notcold"}
-!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!5 = !{!6, !"cold"}
-!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
-!7 = !{i64 9086428284934609951}
-!8 = !{i64 -5964873800580613432}
-!9 = !{i64 2732490490862098848}
-
-
-; DUMP: CCG before cloning:
-; DUMP: Callsite Context Graph:
-; DUMP: Node [[BAR:0x[a-z0-9]+]]
-; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 2
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
-
-; DUMP: Node [[BAZ]]
-; DUMP: 	  %call = call noundef ptr @_Z3barv()	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 2
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotColdCold ContextIds: 1 2
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
-
-; DUMP: Node [[FOO]]
-; DUMP: 	  %call = call noundef ptr @_Z3bazv()	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 2
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
-
-; DUMP: Node [[MAIN1]]
-; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
-; DUMP: 	AllocTypes: NotCold
-; DUMP: 	ContextIds: 1
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[MAIN2]]
-; DUMP: 	  %call1 = call noundef ptr @_Z3foov()	(clone 0)
-; DUMP: 	AllocTypes: Cold
-; DUMP: 	ContextIds: 2
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
-; DUMP: 	CallerEdges:
-
-
-; DOT: digraph "postbuild" {
-; DOT: 	label="postbuild";
-; DOT: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
-; DOT: 	Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
-; DOT: 	Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
-; DOT: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
-; DOT: 	Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
-; DOT: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
-; DOT: 	Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
-; DOT: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
-; DOT: 	Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
-; DOT: }

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
deleted file mode 100644
index c5ed97f182a98..0000000000000
--- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
+++ /dev/null
@@ -1,232 +0,0 @@
-;; Test callsite context graph generation for call graph with with MIBs
-;; that have pruned contexts that partially match multiple inlined
-;; callsite contexts, requiring duplication of context ids and nodes
-;; while matching callsite nodes onto the graph.
-;;
-;; Original code looks like:
-;;
-;; char *D() {
-;;   return new char[10];
-;; }
-;;
-;; char *F() {
-;;   return D();
-;; }
-;;
-;; char *C() {
-;;   return D();
-;; }
-;;
-;; char *B() {
-;;   return C();
-;; }
-;;
-;; char *E() {
-;;   return C();
-;; }
-;; int main(int argc, char **argv) {
-;;   char *x = B(); // cold
-;;   char *y = E(); // cold
-;;   char *z = F(); // default
-;;   memset(x, 0, 10);
-;;   memset(y, 0, 10);
-;;   memset(z, 0, 10);
-;;   delete[] z;
-;;   sleep(10);
-;;   delete[] x;
-;;   delete[] y;
-;;   return 0;
-;; }
-;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
-;; memory freed after sleep(10) results in cold lifetimes.
-;;
-;; The code below was created by forcing inlining of C into both B and E.
-;; Since both allocation contexts via C are cold, the matched memprof
-;; metadata has the context pruned above C's callsite. This requires
-;; matching the stack node for C to callsites where it was inlined (i.e.
-;; the callsites in B and E that have callsite metadata that includes C's).
-;; It also requires duplication of that node in the graph as well as the
-;; duplication of the context ids along that path through the graph,
-;; so that we can represent the duplicated (via inlining) C callsite.
-;;
-;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
-
-; RUN: opt -passes=memprof-context-disambiguation \
-; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
-; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
-; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
-
-; RUN:  cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
-; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
-
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define internal ptr @_Z1Dv() {
-entry:
-  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !0, !callsite !5
-  ret ptr null
-}
-
-declare ptr @_Znam(i64)
-
-define internal ptr @_Z1Fv() #0 {
-entry:
-  %call = call noundef ptr @_Z1Dv(), !callsite !6
-  ret ptr null
-}
-
-; Function Attrs: mustprogress noinline optnone uwtable
-define internal ptr @_Z1Cv() #1 {
-entry:
-  %call = call noundef ptr @_Z1Dv(), !callsite !7
-  ret ptr null
-}
-
-; Function Attrs: mustprogress noinline optnone uwtable
-define internal ptr @_Z1Bv() #1 {
-entry:
-  %call.i = call noundef ptr @_Z1Dv(), !callsite !8
-  ret ptr null
-}
-
-; Function Attrs: mustprogress noinline optnone uwtable
-define internal ptr @_Z1Ev() #1 {
-entry:
-  %call.i = call noundef ptr @_Z1Dv(), !callsite !9
-  ret ptr null
-}
-
-; Function Attrs: noinline
-declare i32 @main() #2
-
-; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
-declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
-
-; Function Attrs: nounwind
-declare void @_ZdaPv() #4
-
-declare i32 @sleep() #5
-
-attributes #0 = { "disable-tail-calls"="true" }
-attributes #1 = { mustprogress noinline optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #2 = { noinline }
-attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: write) }
-attributes #4 = { nounwind }
-attributes #5 = { "no-trapping-math"="true" }
-attributes #6 = { builtin }
-
-!0 = !{!1, !3}
-!1 = !{!2, !"cold"}
-!2 = !{i64 6541423618768552252, i64 -6270142974039008131}
-!3 = !{!4, !"notcold"}
-!4 = !{i64 6541423618768552252, i64 -4903163940066524832}
-!5 = !{i64 6541423618768552252}
-!6 = !{i64 -4903163940066524832}
-!7 = !{i64 -6270142974039008131}
-!8 = !{i64 -6270142974039008131, i64 -184525619819294889}
-!9 = !{i64 -6270142974039008131, i64 1905834578520680781}
-
-
-;; After adding only the alloc node memprof metadata, we only have 2 contexts.
-
-; DUMP: CCG before updating call stack chains:
-; DUMP: Callsite Context Graph:
-; DUMP: Node [[D:0x[a-z0-9]+]]
-; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 2
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
-; DUMP: 		Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
-
-; DUMP: Node [[C]]
-; DUMP: 	null Call
-; DUMP: 	AllocTypes: Cold
-; DUMP: 	ContextIds: 1
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[F]]
-; DUMP: 	null Call
-; DUMP: 	AllocTypes: NotCold
-; DUMP: 	ContextIds: 2
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
-; DUMP: 	CallerEdges:
-
-;; After updating for callsite metadata, we should have generated context ids 3 and 4,
-;; along with 2 new nodes for those callsites. All have the same allocation type
-;; behavior as the original C node.
-
-; DUMP: CCG before cloning:
-; DUMP: Callsite Context Graph:
-; DUMP: Node [[D]]
-; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 2 3 4
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
-; DUMP: 		Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
-; DUMP: 		Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
-; DUMP: 		Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
-
-; DUMP: Node [[F]]
-; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
-; DUMP: 	AllocTypes: NotCold
-; DUMP: 	ContextIds: 2
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[C2]]
-; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
-; DUMP: 	AllocTypes: Cold
-; DUMP: 	ContextIds: 3
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[B]]
-; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
-; DUMP: 	AllocTypes: Cold
-; DUMP: 	ContextIds: 4
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[E]]
-; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
-; DUMP: 	AllocTypes: Cold
-; DUMP: 	ContextIds: 1
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
-; DUMP: 	CallerEdges:
-
-
-; DOTPRE: digraph "prestackupdate" {
-; DOTPRE: 	label="prestackupdate";
-; DOTPRE: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
-; DOTPRE: 	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"];
-; DOTPRE: 	Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
-; DOTPRE: 	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"];
-; DOTPRE: 	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
-; DOTPRE: }
-
-
-; DOTPOST:digraph "postbuild" {
-; DOTPOST:	label="postbuild";
-; DOTPOST:	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
-; DOTPOST:	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
-; DOTPOST:	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
-; DOTPOST:	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
-; DOTPOST:	Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"];
-; DOTPOST:	Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
-; DOTPOST:	Node[[B]] -> Node[[D]][tooltip="ContextIds: 4",fillcolor="cyan"];
-; DOTPOST:	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
-; DOTPOST:	Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
-; DOTPOST:}

diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
deleted file mode 100644
index da0fd3f44b45e..0000000000000
--- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
+++ /dev/null
@@ -1,386 +0,0 @@
-;; Test callsite context graph generation for call graph with with MIBs
-;; that have pruned contexts that partially match multiple inlined
-;; callsite contexts, requiring duplication of context ids and nodes
-;; while matching callsite nodes onto the graph. This test requires more
-;; complex duplication due to multiple contexts for different allocations
-;; that share some of the same callsite nodes.
-;;
-;; Original code looks like:
-;;
-;; char *D(bool Call1) {
-;;   if (Call1)
-;;     return new char[10];
-;;   else
-;;     return new char[10];
-;; }
-;;
-;; char *C(bool Call1) {
-;;   return D(Call1);
-;; }
-;;
-;; char *B(bool Call1) {
-;;   if (Call1)
-;;     return C(true);
-;;   else
-;;     return C(false);
-;; }
-;;
-;; char *A(bool Call1) {
-;;   return B(Call1);
-;; }
-;;
-;; char *A1() {
-;;   return A(true);
-;; }
-;;
-;; char *A2() {
-;;   return A(true);
-;; }
-;;
-;; char *A3() {
-;;   return A(false);
-;; }
-;;
-;; char *A4() {
-;;   return A(false);
-;; }
-;;
-;; char *E() {
-;;   return B(true);
-;; }
-;;
-;; char *F() {
-;;   return B(false);
-;; }
-;;
-;; int main(int argc, char **argv) {
-;;   char *a1 = A1(); // cold
-;;   char *a2 = A2(); // cold
-;;   char *e = E(); // default
-;;   char *a3 = A3(); // default
-;;   char *a4 = A4(); // default
-;;   char *f = F(); // cold
-;;   memset(a1, 0, 10);
-;;   memset(a2, 0, 10);
-;;   memset(e, 0, 10);
-;;   memset(a3, 0, 10);
-;;   memset(a4, 0, 10);
-;;   memset(f, 0, 10);
-;;   delete[] a3;
-;;   delete[] a4;
-;;   delete[] e;
-;;   sleep(10);
-;;   delete[] a1;
-;;   delete[] a2;
-;;   delete[] f;
-;;   return 0;
-;; }
-;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
-;; memory freed after sleep(10) results in cold lifetimes.
-;;
-;; The code below was created by forcing inlining of A into its callers,
-;; without any other inlining or optimizations. Since both allocation contexts
-;; via A for each allocation in D have the same allocation type (cold via
-;; A1 and A2 for the first new in D, and non-cold via A3 and A4 for the second
-;; new in D, the contexts for those respective allocations are pruned above A.
-;; The allocations via E and F are to ensure we don't prune above B.
-;;
-;; The matching onto the inlined A[1234]->A sequences will require duplication
-;; of the context id assigned to the context from A for each allocation in D.
-;; This test ensures that we do this correctly in the presence of callsites
-;; shared by the different duplicated context ids (i.e. callsite in C).
-;;
-;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
-
-; RUN: opt -passes=memprof-context-disambiguation \
-; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
-; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
-; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
-
-
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; Function Attrs: mustprogress noinline uwtable
-define ptr @_Z1Db(i1 %Call1) #0 {
-entry:
-  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5
-  br label %return
-
-if.else:                                          ; No predecessors!
-  %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !6, !callsite !11
-  br label %return
-
-return:                                           ; preds = %if.else, %entry
-  ret ptr null
-}
-
-; Function Attrs: nobuiltin
-declare ptr @_Znam(i64) #1
-
-define ptr @_Z1Cb(i1 %Call1) {
-entry:
-  %tobool = trunc i8 0 to i1
-  %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool), !callsite !12
-  ret ptr null
-}
-
-; Function Attrs: mustprogress noinline uwtable
-define ptr @_Z1Bb(i1 %Call1) #0 {
-entry:
-  %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true), !callsite !13
-  br label %return
-
-if.else:                                          ; No predecessors!
-  %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false), !callsite !14
-  br label %return
-
-return:                                           ; preds = %if.else, %entry
-  ret ptr null
-}
-
-define ptr @_Z1Ab(i1 %tobool) #2 {
-entry:
-  %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool), !callsite !15
-  ret ptr null
-}
-
-; Function Attrs: mustprogress noinline uwtable
-define ptr @_Z2A1v(i1 %tobool.i) #0 {
-entry:
-  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !16
-  ret ptr null
-}
-
-; Function Attrs: mustprogress noinline uwtable
-define ptr @_Z2A2v(i1 %tobool.i) #0 {
-entry:
-  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !17
-  ret ptr null
-}
-
-; Function Attrs: mustprogress noinline uwtable
-define ptr @_Z2A3v(i1 %tobool.i) #0 {
-entry:
-  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !18
-  ret ptr null
-}
-
-; Function Attrs: mustprogress noinline uwtable
-define ptr @_Z2A4v(i1 %tobool.i) #0 {
-entry:
-  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !19
-  ret ptr null
-}
-
-; Function Attrs: mustprogress noinline uwtable
-define ptr @_Z1Ev() #0 {
-entry:
-  %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true), !callsite !20
-  ret ptr null
-}
-
-; Function Attrs: mustprogress noinline uwtable
-define ptr @_Z1Fv() #0 {
-entry:
-  %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false), !callsite !21
-  ret ptr null
-}
-
-; Function Attrs: noinline
-declare i32 @main() #3
-
-; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
-declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4
-
-declare void @_ZdaPv() #5
-
-declare i32 @sleep() #6
-
-; uselistorder directives
-uselistorder ptr @_Znam, { 1, 0 }
-
-attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #1 = { nobuiltin }
-attributes #2 = { "tune-cpu"="generic" }
-attributes #3 = { noinline }
-attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
-attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
-attributes #6 = { "disable-tail-calls"="true" }
-attributes #7 = { builtin allocsize(0) }
-
-!0 = !{!1, !3}
-!1 = !{!2, !"notcold"}
-!2 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 1905834578520680781}
-!3 = !{!4, !"cold"}
-!4 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 -6528110295079665978}
-!5 = !{i64 4854880825882961848}
-!6 = !{!7, !9}
-!7 = !{!8, !"notcold"}
-!8 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -6528110295079665978}
-!9 = !{!10, !"cold"}
-!10 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -4903163940066524832}
-!11 = !{i64 -8775068539491628272}
-!12 = !{i64 -904694911315397047}
-!13 = !{i64 6532298921261778285}
-!14 = !{i64 7859682663773658275}
-!15 = !{i64 -6528110295079665978}
-!16 = !{i64 -6528110295079665978, i64 5747919905719679568}
-!17 = !{i64 -6528110295079665978, i64 -5753238080028016843}
-!18 = !{i64 -6528110295079665978, i64 1794685869326395337}
-!19 = !{i64 -6528110295079665978, i64 5462047985461644151}
-!20 = !{i64 1905834578520680781}
-!21 = !{i64 -4903163940066524832}
-
-
-;; After adding only the alloc node memprof metadata, we only have 4 contexts (we only
-;; match the interesting parts of the pre-update graph here).
-
-; DUMP: CCG before updating call stack chains:
-; DUMP: Callsite Context Graph:
-
-; DUMP: Node [[D1:0x[a-z0-9]+]]
-; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 2
-
-; DUMP: Node [[C:0x[a-z0-9]+]]
-; DUMP:         null Call
-; DUMP:         AllocTypes: NotColdCold
-; DUMP:         ContextIds: 1 2 3 4
-; DUMP:         CalleeEdges:
-; DUMP:                 Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2
-; DUMP:                 Edge from Callee [[D2:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4
-
-; DUMP: Node [[D2]]
-; DUMP: 	  %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 3 4
-
-
-;; After updating for callsite metadata, we should have duplicated the context
-;; ids coming from node A (2 and 3) 4 times, for the 4 different callers of A,
-;; and used those on new nodes for those callers. Note that while in reality
-;; we only have cold edges coming from A1 and A2 and noncold from A3 and A4,
-;; due to the pruning we have lost this information and thus end up duplicating
-;; both of A's contexts to all of the new nodes (which could result in some
-;; unnecessary cloning.
-
-; DUMP: CCG before cloning:
-; DUMP: Callsite Context Graph:
-; DUMP: Node [[D1]]
-; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 2 5 7 9 11
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
-
-; DUMP: Node [[C]]
-; DUMP: 	  %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool)	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 2 3 4 5 6 7 8 9 10 11 12
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
-; DUMP: 		Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[C]] to Caller: [[B1:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
-; DUMP: 		Edge from Callee [[C]] to Caller: [[B2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
-
-; DUMP: Node [[B1]]
-; DUMP: 	  %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true)	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 2 5 7 9 11
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[C]] to Caller: [[B1]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[B1]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
-; DUMP: 		Edge from Callee [[B1]] to Caller: [[A2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 5
-; DUMP: 		Edge from Callee [[B1]] to Caller: [[A3:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 7
-; DUMP: 		Edge from Callee [[B1]] to Caller: [[A1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 9
-; DUMP: 		Edge from Callee [[B1]] to Caller: [[A4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 11
-; DUMP: 		Edge from Callee [[B1]] to Caller: [[A:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
-
-; DUMP: Node [[E]]
-; DUMP: 	  %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true)	(clone 0)
-; DUMP: 	AllocTypes: NotCold
-; DUMP: 	ContextIds: 1
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[B1]] to Caller: [[E]] AllocTypes: NotCold ContextIds: 1
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[D2]]
-; DUMP: 	  %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 3 4 6 8 10 12
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
-
-; DUMP: Node [[B2]]
-; DUMP: 	  %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false)	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 3 4 6 8 10 12
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[C]] to Caller: [[B2]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[B2]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
-; DUMP: 		Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
-; DUMP: 		Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
-; DUMP: 		Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
-; DUMP: 		Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
-; DUMP: 		Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3
-
-; DUMP: Node [[F]]
-; DUMP: 	  %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false)	(clone 0)
-; DUMP: 	AllocTypes: Cold
-; DUMP: 	ContextIds: 4
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[B2]] to Caller: [[F]] AllocTypes: Cold ContextIds: 4
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[A2]]
-; DUMP: 	  %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool)	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 5 6
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[B1]] to Caller: [[A2]] AllocTypes: Cold ContextIds: 5
-; DUMP: 		Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[A3]]
-; DUMP: 	  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i)	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 7 8
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[B1]] to Caller: [[A3]] AllocTypes: Cold ContextIds: 7
-; DUMP: 		Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[A1]]
-; DUMP: 	  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i)	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 9 10
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[B1]] to Caller: [[A1]] AllocTypes: Cold ContextIds: 9
-; DUMP: 		Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[A4]]
-; DUMP: 	  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i)	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 11 12
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[B1]] to Caller: [[A4]] AllocTypes: Cold ContextIds: 11
-; DUMP: 		Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[A]]
-; DUMP: 	  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i)	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 2 3
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[B1]] to Caller: [[A]] AllocTypes: Cold ContextIds: 2
-; DUMP: 		Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3
-; DUMP: 	CallerEdges:

diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
deleted file mode 100644
index 9ebf219dd37a0..0000000000000
--- a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
+++ /dev/null
@@ -1,261 +0,0 @@
-;; Tests callsite context graph generation for call graph containing indirect
-;; calls. Currently this should result in conservative behavior, such that the
-;; indirect call receives a null call in its graph node, to prevent subsequent
-;; cloning.
-;;
-;; Original code looks like:
-;;
-;; char *foo() {
-;;   return new char[10];
-;; }
-;; class A {
-;; public:
-;;     virtual char *x() { return foo(); }
-;; };
-;; class B : public A {
-;; public:
-;;     char *x() final { return foo(); }
-;; };
-;; char *bar(A *a) {
-;;   return a->x();
-;; }
-;; int main(int argc, char **argv) {
-;;   char *x = foo();
-;;   char *y = foo();
-;;   B b;
-;;   char *z = bar(&b);
-;;   char *w = bar(&b);
-;;   A a;
-;;   char *r = bar(&a);
-;;   char *s = bar(&a);
-;;   memset(x, 0, 10);
-;;   memset(y, 0, 10);
-;;   memset(z, 0, 10);
-;;   memset(w, 0, 10);
-;;   memset(r, 0, 10);
-;;   memset(s, 0, 10);
-;;   delete[] x;
-;;   delete[] w;
-;;   delete[] r;
-;;   sleep(10);
-;;   delete[] y;
-;;   delete[] z;
-;;   delete[] s;
-;;   return 0;
-;; }
-;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
-;; memory freed after sleep(10) results in cold lifetimes.
-;;
-;; Compiled without optimization to prevent inlining and devirtualization.
-;;
-;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
-
-; RUN: opt -passes=memprof-context-disambiguation \
-; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
-; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
-; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
-
-; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
-
-
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-declare ptr @_Z3barP1A(ptr)
-
-define i32 @main(ptr %b, ptr %a) #0 {
-entry:
-  %call = call noundef ptr @_Z3foov(), !callsite !0
-  %call1 = call noundef ptr @_Z3foov(), !callsite !1
-  %call2 = call noundef ptr @_Z3barP1A(ptr noundef %b), !callsite !2
-  %call3 = call noundef ptr @_Z3barP1A(ptr noundef %b), !callsite !3
-  %call4 = call noundef ptr @_Z3barP1A(ptr noundef %a), !callsite !4
-  %call5 = call noundef ptr @_Z3barP1A(ptr noundef %a), !callsite !5
-  ret i32 0
-}
-
-; Function Attrs: noinline
-declare void @_ZN1BC2Ev() #1
-
-; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
-declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2
-
-; Function Attrs: nobuiltin
-declare void @_ZdaPv() #3
-
-define internal ptr @_ZN1A1xEv() #4 {
-entry:
-  %call = call noundef ptr @_Z3foov(), !callsite !6
-  ret ptr null
-}
-
-; Function Attrs: mustprogress uwtable
-define internal ptr @_ZN1B1xEv() #5 {
-entry:
-  %call = call noundef ptr @_Z3foov(), !callsite !7
-  ret ptr null
-}
-
-; Function Attrs: mustprogress uwtable
-define internal ptr @_Z3foov() #5 {
-entry:
-  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !8, !callsite !21
-  ret ptr null
-}
-
-declare ptr @_Znam(i64) #6
-
-; uselistorder directives
-uselistorder ptr @_Z3foov, { 3, 2, 1, 0 }
-
-attributes #0 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
-attributes #1 = { noinline }
-attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) }
-attributes #3 = { nobuiltin }
-attributes #4 = { "tune-cpu"="generic" }
-attributes #5 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #6 = { "disable-tail-calls"="true" }
-attributes #7 = { builtin }
-
-!0 = !{i64 8632435727821051414}
-!1 = !{i64 -3421689549917153178}
-!2 = !{i64 6792096022461663180}
-!3 = !{i64 -2709642582978494015}
-!4 = !{i64 748269490701775343}
-!5 = !{i64 -5747251260480066785}
-!6 = !{i64 8256774051149711748}
-!7 = !{i64 -4831879094954754638}
-!8 = !{!9, !11, !13, !15, !17, !19}
-!9 = !{!10, !"notcold"}
-!10 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 748269490701775343}
-!11 = !{!12, !"cold"}
-!12 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 -5747251260480066785}
-!13 = !{!14, !"notcold"}
-!14 = !{i64 2732490490862098848, i64 8632435727821051414}
-!15 = !{!16, !"cold"}
-!16 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 6792096022461663180}
-!17 = !{!18, !"notcold"}
-!18 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 -2709642582978494015}
-!19 = !{!20, !"cold"}
-!20 = !{i64 2732490490862098848, i64 -3421689549917153178}
-!21 = !{i64 2732490490862098848}
-
-
-; DUMP: CCG before cloning:
-; DUMP: Callsite Context Graph:
-; DUMP: Node [[FOO:0x[a-z0-9]+]]
-; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 2 3 4 5 6
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[AX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[BX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 4 5
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 6
-
-; DUMP: Node [[AX]]
-; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 2
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[AX]] to Caller: [[BAR:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
-
-;; Bar contains an indirect call, with multiple targets. It's call should be null.
-; DUMP: Node [[BAR]]
-; DUMP: 	null Call
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 2 4 5
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2
-; DUMP: 		Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN3:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN5:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN6:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 5
-
-; DUMP: Node [[MAIN3]]
-; DUMP: 	  %call4 = call noundef ptr @_Z3barP1A(ptr noundef %a)	(clone 0)
-; DUMP: 	AllocTypes: NotCold
-; DUMP: 	ContextIds: 1
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[MAIN4]]
-; DUMP: 	  %call5 = call noundef ptr @_Z3barP1A(ptr noundef %a)	(clone 0)
-; DUMP: 	AllocTypes: Cold
-; DUMP: 	ContextIds: 2
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[MAIN1]]
-; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
-; DUMP: 	AllocTypes: NotCold
-; DUMP: 	ContextIds: 3
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[BX]]
-; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 4 5
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5
-
-; DUMP: Node [[MAIN5]]
-; DUMP: 	  %call2 = call noundef ptr @_Z3barP1A(ptr noundef %b)	(clone 0)
-; DUMP: 	AllocTypes: Cold
-; DUMP: 	ContextIds: 4
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[MAIN6]]
-; DUMP: 	  %call3 = call noundef ptr @_Z3barP1A(ptr noundef %b)	(clone 0)
-; DUMP: 	AllocTypes: NotCold
-; DUMP: 	ContextIds: 5
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[MAIN2]]
-; DUMP: 	  %call1 = call noundef ptr @_Z3foov()	(clone 0)
-; DUMP: 	AllocTypes: Cold
-; DUMP: 	ContextIds: 6
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6
-; DUMP: 	CallerEdges:
-
-
-; DOT: digraph "postbuild" {
-; DOT: 	label="postbuild";
-; DOT: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2 3 4 5 6",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> _Znam}"];
-; DOT: 	Node[[AX:0x[a-z0-9]+]] [shape=record,tooltip="N[[AX]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 8256774051149711748\n_ZN1A1xEv -\> _Z3foov}"];
-; DOT: 	Node[[AX]] -> Node[[FOO]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
-; DOT: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13626499562959447861\nnull call (external)}"];
-; DOT: 	Node[[BAR]] -> Node[[AX]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
-; DOT: 	Node[[BAR]] -> Node[[BX:0x[a-z0-9]+]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"];
-; DOT: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 748269490701775343\nmain -\> _Z3barP1A}"];
-; DOT: 	Node[[MAIN1]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
-; DOT: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12699492813229484831\nmain -\> _Z3barP1A}"];
-; DOT: 	Node[[MAIN2]] -> Node[[BAR]][tooltip="ContextIds: 2",fillcolor="cyan"];
-; DOT: 	Node[[MAIN3:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN3]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
-; DOT: 	Node[[MAIN3]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"];
-; DOT: 	Node[[BX]] [shape=record,tooltip="N[[BX]] ContextIds: 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13614864978754796978\n_ZN1B1xEv -\> _Z3foov}"];
-; DOT: 	Node[[BX]] -> Node[[FOO]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"];
-; DOT: 	Node[[MAIN4:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN4]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 6792096022461663180\nmain -\> _Z3barP1A}"];
-; DOT: 	Node[[MAIN4]] -> Node[[BAR]][tooltip="ContextIds: 4",fillcolor="cyan"];
-; DOT: 	Node[[MAIN5:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN5]] ContextIds: 5",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 15737101490731057601\nmain -\> _Z3barP1A}"];
-; DOT: 	Node[[MAIN5]] -> Node[[BAR]][tooltip="ContextIds: 5",fillcolor="brown1"];
-; DOT: 	Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
-; DOT: 	Node[[MAIN6]] -> Node[[FOO]][tooltip="ContextIds: 6",fillcolor="cyan"];
-; DOT: }

diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
deleted file mode 100644
index 59f135ca06627..0000000000000
--- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
+++ /dev/null
@@ -1,189 +0,0 @@
-;; Test callsite context graph generation for call graph with two memprof
-;; contexts and partial inlining, requiring generation of a new fused node to
-;; represent the inlined sequence while matching callsite nodes onto the graph.
-;;
-;; Original code looks like:
-;;
-;; char *bar() {
-;;   return new char[10];
-;; }
-;;
-;; char *baz() {
-;;   return bar();
-;; }
-;;
-;; char *foo() {
-;;   return baz();
-;; }
-;;
-;; int main(int argc, char **argv) {
-;;   char *x = foo();
-;;   char *y = foo();
-;;   memset(x, 0, 10);
-;;   memset(y, 0, 10);
-;;   delete[] x;
-;;   sleep(10);
-;;   delete[] y;
-;;   return 0;
-;; }
-;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
-;; memory freed after sleep(10) results in cold lifetimes.
-;;
-;; The code below was created by forcing inlining of baz into foo, and
-;; bar into baz. Due to the inlining of bar we will initially have two
-;; allocation nodes in the graph. This tests that we correctly match
-;; foo (with baz inlined) onto the graph nodes first, and generate a new
-;; fused node for it. We should then not match baz (with bar inlined) as that
-;; is not reached by the MIB contexts (since all calls from main will look
-;; like main -> foo(+baz) -> bar after the inlining reflected in this IR).
-;;
-;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
-
-; RUN: opt -passes=memprof-context-disambiguation \
-; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
-; RUN:	-memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
-; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=DUMP
-
-; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
-
-
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define internal ptr @_Z3barv() {
-entry:
-  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5
-  ret ptr null
-}
-
-; Function Attrs: nobuiltin
-declare ptr @_Znam(i64) #0
-
-; Function Attrs: mustprogress
-define internal ptr @_Z3bazv() #1 {
-entry:
-  %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !6
-  ret ptr null
-}
-
-; Function Attrs: noinline
-define internal ptr @_Z3foov() #2 {
-entry:
-  %call.i = call noundef ptr @_Z3barv(), !callsite !7
-  ret ptr null
-}
-
-define i32 @main() #3 {
-entry:
-  %call = call noundef ptr @_Z3foov(), !callsite !8
-  %call1 = call noundef ptr @_Z3foov(), !callsite !9
-  ret i32 0
-}
-
-; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
-declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4
-
-; Function Attrs: nounwind
-declare void @_ZdaPv() #5
-
-declare i32 @sleep() #6
-
-attributes #0 = { nobuiltin }
-attributes #1 = { mustprogress }
-attributes #2 = { noinline }
-attributes #3 = { "tune-cpu"="generic" }
-attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
-attributes #5 = { nounwind }
-attributes #6 = { "disable-tail-calls"="true" }
-attributes #7 = { builtin }
-
-!0 = !{!1, !3}
-!1 = !{!2, !"notcold"}
-!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!3 = !{!4, !"cold"}
-!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
-!5 = !{i64 9086428284934609951}
-!6 = !{i64 9086428284934609951, i64 -5964873800580613432}
-!7 = !{i64 -5964873800580613432, i64 2732490490862098848}
-!8 = !{i64 8632435727821051414}
-!9 = !{i64 -3421689549917153178}
-
-
-; DUMP: CCG before cloning:
-; DUMP: Callsite Context Graph:
-; DUMP: Node [[BAR:0x[a-z0-9]+]]
-; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 2
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
-
-;; This is leftover from the MIB on the alloc inlined into baz. It is not
-;; matched with any call, since there is no such node in the IR. Due to the
-;; null call it will not participate in any context transformations.
-; DUMP: Node [[FOO2:0x[a-z0-9]+]]
-; DUMP: 	null Call
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 3 4
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[BAZ:0x[a-z0-9]+]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3
-; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
-
-; DUMP: Node [[MAIN1]]
-; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
-; DUMP: 	AllocTypes: NotCold
-; DUMP: 	ContextIds: 1 3
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[MAIN2]]
-; DUMP: 	  %call1 = call noundef ptr @_Z3foov()	(clone 0)
-; DUMP: 	AllocTypes: Cold
-; DUMP: 	ContextIds: 2 4
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[BAZ]]
-; DUMP: 	  %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 3 4
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
-
-;; This is the node synthesized for the call to bar in foo that was created
-;; by inlining baz into foo.
-; DUMP: Node [[FOO]]
-; DUMP: 	  %call.i = call noundef ptr @_Z3barv()	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 2
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
-; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
-
-
-; DOT: digraph "postbuild" {
-; DOT: 	label="postbuild";
-; DOT: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
-; DOT: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"];
-; DOT: 	Node[[FOO]] -> Node[[BAZ:0x[a-z0-9]+]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"];
-; DOT: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
-; DOT: 	Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"];
-; DOT: 	Node[[MAIN1]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 1",fillcolor="brown1"];
-; DOT: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
-; DOT: 	Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 4",fillcolor="cyan"];
-; DOT: 	Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 2",fillcolor="cyan"];
-; DOT: 	Node[[BAZ]] [shape=record,tooltip="N[[BAZ]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3bazv -\> _Znam}"];
-; DOT: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
-; DOT: 	Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
-; DOT: }

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll
deleted file mode 100644
index a3a056ade8c49..0000000000000
--- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll
+++ /dev/null
@@ -1,135 +0,0 @@
-;; Test callsite context graph generation for call graph with two memprof
-;; contexts and multiple levels of inlining, requiring generation of new
-;; fused nodes to represent the inlined sequence while matching callsite
-;; nodes onto the graph. In particular this tests the case where a function
-;; has inlined a callee containing an inlined callee.
-;;
-;; Original code looks like:
-;;
-;; char *bar() __attribute__((noinline)) {
-;;   return new char[10];
-;; }
-;;
-;; char *baz() {
-;;   return bar();
-;; }
-;;
-;; char *foo() {
-;;   return baz();
-;; }
-;;
-;; int main(int argc, char **argv) {
-;;   char *x = foo();
-;;   char *y = foo();
-;;   memset(x, 0, 10);
-;;   memset(y, 0, 10);
-;;   delete[] x;
-;;   sleep(10);
-;;   delete[] y;
-;;   return 0;
-;; }
-;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
-;; memory freed after sleep(10) results in cold lifetimes.
-;;
-;; Both foo and baz are inlined into main, at both foo callsites.
-;; We should update the graph for new fused nodes for both of those inlined
-;; callsites to bar.
-;;
-;; Note that baz and bar are both dead due to the inlining, but have been left
-;; in the input IR to ensure that the MIB call chain is matched to the longer
-;; inline sequences from main.
-;;
-;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
-
-; RUN: opt -passes=memprof-context-disambiguation \
-; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
-; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=DUMP
-
-
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define ptr @_Z3barv() #0 {
-entry:
-  %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #7, !memprof !7, !callsite !12, !heapallocsite !13
-  ret ptr null
-}
-
-; Function Attrs: nobuiltin
-declare ptr @_Znam(i64) #1
-
-; Function Attrs: mustprogress
-declare ptr @_Z3bazv() #2
-
-define i32 @main() #3 {
-delete.end5:
-  %call.i.i = call noundef ptr @_Z3barv(), !callsite !14
-  %call.i.i8 = call noundef ptr @_Z3barv(), !callsite !15
-  ret i32 0
-}
-
-; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
-declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4
-
-declare void @_ZdaPv() #5
-
-declare i32 @sleep() #6
-
-attributes #0 = { "stack-protector-buffer-size"="8" }
-attributes #1 = { nobuiltin }
-attributes #2 = { mustprogress }
-attributes #3 = { "tune-cpu"="generic" }
-attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
-attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
-attributes #6 = { "disable-tail-calls"="true" }
-attributes #7 = { builtin }
-
-!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6}
-
-!0 = !{i32 7, !"Dwarf Version", i32 5}
-!1 = !{i32 2, !"Debug Info Version", i32 3}
-!2 = !{i32 1, !"wchar_size", i32 4}
-!3 = !{i32 8, !"PIC Level", i32 2}
-!4 = !{i32 7, !"PIE Level", i32 2}
-!5 = !{i32 7, !"uwtable", i32 2}
-!6 = !{i32 7, !"frame-pointer", i32 2}
-!7 = !{!8, !10}
-!8 = !{!9, !"notcold"}
-!9 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!10 = !{!11, !"cold"}
-!11 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
-!12 = !{i64 9086428284934609951}
-!13 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
-!14 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!15 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
-
-
-; DUMP: CCG before cloning:
-; DUMP: Callsite Context Graph:
-; DUMP: Node [[BAR:0x[a-z0-9]+]]
-; DUMP: 	  %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #7, !heapallocsite !7	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 2
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
-
-;; This is the node synthesized for the first inlined call chain of main->foo->baz
-; DUMP: Node [[MAIN1]]
-; DUMP: 	  %call.i.i = call noundef ptr @_Z3barv()	(clone 0)
-; DUMP: 	AllocTypes: NotCold
-; DUMP: 	ContextIds: 1
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
-; DUMP: 	CallerEdges:
-
-;; This is the node synthesized for the second inlined call chain of main->foo->baz
-; DUMP: Node [[MAIN2]]
-; DUMP: 	  %call.i.i8 = call noundef ptr @_Z3barv()	(clone 0)
-; DUMP: 	AllocTypes: Cold
-; DUMP: 	ContextIds: 2
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[BAR]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
-; DUMP: 	CallerEdges:

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll b/llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll
deleted file mode 100644
index fede5fe96eccd..0000000000000
--- a/llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-;; Test that MemProfContextDisambiguation is enabled under the expected conditions
-;; and in the expected position.
-
-;; Pass is not currently enabled by default at any opt level.
-; RUN: opt -debug-pass-manager -passes='lto<O0>' -S %s \
-; RUN:     2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
-; RUN: opt -debug-pass-manager -passes='lto<O1>' -S %s \
-; RUN:     2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
-; RUN: opt -debug-pass-manager -passes='lto<O2>' -S %s \
-; RUN:     2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
-; RUN: opt -debug-pass-manager -passes='lto<O3>' -S %s \
-; RUN:     2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
-
-;; Pass should not run even under option at O0/O1.
-; RUN: opt -debug-pass-manager -passes='lto<O0>' -S %s \
-; RUN:     -enable-memprof-context-disambiguation \
-; RUN:     2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
-; RUN: opt -debug-pass-manager -passes='lto<O1>' -S %s \
-; RUN:     -enable-memprof-context-disambiguation \
-; RUN:     2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
-
-;; Pass should be enabled under option at O2/O3.
-; RUN: opt -debug-pass-manager -passes='lto<O2>' -S %s \
-; RUN:     -enable-memprof-context-disambiguation \
-; RUN:     2>&1 | FileCheck %s --check-prefix=ENABLED
-; RUN: opt -debug-pass-manager -passes='lto<O3>' -S %s \
-; RUN:     -enable-memprof-context-disambiguation \
-; RUN:     2>&1 | FileCheck %s --check-prefix=ENABLED
-
-;; When enabled, MemProfContextDisambiguation runs just after inlining.
-; ENABLED: Running pass: InlinerPass
-; ENABLED: Invalidating analysis: InlineAdvisorAnalysis
-; ENABLED: Running pass: MemProfContextDisambiguation
-
-define noundef ptr @_Z3barv() {
-entry:
-  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10)
-  ret ptr %call
-}
-
-declare noundef nonnull ptr @_Znam(i64 noundef)


        


More information about the llvm-commits mailing list