[llvm] f09807c - Revert "Restore "[MemProf] Context disambiguation cloning pass [patch 3/4]""

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Thu May 4 09:41:59 PDT 2023


Author: Teresa Johnson
Date: 2023-05-04T09:41:48-07:00
New Revision: f09807ca9dda2f588298d8733e89a81105c88120

URL: https://github.com/llvm/llvm-project/commit/f09807ca9dda2f588298d8733e89a81105c88120
DIFF: https://github.com/llvm/llvm-project/commit/f09807ca9dda2f588298d8733e89a81105c88120.diff

LOG: Revert "Restore "[MemProf] Context disambiguation cloning pass [patch 3/4]""

This reverts commit bfe7205975a63a605ff3faacd97fe4c1bf4c19b3, and follow
on fix e3e6bc699574550f2ed1de07f4e5bcdddaa65557, due to some remaining
instability exposed by the bot enabling expensive checks:
https://lab.llvm.org/buildbot/#/builders/42/builds/9842

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
    llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
    llvm/test/ThinLTO/X86/memprof-basic.ll
    llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
    llvm/test/ThinLTO/X86/memprof-indirectcall.ll
    llvm/test/ThinLTO/X86/memprof-inlined.ll
    llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
    llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
    llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
    llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll

Removed: 
    llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll
    llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll


################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
index 13f3a7eb7ce3f..475ea48cca932 100644
--- a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
+++ b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
@@ -25,14 +25,11 @@ namespace llvm {
 class GlobalValueSummary;
 class Module;
 class ModuleSummaryIndex;
-class OptimizationRemarkEmitter;
 
 class MemProfContextDisambiguation
     : public PassInfoMixin<MemProfContextDisambiguation> {
   /// Run the context disambiguator on \p M, returns true if any changes made.
-  bool processModule(
-      Module &M,
-      function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
+  bool processModule(Module &M);
 
 public:
   MemProfContextDisambiguation() {}

diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 3fff7e55cfe3f..5c8aaddfe3bb3 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -27,10 +27,8 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/MemoryProfileInfo.h"
 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
@@ -41,7 +39,6 @@
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Utils/Cloning.h"
 #include <sstream>
 #include <vector>
 using namespace llvm;
@@ -49,13 +46,6 @@ using namespace llvm::memprof;
 
 #define DEBUG_TYPE "memprof-context-disambiguation"
 
-STATISTIC(FunctionClonesAnalysis,
-          "Number of function clones created during whole program analysis");
-STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
-                            "cloned) during whole program analysis");
-STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
-                         "during whole program analysis");
-
 static cl::opt<std::string> DotFilePathPrefix(
     "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
     cl::value_desc("filename"),
@@ -105,13 +95,6 @@ class CallsiteContextGraph {
   /// behavior of an allocation based on its context.
   void identifyClones();
 
-  /// Assign callsite clones to functions, cloning functions as needed to
-  /// accommodate the combinations of their callsite clones reached by callers.
-  /// For regular LTO this clones functions and callsites in the IR, but for
-  /// ThinLTO the cloning decisions are noted in the summaries and applied
-  /// later.
-  bool assignFunctions();
-
   void dump() const;
   void print(raw_ostream &OS) const;
 
@@ -392,28 +375,6 @@ class CallsiteContextGraph {
     return static_cast<DerivedCCG *>(this)->getLastStackId(Call);
   }
 
-  /// Update the allocation call to record type of allocated memory.
-  void updateAllocationCall(CallInfo &Call, AllocationType AllocType) {
-    AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
-    static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType);
-  }
-
-  /// Update non-allocation call to invoke (possibly cloned) function
-  /// CalleeFunc.
-  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
-    static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc);
-  }
-
-  /// Clone the given function for the given callsite, recording mapping of all
-  /// of the functions tracked calls to their new versions in the CallMap.
-  /// Assigns new clones to clone number CloneNo.
-  FuncInfo cloneFunctionForCallsite(
-      FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
-      std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
-    return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite(
-        Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
-  }
-
   /// Gets a label to use in the dot graph for the given call clone in the given
   /// function.
   std::string getLabel(const FuncTy *Func, const CallTy Call,
@@ -508,9 +469,7 @@ class ModuleCallsiteContextGraph
     : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
                                   Instruction *> {
 public:
-  ModuleCallsiteContextGraph(
-      Module &M,
-      function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
+  ModuleCallsiteContextGraph(Module &M);
 
 private:
   friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
@@ -520,19 +479,10 @@ class ModuleCallsiteContextGraph
   bool calleeMatchesFunc(Instruction *Call, const Function *Func);
   uint64_t getLastStackId(Instruction *Call);
   std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call);
-  void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
-  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
-  CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
-                       Instruction *>::FuncInfo
-  cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
-                           std::map<CallInfo, CallInfo> &CallMap,
-                           std::vector<CallInfo> &CallsWithMetadataInFunc,
-                           unsigned CloneNo);
   std::string getLabel(const Function *Func, const Instruction *Call,
                        unsigned CloneNo) const;
 
   const Module &Mod;
-  function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;
 };
 
 /// Represents a call in the summary index graph, which can either be an
@@ -577,14 +527,6 @@ class IndexCallsiteContextGraph
   bool calleeMatchesFunc(IndexCall &Call, const FunctionSummary *Func);
   uint64_t getLastStackId(IndexCall &Call);
   std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
-  void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
-  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
-  CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
-                       IndexCall>::FuncInfo
-  cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
-                           std::map<CallInfo, CallInfo> &CallMap,
-                           std::vector<CallInfo> &CallsWithMetadataInFunc,
-                           unsigned CloneNo);
   std::string getLabel(const FunctionSummary *Func, const IndexCall &Call,
                        unsigned CloneNo) const;
 
@@ -1340,14 +1282,10 @@ uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
   return Index.getStackIdAtIndex(CallsiteContext.back());
 }
 
-static const std::string MemProfCloneSuffix = ".memprof.";
-
 static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) {
-  // We use CloneNo == 0 to refer to the original version, which doesn't get
-  // renamed with a suffix.
   if (!CloneNo)
     return Base.str();
-  return (Base + MemProfCloneSuffix + Twine(CloneNo)).str();
+  return (Base + ".memprof." + Twine(CloneNo)).str();
 }
 
 std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
@@ -1409,9 +1347,7 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
   return StackIds;
 }
 
-ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
-    Module &M, function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
-    : Mod(M), OREGetter(OREGetter) {
+ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(Module &M) : Mod(M) {
   for (auto &F : M) {
     std::vector<CallInfo> CallsWithMetadata;
     for (auto &BB : F) {
@@ -1725,7 +1661,7 @@ static void checkEdge(
 
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
-                      bool CheckEdges = true) {
+                      bool CheckEdges = false) {
   if (Node->isRemoved())
     return;
   // Node's context ids should be the union of both its callee and caller edge
@@ -1765,7 +1701,7 @@ template <typename DerivedCCG, typename FuncTy, typename CallTy>
 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
   using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
   for (const auto Node : nodes<GraphType>(this)) {
-    checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
+    checkNode<DerivedCCG, FuncTy, CallTy>(Node);
     for (auto &Edge : Node->CallerEdges)
       checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
   }
@@ -1989,14 +1925,12 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
     NewEdge->Callee->CallerEdges.push_back(NewEdge);
   }
   if (VerifyCCG) {
-    checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee, /*CheckEdges=*/false);
-    checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee, /*CheckEdges=*/false);
+    checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee);
+    checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee);
     for (const auto &OldCalleeEdge : OldCallee->CalleeEdges)
-      checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
-                                            /*CheckEdges=*/false);
+      checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee);
     for (const auto &NewCalleeEdge : NewCallee->CalleeEdges)
-      checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
-                                            /*CheckEdges=*/false);
+      checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee);
   }
 }
 
@@ -2011,7 +1945,7 @@ template <typename DerivedCCG, typename FuncTy, typename CallTy>
 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
     ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
   if (VerifyNodes)
-    checkNode<DerivedCCG, FuncTy, CallTy>(Node);
+    checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/true);
   assert(!Node->CloneOf);
 
   // If Node as a null call, then either it wasn't found in the module (regular
@@ -2165,7 +2099,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
   for (auto *Clone : Node->Clones) {
     removeNoneTypeCalleeEdges(Clone);
     if (VerifyNodes)
-      checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
+      checkNode<DerivedCCG, FuncTy, CallTy>(Clone, /*CheckEdges=*/true);
   }
   // We should still have some context ids on the original Node.
   assert(!Node->ContextIds.empty());
@@ -2186,595 +2120,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
                        }));
 
   if (VerifyNodes)
-    checkNode<DerivedCCG, FuncTy, CallTy>(Node);
-}
-
-static std::string getAllocTypeAttributeString(AllocationType Type) {
-  switch (Type) {
-  case AllocationType::NotCold:
-    return "notcold";
-    break;
-  case AllocationType::Cold:
-    return "cold";
-    break;
-  default:
-    dbgs() << "Unexpected alloc type " << (uint8_t)Type;
-    assert(false);
-  }
-  llvm_unreachable("invalid alloc type");
-}
-
-void ModuleCallsiteContextGraph::updateAllocationCall(
-    CallInfo &Call, AllocationType AllocType) {
-  std::string AllocTypeString = getAllocTypeAttributeString(AllocType);
-  auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(),
-                                "memprof", AllocTypeString);
-  cast<CallBase>(Call.call())->addFnAttr(A);
-  OREGetter(Call.call()->getFunction())
-      .emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", Call.call())
-            << ore::NV("AllocationCall", Call.call()) << " in clone "
-            << ore::NV("Caller", Call.call()->getFunction())
-            << " marked with memprof allocation attribute "
-            << ore::NV("Attribute", AllocTypeString));
-}
-
-void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call,
-                                                     AllocationType AllocType) {
-  auto *AI = Call.call().dyn_cast<AllocInfo *>();
-  assert(AI);
-  assert(AI->Versions.size() > Call.cloneNo());
-  AI->Versions[Call.cloneNo()] = (uint8_t)AllocType;
-}
-
-void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
-                                            FuncInfo CalleeFunc) {
-  if (CalleeFunc.cloneNo() > 0)
-    cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
-  OREGetter(CallerCall.call()->getFunction())
-      .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
-            << ore::NV("Call", CallerCall.call()) << " in clone "
-            << ore::NV("Caller", CallerCall.call()->getFunction())
-            << " assigned to call function clone "
-            << ore::NV("Callee", CalleeFunc.func()));
-}
-
-void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
-                                           FuncInfo CalleeFunc) {
-  auto *CI = CallerCall.call().dyn_cast<CallsiteInfo *>();
-  assert(CI &&
-         "Caller cannot be an allocation which should not have profiled calls");
-  assert(CI->Clones.size() > CallerCall.cloneNo());
-  CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
-}
-
-CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
-                     Instruction *>::FuncInfo
-ModuleCallsiteContextGraph::cloneFunctionForCallsite(
-    FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
-    std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
-  // Use existing LLVM facilities for cloning and obtaining Call in clone
-  ValueToValueMapTy VMap;
-  auto *NewFunc = CloneFunction(Func.func(), VMap);
-  std::string Name = getMemProfFuncName(Func.func()->getName(), CloneNo);
-  assert(!Func.func()->getParent()->getFunction(Name));
-  NewFunc->setName(Name);
-  for (auto &Inst : CallsWithMetadataInFunc) {
-    // This map always has the initial version in it.
-    assert(Inst.cloneNo() == 0);
-    CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
-  }
-  OREGetter(Func.func())
-      .emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", Func.func())
-            << "created clone " << ore::NV("NewFunction", NewFunc));
-  return {NewFunc, CloneNo};
-}
-
-CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
-                     IndexCall>::FuncInfo
-IndexCallsiteContextGraph::cloneFunctionForCallsite(
-    FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
-    std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
-  // Check how many clones we have of Call (and therefore function).
-  // The next clone number is the current size of versions array.
-  // Confirm this matches the CloneNo provided by the caller, which is based on
-  // the number of function clones we have.
-  assert(CloneNo ==
-         (Call.call().is<AllocInfo *>()
-              ? Call.call().dyn_cast<AllocInfo *>()->Versions.size()
-              : Call.call().dyn_cast<CallsiteInfo *>()->Clones.size()));
-  // Walk all the instructions in this function. Create a new version for
-  // each (by adding an entry to the Versions/Clones summary array), and copy
-  // over the version being called for the function clone being cloned here.
-  // Additionally, add an entry to the CallMap for the new function clone,
-  // mapping the original call (clone 0, what is in CallsWithMetadataInFunc)
-  // to the new call clone.
-  for (auto &Inst : CallsWithMetadataInFunc) {
-    // This map always has the initial version in it.
-    assert(Inst.cloneNo() == 0);
-    if (auto *AI = Inst.call().dyn_cast<AllocInfo *>()) {
-      assert(AI->Versions.size() == CloneNo);
-      // We assign the allocation type later (in updateAllocationCall), just add
-      // an entry for it here.
-      AI->Versions.push_back(0);
-    } else {
-      auto *CI = Inst.call().dyn_cast<CallsiteInfo *>();
-      assert(CI && CI->Clones.size() == CloneNo);
-      // We assign the clone number later (in updateCall), just add an entry for
-      // it here.
-      CI->Clones.push_back(0);
-    }
-    CallMap[Inst] = {Inst.call(), CloneNo};
-  }
-  return {Func.func(), CloneNo};
-}
-
-// This method assigns cloned callsites to functions, cloning the functions as
-// needed. The assignment is greedy and proceeds roughly as follows:
-//
-// For each function Func:
-//   For each call with graph Node having clones:
-//     Initialize ClonesWorklist to Node and its clones
-//     Initialize NodeCloneCount to 0
-//     While ClonesWorklist is not empty:
-//        Clone = pop front ClonesWorklist
-//        NodeCloneCount++
-//        If Func has been cloned less than NodeCloneCount times:
-//           If NodeCloneCount is 1:
-//             Assign Clone to original Func
-//             Continue
-//           Create a new function clone
-//           If other callers not assigned to call a function clone yet:
-//              Assign them to call new function clone
-//              Continue
-//           Assign any other caller calling the cloned version to new clone
-//
-//        For each caller of Clone:
-//           If caller is assigned to call a specific function clone:
-//             If we cannot assign Clone to that function clone:
-//               Create new callsite Clone NewClone
-//               Add NewClone to ClonesWorklist
-//               Continue
-//             Assign Clone to existing caller's called function clone
-//           Else:
-//             If Clone not already assigned to a function clone:
-//                Assign to first function clone without assignment
-//             Assign caller to selected function clone
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
-  bool Changed = false;
-
-  // Keep track of the assignment of nodes (callsites) to function clones they
-  // call.
-  DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
-
-  // Update caller node to call function version CalleeFunc, by recording the
-  // assignment in CallsiteToCalleeFuncCloneMap.
-  auto RecordCalleeFuncOfCallsite = [&](ContextNode *Caller,
-                                        const FuncInfo &CalleeFunc) {
-    assert(Caller->hasCall());
-    CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc;
-  };
-
-  // Walk all functions for which we saw calls with memprof metadata, and handle
-  // cloning for each of its calls.
-  for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
-    FuncInfo OrigFunc(Func);
-    // Map from each clone of OrigFunc to a map of remappings of each call of
-    // interest (from original uncloned call to the corresponding cloned call in
-    // that function clone).
-    std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
-    for (auto &Call : CallsWithMetadata) {
-      ContextNode *Node = getNodeForInst(Call);
-      // Skip call if we do not have a node for it (all uses of its stack ids
-      // were either on inlined chains or pruned from the MIBs), or if we did
-      // not create any clones for it.
-      if (!Node || Node->Clones.empty())
-        continue;
-      assert(Node->hasCall() &&
-             "Not having a call should have prevented cloning");
-
-      // Track the assignment of function clones to clones of the current
-      // callsite Node being handled.
-      std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
-
-      // Assign callsite version CallsiteClone to function version FuncClone,
-      // and also assign (possibly cloned) Call to CallsiteClone.
-      auto AssignCallsiteCloneToFuncClone = [&](const FuncInfo &FuncClone,
-                                                CallInfo &Call,
-                                                ContextNode *CallsiteClone,
-                                                bool IsAlloc) {
-        // Record the clone of callsite node assigned to this function clone.
-        FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
-
-        assert(FuncClonesToCallMap.count(FuncClone));
-        std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
-        CallInfo CallClone(Call);
-        if (CallMap.count(Call))
-          CallClone = CallMap[Call];
-        CallsiteClone->setCall(CallClone);
-      };
-
-      // Keep track of the clones of callsite Node that need to be assigned to
-      // function clones. This list may be expanded in the loop body below if we
-      // find additional cloning is required.
-      std::deque<ContextNode *> ClonesWorklist;
-      // Ignore original Node if we moved all of its contexts to clones.
-      if (!Node->ContextIds.empty())
-        ClonesWorklist.push_back(Node);
-      ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(),
-                            Node->Clones.end());
-
-      // Now walk through all of the clones of this callsite Node that we need,
-      // and determine the assignment to a corresponding clone of the current
-      // function (creating new function clones as needed).
-      unsigned NodeCloneCount = 0;
-      while (!ClonesWorklist.empty()) {
-        ContextNode *Clone = ClonesWorklist.front();
-        ClonesWorklist.pop_front();
-        NodeCloneCount++;
-        if (VerifyNodes)
-          checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
-
-        // Need to create a new function clone if we have more callsite clones
-        // than existing function clones, which would have been assigned to an
-        // earlier clone in the list (we assign callsite clones to function
-        // clones greedily).
-        if (FuncClonesToCallMap.size() < NodeCloneCount) {
-          // If this is the first callsite copy, assign to original function.
-          if (NodeCloneCount == 1) {
-            // Since FuncClonesToCallMap is empty in this case, no clones have
-            // been created for this function yet, and no callers should have
-            // been assigned a function clone for this callee node yet.
-            assert(llvm::none_of(
-                Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
-                  return CallsiteToCalleeFuncCloneMap.count(E->Caller);
-                }));
-            // Initialize with empty call map, assign Clone to original function
-            // and its callers, and skip to the next clone.
-            FuncClonesToCallMap[OrigFunc] = {};
-            AssignCallsiteCloneToFuncClone(
-                OrigFunc, Call, Clone,
-                AllocationCallToContextNodeMap.count(Call));
-            for (auto &CE : Clone->CallerEdges) {
-              // Ignore any caller that does not have a recorded callsite Call.
-              if (!CE->Caller->hasCall())
-                continue;
-              RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc);
-            }
-            continue;
-          }
-
-          // First locate which copy of OrigFunc to clone again. If a caller
-          // of this callsite clone was already assigned to call a particular
-          // function clone, we need to redirect all of those callers to the
-          // new function clone, and update their other callees within this
-          // function.
-          FuncInfo PreviousAssignedFuncClone;
-          auto EI = llvm::find_if(
-              Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
-                return CallsiteToCalleeFuncCloneMap.count(E->Caller);
-              });
-          bool CallerAssignedToCloneOfFunc = false;
-          if (EI != Clone->CallerEdges.end()) {
-            const std::shared_ptr<ContextEdge> &Edge = *EI;
-            PreviousAssignedFuncClone =
-                CallsiteToCalleeFuncCloneMap[Edge->Caller];
-            CallerAssignedToCloneOfFunc = true;
-          }
-
-          // Clone function and save it along with the CallInfo map created
-          // during cloning in the FuncClonesToCallMap.
-          std::map<CallInfo, CallInfo> NewCallMap;
-          unsigned CloneNo = FuncClonesToCallMap.size();
-          assert(CloneNo > 0 && "Clone 0 is the original function, which "
-                                "should already exist in the map");
-          FuncInfo NewFuncClone = cloneFunctionForCallsite(
-              OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
-          FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
-          FunctionClonesAnalysis++;
-          Changed = true;
-
-          // If no caller callsites were already assigned to a clone of this
-          // function, we can simply assign this clone to the new func clone
-          // and update all callers to it, then skip to the next clone.
-          if (!CallerAssignedToCloneOfFunc) {
-            AssignCallsiteCloneToFuncClone(
-                NewFuncClone, Call, Clone,
-                AllocationCallToContextNodeMap.count(Call));
-            for (auto &CE : Clone->CallerEdges) {
-              // Ignore any caller that does not have a recorded callsite Call.
-              if (!CE->Caller->hasCall())
-                continue;
-              RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
-            }
-            continue;
-          }
-
-          // We may need to do additional node cloning in this case.
-          // Reset the CallsiteToCalleeFuncCloneMap entry for any callers
-          // that were previously assigned to call PreviousAssignedFuncClone,
-          // to record that they now call NewFuncClone.
-          for (auto CE : Clone->CallerEdges) {
-            // Ignore any caller that does not have a recorded callsite Call.
-            if (!CE->Caller->hasCall())
-              continue;
-
-            if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) ||
-                // We subsequently fall through to later handling that
-                // will perform any additional cloning required for
-                // callers that were calling other function clones.
-                CallsiteToCalleeFuncCloneMap[CE->Caller] !=
-                    PreviousAssignedFuncClone)
-              continue;
-
-            RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
-
-            // If we are cloning a function that was already assigned to some
-            // callers, then essentially we are creating new callsite clones
-            // of the other callsites in that function that are reached by those
-            // callers. Clone the other callees of the current callsite's caller
-            // that were already assigned to PreviousAssignedFuncClone
-            // accordingly. This is important since we subsequently update the
-            // calls from the nodes in the graph and their assignments to callee
-            // functions recorded in CallsiteToCalleeFuncCloneMap.
-            for (auto CalleeEdge : CE->Caller->CalleeEdges) {
-              // Skip any that have been removed on an earlier iteration when
-              // cleaning up newly None type callee edges.
-              if (!CalleeEdge)
-                continue;
-              ContextNode *Callee = CalleeEdge->Callee;
-              // Skip the current callsite, we are looking for other
-              // callsites Caller calls, as well as any that does not have a
-              // recorded callsite Call.
-              if (Callee == Clone || !Callee->hasCall())
-                continue;
-              ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
-              removeNoneTypeCalleeEdges(NewClone);
-              // Moving the edge may have resulted in some none type
-              // callee edges on the original Callee.
-              removeNoneTypeCalleeEdges(Callee);
-              assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
-              // If the Callee node was already assigned to call a specific
-              // function version, make sure its new clone is assigned to call
-              // that same function clone.
-              if (CallsiteToCalleeFuncCloneMap.count(Callee))
-                RecordCalleeFuncOfCallsite(
-                    NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
-              // Update NewClone with the new Call clone of this callsite's Call
-              // created for the new function clone created earlier.
-              // Recall that we have already ensured when building the graph
-              // that each caller can only call callsites within the same
-              // function, so we are guaranteed that Callee Call is in the
-              // current OrigFunc.
-              // CallMap is set up as indexed by original Call at clone 0.
-              CallInfo OrigCall(Callee->getOrigNode()->Call);
-              OrigCall.setCloneNo(0);
-              std::map<CallInfo, CallInfo> &CallMap =
-                  FuncClonesToCallMap[NewFuncClone];
-              assert(CallMap.count(OrigCall));
-              CallInfo NewCall(CallMap[OrigCall]);
-              assert(NewCall);
-              NewClone->setCall(NewCall);
-            }
-          }
-          // Fall through to handling below to perform the recording of the
-          // function for this callsite clone. This enables handling of cases
-          // where the callers were assigned to different clones of a function.
-        }
-
-        // See if we can use existing function clone. Walk through
-        // all caller edges to see if any have already been assigned to
-        // a clone of this callsite's function. If we can use it, do so. If not,
-        // because that function clone is already assigned to a different clone
-        // of this callsite, then we need to clone again.
-        // Basically, this checking is needed to handle the case where different
-        // caller functions/callsites may need versions of this function
-        // containing different mixes of callsite clones across the different
-        // callsites within the function. If that happens, we need to create
-        // additional function clones to handle the various combinations.
-        //
-        // Keep track of any new clones of this callsite created by the
-        // following loop, as well as any existing clone that we decided to
-        // assign this clone to.
-        std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
-        FuncInfo FuncCloneAssignedToCurCallsiteClone;
-        // We need to be able to remove Edge from CallerEdges, so need to adjust
-        // iterator in the loop.
-        for (auto EI = Clone->CallerEdges.begin();
-             EI != Clone->CallerEdges.end();) {
-          auto Edge = *EI;
-          // Ignore any caller that does not have a recorded callsite Call.
-          if (!Edge->Caller->hasCall()) {
-            EI++;
-            continue;
-          }
-          // If this caller already assigned to call a version of OrigFunc, need
-          // to ensure we can assign this callsite clone to that function clone.
-          if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) {
-            FuncInfo FuncCloneCalledByCaller =
-                CallsiteToCalleeFuncCloneMap[Edge->Caller];
-            // First we need to confirm that this function clone is available
-            // for use by this callsite node clone.
-            //
-            // While FuncCloneToCurNodeCloneMap is built only for this Node and
-            // its callsite clones, one of those callsite clones X could have
-            // been assigned to the same function clone called by Edge's caller
-            // - if Edge's caller calls another callsite within Node's original
-            // function, and that callsite has another caller reaching clone X.
-            // We need to clone Node again in this case.
-            if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
-                 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
-                     Clone) ||
-                // Detect when we have multiple callers of this callsite that
-                // have already been assigned to specific, and different, clones
-                // of OrigFunc (due to other unrelated callsites in Func they
-                // reach via call contexts). Is this Clone of callsite Node
-                // assigned to a different clone of OrigFunc? If so, clone Node
-                // again.
-                (FuncCloneAssignedToCurCallsiteClone &&
-                 FuncCloneAssignedToCurCallsiteClone !=
-                     FuncCloneCalledByCaller)) {
-              // We need to use a different newly created callsite clone, in
-              // order to assign it to another new function clone on a
-              // subsequent iteration over the Clones array (adjusted below).
-              // Note we specifically do not reset the
-              // CallsiteToCalleeFuncCloneMap entry for this caller, so that
-              // when this new clone is processed later we know which version of
-              // the function to copy (so that other callsite clones we have
-              // assigned to that function clone are properly cloned over). See
-              // comments in the function cloning handling earlier.
-
-              // Check if we already have cloned this callsite again while
-              // walking through caller edges, for a caller calling the same
-              // function clone. If so, we can move this edge to that new clone
-              // rather than creating yet another new clone.
-              if (FuncCloneToNewCallsiteCloneMap.count(
-                      FuncCloneCalledByCaller)) {
-                ContextNode *NewClone =
-                    FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
-                moveEdgeToExistingCalleeClone(Edge, NewClone, &EI);
-                // Cleanup any none type edges cloned over.
-                removeNoneTypeCalleeEdges(NewClone);
-              } else {
-                // Create a new callsite clone.
-                ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI);
-                removeNoneTypeCalleeEdges(NewClone);
-                FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
-                    NewClone;
-                // Add to list of clones and process later.
-                ClonesWorklist.push_back(NewClone);
-                assert(EI == Clone->CallerEdges.end() ||
-                       Clone->AllocTypes != (uint8_t)AllocationType::None);
-                assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
-              }
-              // Moving the caller edge may have resulted in some none type
-              // callee edges.
-              removeNoneTypeCalleeEdges(Clone);
-              // We will handle the newly created callsite clone in a subsequent
-              // iteration over this Node's Clones. Continue here since we
-              // already adjusted iterator EI while moving the edge.
-              continue;
-            }
-
-            // Otherwise, we can use the function clone already assigned to this
-            // caller.
-            if (!FuncCloneAssignedToCurCallsiteClone) {
-              FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
-              // Assign Clone to FuncCloneCalledByCaller
-              AssignCallsiteCloneToFuncClone(
-                  FuncCloneCalledByCaller, Call, Clone,
-                  AllocationCallToContextNodeMap.count(Call));
-            } else
-              // Don't need to do anything - callsite is already calling this
-              // function clone.
-              assert(FuncCloneAssignedToCurCallsiteClone ==
-                     FuncCloneCalledByCaller);
-
-          } else {
-            // We have not already assigned this caller to a version of
-            // OrigFunc. Do the assignment now.
-
-            // First check if we have already assigned this callsite clone to a
-            // clone of OrigFunc for another caller during this iteration over
-            // its caller edges.
-            if (!FuncCloneAssignedToCurCallsiteClone) {
-              // Find first function in FuncClonesToCallMap without an assigned
-              // clone of this callsite Node. We should always have one
-              // available at this point due to the earlier cloning when the
-              // FuncClonesToCallMap size was smaller than the clone number.
-              for (auto &CF : FuncClonesToCallMap) {
-                if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
-                  FuncCloneAssignedToCurCallsiteClone = CF.first;
-                  break;
-                }
-              }
-              assert(FuncCloneAssignedToCurCallsiteClone);
-              // Assign Clone to FuncCloneAssignedToCurCallsiteClone
-              AssignCallsiteCloneToFuncClone(
-                  FuncCloneAssignedToCurCallsiteClone, Call, Clone,
-                  AllocationCallToContextNodeMap.count(Call));
-            } else
-              assert(FuncCloneToCurNodeCloneMap
-                         [FuncCloneAssignedToCurCallsiteClone] == Clone);
-            // Update callers to record function version called.
-            RecordCalleeFuncOfCallsite(Edge->Caller,
-                                       FuncCloneAssignedToCurCallsiteClone);
-          }
-
-          EI++;
-        }
-      }
-      if (VerifyCCG) {
-        checkNode<DerivedCCG, FuncTy, CallTy>(Node);
-        for (const auto &PE : Node->CalleeEdges)
-          checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
-        for (const auto &CE : Node->CallerEdges)
-          checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
-        for (auto *Clone : Node->Clones) {
-          checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
-          for (const auto &PE : Clone->CalleeEdges)
-            checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
-          for (const auto &CE : Clone->CallerEdges)
-            checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
-        }
-      }
-    }
-  }
-
-  auto UpdateCalls = [&](ContextNode *Node,
-                         DenseSet<const ContextNode *> &Visited,
-                         auto &&UpdateCalls) {
-    auto Inserted = Visited.insert(Node);
-    if (!Inserted.second)
-      return;
-
-    for (auto *Clone : Node->Clones)
-      UpdateCalls(Clone, Visited, UpdateCalls);
-
-    for (auto &Edge : Node->CallerEdges)
-      UpdateCalls(Edge->Caller, Visited, UpdateCalls);
-
-    // Skip if either no call to update, or if we ended up with no context ids
-    // (we moved all edges onto other clones).
-    if (!Node->hasCall() || Node->ContextIds.empty())
-      return;
-
-    if (Node->IsAllocation) {
-      updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes));
-      return;
-    }
-
-    if (!CallsiteToCalleeFuncCloneMap.count(Node))
-      return;
-
-    auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
-    updateCall(Node->Call, CalleeFunc);
-  };
-
-  // Sort the allocation nodes based on the OrigStackOrAllocId, which increase
-  // in insertion order, so that the following loop is deterministic (since the
-  // AllocationCallToContextNodeMap is keyed by a pointer). Specifically this
-  // can affect the order of the remarks emitted for regular LTO IR updates
-  // during the call updating.
-  std::vector<ContextNode *> AllocationNodes;
-  AllocationNodes.reserve(AllocationCallToContextNodeMap.size());
-  for (auto &Entry : AllocationCallToContextNodeMap)
-    AllocationNodes.push_back(Entry.second);
-  std::sort(AllocationNodes.begin(), AllocationNodes.end(),
-            [](const ContextNode *A, const ContextNode *B) {
-              return A->OrigStackOrAllocId < B->OrigStackOrAllocId;
-            });
-
-  // Performs DFS traversal starting from allocation nodes to update calls to
-  // reflect cloning decisions recorded earlier. For regular LTO this will
-  // update the actual calls in the IR to call the appropriate function clone
-  // (and add attributes to allocation calls), whereas for ThinLTO the decisions
-  // are recorded in the summary entries.
-  DenseSet<const ContextNode *> Visited;
-  for (auto *AllocNode : AllocationNodes)
-    UpdateCalls(AllocNode, Visited, UpdateCalls);
-
-  return Changed;
+    checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/true);
 }
 
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
@@ -2803,24 +2149,13 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
   if (ExportToDot)
     exportToDot("cloned");
 
-  bool Changed = assignFunctions();
-
-  if (DumpCCG) {
-    dbgs() << "CCG after assigning function clones:\n";
-    dbgs() << *this;
-  }
-  if (ExportToDot)
-    exportToDot("clonefuncassign");
-
-  return Changed;
+  return false;
 }
 
-bool MemProfContextDisambiguation::processModule(
-    Module &M,
-    function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
+bool MemProfContextDisambiguation::processModule(Module &M) {
   bool Changed = false;
 
-  ModuleCallsiteContextGraph CCG(M, OREGetter);
+  ModuleCallsiteContextGraph CCG(M);
   Changed = CCG.process();
 
   return Changed;
@@ -2828,11 +2163,7 @@ bool MemProfContextDisambiguation::processModule(
 
 PreservedAnalyses MemProfContextDisambiguation::run(Module &M,
                                                     ModuleAnalysisManager &AM) {
-  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
-  auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
-    return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
-  };
-  if (!processModule(M, OREGetter))
+  if (!processModule(M))
     return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }

diff  --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll
index 11378cf5bef47..4df89cdb12afd 100644
--- a/llvm/test/ThinLTO/X86/memprof-basic.ll
+++ b/llvm/test/ThinLTO/X86/memprof-basic.ll
@@ -42,35 +42,13 @@
 ; RUN:	-r=%t.o,_Znam, \
 ; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:	-memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
-; RUN:	-stats -pass-remarks=memprof-context-disambiguation -save-temps \
-; RUN:	-o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN:	--check-prefix=STATS
+; RUN:	-o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
 ;; We should have cloned bar, baz, and foo, for the cold memory allocation.
 ; RUN:	cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
-;; Try again but with distributed ThinLTO
-; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
-; RUN:  -thinlto-distributed-indexes \
-; RUN:	-r=%t.o,main,plx \
-; RUN:	-r=%t.o,_ZdaPv, \
-; RUN:	-r=%t.o,sleep, \
-; RUN:	-r=%t.o,_Znam, \
-; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
-; RUN:	-memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \
-; RUN:	-stats -pass-remarks=memprof-context-disambiguation \
-; RUN:	-o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN:	--check-prefix=STATS
-
-; RUN:	cat %t2.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
-;; We should have cloned bar, baz, and foo, for the cold memory allocation.
-; RUN:	cat %t2.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
-
-;; Check distributed index
-; RUN: llvm-dis %t.o.thinlto.bc -o - | FileCheck %s --check-prefix=DISTRIB
-
 source_filename = "memprof-basic.ll"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -252,11 +230,6 @@ uselistorder ptr @_Z3foov, { 1, 0 }
 ; DUMP:		Clone of [[BAR]]
 
 
-; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
-; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
-; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
-
-
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
 ; DOT: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"];
@@ -288,9 +261,3 @@ uselistorder ptr @_Z3foov, { 1, 0 }
 ; DOTCLONED: 	Node[[BAZ2]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
 ; DOTCLONED: 	Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"];
 ; DOTCLONED: }
-
-
-; DISTRIB: ^[[BAZ:[0-9]+]] = gv: (guid: 5878270615442837395, {{.*}} callsites: ((callee: ^[[BAR:[0-9]+]], clones: (0, 1)
-; DISTRIB: ^[[FOO:[0-9]+]] = gv: (guid: 6731117468105397038, {{.*}} callsites: ((callee: ^[[BAZ]], clones: (0, 1)
-; DISTRIB: ^[[BAR]] = gv: (guid: 9832687305761716512, {{.*}} allocs: ((versions: (notcold, cold)
-; DISTRIB: ^[[MAIN:[0-9]+]] = gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^[[FOO]], clones: (0), {{.*}} (callee: ^[[FOO]], clones: (1)

diff  --git a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
index 7f7447eaf58e4..12e2fc39b5f5e 100644
--- a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
+++ b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
@@ -1,8 +1,7 @@
 ;; Test callsite context graph generation for call graph with with MIBs
 ;; that have pruned contexts that partially match multiple inlined
 ;; callsite contexts, requiring duplication of context ids and nodes
-;; while matching callsite nodes onto the graph. Also tests graph and IR
-;; cloning.
+;; while matching callsite nodes onto the graph.
 ;;
 ;; Original code looks like:
 ;;
@@ -64,9 +63,7 @@
 ; RUN:  -r=%t.o,_Znam, \
 ; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
-; RUN:  -stats -pass-remarks=memprof-context-disambiguation -save-temps \
-; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN:  --check-prefix=STATS
+; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:  cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
 ; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
@@ -74,27 +71,6 @@
 ; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
-;; Try again but with distributed ThinLTO
-; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
-; RUN:  -thinlto-distributed-indexes \
-; RUN:  -r=%t.o,main,plx \
-; RUN:  -r=%t.o,_ZdaPv, \
-; RUN:  -r=%t.o,sleep, \
-; RUN:  -r=%t.o,_Znam, \
-; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
-; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \
-; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
-; RUN:  -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN:  --check-prefix=STATS
-
-; RUN:  cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
-; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
-;; We should clone D once for the cold allocations via C.
-; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
-
-;; Check distributed index
-; RUN: llvm-dis %t.o.thinlto.bc -o - | FileCheck %s --check-prefix=DISTRIB
-
 source_filename = "duplicate-context-ids.ll"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -131,13 +107,7 @@ entry:
   ret ptr null
 }
 
-define i32 @main() {
-entry:
-  call ptr @_Z1Bv()
-  call ptr @_Z1Ev()
-  call ptr @_Z1Fv()
-  ret i32 0
-}
+declare i32 @main()
 
 declare void @_ZdaPv()
 
@@ -301,11 +271,6 @@ declare i32 @sleep()
 ; DUMP:         Clone of [[D]]
 
 
-; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
-; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
-; STATS: 1 memprof-context-disambiguation - Number of function clones created during whole program analysis
-
-
 ; DOTPRE: digraph "prestackupdate" {
 ; DOTPRE: 	label="prestackupdate";
 ; DOTPRE: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
@@ -343,9 +308,3 @@ declare i32 @sleep()
 ; DOTCLONED: 	Node[[E]] -> Node[[D2]][tooltip="ContextIds: 1",fillcolor="cyan"];
 ; DOTCLONED: 	Node[[D2]] [shape=record,tooltip="N[[D2]] ContextIds: 1 3 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
 ; DOTCLONED: }
-
-; DISTRIB: ^[[C:[0-9]+]] = gv: (guid: 1643923691937891493, {{.*}} callsites: ((callee: ^[[D:[0-9]+]], clones: (1)
-; DISTRIB: ^[[D]] = gv: (guid: 4881081444663423788, {{.*}} allocs: ((versions: (notcold, cold)
-; DISTRIB: ^[[B:[0-9]+]] = gv: (guid: 14590037969532473829, {{.*}} callsites: ((callee: ^[[D]], clones: (1)
-; DISTRIB: ^[[F:[0-9]+]] = gv: (guid: 17035303613541779335, {{.*}} callsites: ((callee: ^[[D]], clones: (0)
-; DISTRIB: ^[[E:[0-9]+]] = gv: (guid: 17820708772846654376, {{.*}} callsites: ((callee: ^[[D]], clones: (1)

diff  --git a/llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll b/llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll
deleted file mode 100644
index 54aad0dc94ac0..0000000000000
--- a/llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll
+++ /dev/null
@@ -1,235 +0,0 @@
-;; Test context disambiguation for a callgraph containing multiple memprof
-;; contexts and no inlining, where we need to perform additional cloning
-;; during function assignment/cloning to handle the combination of contexts
-;; to 2 different allocations.
-;;
-;; void E(char **buf1, char **buf2) {
-;;   *buf1 = new char[10];
-;;   *buf2 = new char[10];
-;; }
-;;
-;; void B(char **buf1, char **buf2) {
-;;   E(buf1, buf2);
-;; }
-;;
-;; void C(char **buf1, char **buf2) {
-;;   E(buf1, buf2);
-;; }
-;;
-;; void D(char **buf1, char **buf2) {
-;;   E(buf1, buf2);
-;; }
-;; int main(int argc, char **argv) {
-;;   char *cold1, *cold2, *default1, *default2, *default3, *default4;
-;;   B(&default1, &default2);
-;;   C(&default3, &cold1);
-;;   D(&cold2, &default4);
-;;   memset(cold1, 0, 10);
-;;   memset(cold2, 0, 10);
-;;   memset(default1, 0, 10);
-;;   memset(default2, 0, 10);
-;;   memset(default3, 0, 10);
-;;   memset(default4, 0, 10);
-;;   delete[] default1;
-;;   delete[] default2;
-;;   delete[] default3;
-;;   delete[] default4;
-;;   sleep(10);
-;;   delete[] cold1;
-;;   delete[] cold2;
-;;   return 0;
-;; }
-;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
-;; memory freed after sleep(10) results in cold lifetimes.
-;;
-;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
-
-;; -stats requires asserts
-; REQUIRES: asserts
-
-
-; RUN: opt -thinlto-bc %s >%t.o
-; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
-; RUN:  -r=%t.o,main,plx \
-; RUN:  -r=%t.o,_ZdaPv, \
-; RUN:  -r=%t.o,sleep, \
-; RUN:  -r=%t.o,_Znam, \
-; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
-; RUN:  -stats -pass-remarks=memprof-context-disambiguation -save-temps \
-; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN:  --check-prefix=STATS
-
-
-;; Try again but with distributed ThinLTO
-; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
-; RUN:  -thinlto-distributed-indexes \
-; RUN:  -r=%t.o,main,plx \
-; RUN:  -r=%t.o,_ZdaPv, \
-; RUN:  -r=%t.o,sleep, \
-; RUN:  -r=%t.o,_Znam, \
-; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
-; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
-; RUN:  -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN:  --check-prefix=STATS
-
-
-source_filename = "funcassigncloning.ll"
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; Function Attrs: noinline optnone
-define internal void @_Z1EPPcS0_(ptr %buf1, ptr %buf2) {
-entry:
-  %call = call ptr @_Znam(i64 noundef 10), !memprof !0, !callsite !7
-  %call1 = call ptr @_Znam(i64 noundef 10), !memprof !8, !callsite !15
-  ret void
-}
-
-declare ptr @_Znam(i64)
-
-define internal void @_Z1BPPcS0_() {
-entry:
-  call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !16
-  ret void
-}
-
-define internal void @_Z1CPPcS0_() {
-entry:
-  call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !17
-  ret void
-}
-
-define internal void @_Z1DPPcS0_() {
-entry:
-  call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !18
-  ret void
-}
-
-; Function Attrs: noinline optnone
-define i32 @main() {
-entry:
-  call void @_Z1BPPcS0_()
-  call void @_Z1CPPcS0_()
-  call void @_Z1DPPcS0_()
-  ret i32 0
-}
-
-declare void @_ZdaPv()
-
-declare i32 @sleep()
-
-; uselistorder directives
-uselistorder ptr @_Znam, { 1, 0 }
-
-!0 = !{!1, !3, !5}
-!1 = !{!2, !"cold"}
-!2 = !{i64 -3461278137325233666, i64 -7799663586031895603}
-!3 = !{!4, !"notcold"}
-!4 = !{i64 -3461278137325233666, i64 -3483158674395044949}
-!5 = !{!6, !"notcold"}
-!6 = !{i64 -3461278137325233666, i64 -2441057035866683071}
-!7 = !{i64 -3461278137325233666}
-!8 = !{!9, !11, !13}
-!9 = !{!10, !"notcold"}
-!10 = !{i64 -1415475215210681400, i64 -2441057035866683071}
-!11 = !{!12, !"cold"}
-!12 = !{i64 -1415475215210681400, i64 -3483158674395044949}
-!13 = !{!14, !"notcold"}
-!14 = !{i64 -1415475215210681400, i64 -7799663586031895603}
-!15 = !{i64 -1415475215210681400}
-!16 = !{i64 -2441057035866683071}
-!17 = !{i64 -3483158674395044949}
-!18 = !{i64 -7799663586031895603}
-
-
-;; Originally we create a single clone of each call to new from E, since each
-;; allocates cold memory for a single caller.
-
-; DUMP: CCG after cloning:
-; DUMP: Callsite Context Graph:
-; DUMP: Node [[ENEW1ORIG:0x[a-z0-9]+]]
-; DUMP:         Versions: 1 MIB:
-; DUMP:                 AllocType 2 StackIds: 0
-; DUMP:                 AllocType 1 StackIds: 1
-; DUMP:                 AllocType 1 StackIds: 2
-; DUMP:         (clone 0)
-; DUMP: 	AllocTypes: NotCold
-; DUMP: 	ContextIds: 2 3
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
-; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3
-; DUMP: 	Clones: [[ENEW1CLONE:0x[a-z0-9]+]]
-
-; DUMP: Node [[D:0x[a-z0-9]+]]
-; DUMP: 	Callee: 10758063066234039248 (_Z1EPPcS0_) Clones: 0 StackIds: 0 (clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 1 6
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1
-; DUMP: 		Edge from Callee [[ENEW2ORIG:0x[a-z0-9]+]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[C]]
-; DUMP: 	Callee: 10758063066234039248 (_Z1EPPcS0_) Clones: 0 StackIds: 1 (clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 2 5
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[C]] AllocTypes: NotCold ContextIds: 2
-; DUMP: 		Edge from Callee [[ENEW2CLONE:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[B]]
-; DUMP: 	Callee: 10758063066234039248 (_Z1EPPcS0_) Clones: 0 StackIds: 2 (clone 0)
-; DUMP: 	AllocTypes: NotCold
-; DUMP: 	ContextIds: 3 4
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 3
-; DUMP: 		Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[ENEW2ORIG]]
-; DUMP:         Versions: 1 MIB:
-; DUMP:                 AllocType 1 StackIds: 2
-; DUMP:                 AllocType 2 StackIds: 1
-; DUMP:                 AllocType 1 StackIds: 0
-; DUMP:         (clone 0)
-; DUMP: 	AllocTypes: NotCold
-; DUMP: 	ContextIds: 4 6
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4
-; DUMP: 		Edge from Callee [[ENEW2ORIG]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6
-; DUMP: 	Clones: [[ENEW2CLONE]]
-
-; DUMP: Node [[ENEW1CLONE]]
-; DUMP:         Versions: 1 MIB:
-; DUMP:                 AllocType 2 StackIds: 0
-; DUMP:                 AllocType 1 StackIds: 1
-; DUMP:                 AllocType 1 StackIds: 2
-; DUMP:         (clone 0)
-; DUMP: 	AllocTypes: Cold
-; DUMP: 	ContextIds: 1
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1
-; DUMP: 	Clone of [[ENEW1ORIG]]
-
-; DUMP: Node [[ENEW2CLONE]]
-; DUMP:         Versions: 1 MIB:
-; DUMP:                 AllocType 1 StackIds: 2
-; DUMP:                 AllocType 2 StackIds: 1
-; DUMP:                 AllocType 1 StackIds: 0
-; DUMP:         (clone 0)
-; DUMP: 	AllocTypes: Cold
-; DUMP: 	ContextIds: 5
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[ENEW2CLONE]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5
-; DUMP: 	Clone of [[ENEW2ORIG]]
-
-
-; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
-; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
-; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis

diff  --git a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll
index c311d6243688f..bd9f5e9250592 100644
--- a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll
+++ b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll
@@ -1,7 +1,7 @@
 ;; Tests callsite context graph generation for call graph containing indirect
 ;; calls. Currently this should result in conservative behavior, such that the
 ;; indirect call receives a null call in its graph node, to prevent subsequent
-;; cloning. Also tests graph and IR cloning.
+;; cloning.
 ;;
 ;; Original code looks like:
 ;;
@@ -64,9 +64,7 @@
 ; RUN:  -r=%t.o,_ZTVN10__cxxabiv117__class_type_infoE, \
 ; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
-; RUN:  -stats -pass-remarks=memprof-context-disambiguation -save-temps \
-; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN:  --check-prefix=STATS
+; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
 ;; We should only create a single clone of foo, for the direct call
@@ -74,26 +72,6 @@
 ; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
-;; Try again but with distributed ThinLTO
-; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
-; RUN:  -thinlto-distributed-indexes \
-; RUN:  -r=%t.o,main,plx \
-; RUN:  -r=%t.o,_ZdaPv, \
-; RUN:  -r=%t.o,sleep, \
-; RUN:  -r=%t.o,_Znam, \
-; RUN:  -r=%t.o,_ZTVN10__cxxabiv120__si_class_type_infoE, \
-; RUN:  -r=%t.o,_ZTVN10__cxxabiv117__class_type_infoE, \
-; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
-; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \
-; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
-; RUN:  -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN:  --check-prefix=STATS
-
-; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
-;; We should only create a single clone of foo, for the direct call
-;; from main allocating cold memory.
-; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
-
 source_filename = "indirectcall.ll"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -384,11 +362,6 @@ uselistorder ptr @_Z3foov, { 3, 2, 1, 0 }
 ; DUMP:		Clone of [[FOO]]
 
 
-; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
-; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
-; STATS: 1 memprof-context-disambiguation - Number of function clones created during whole program analysis
-
-
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
 ; DOT: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2 3 4 5 6",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> alloc}"];

diff  --git a/llvm/test/ThinLTO/X86/memprof-inlined.ll b/llvm/test/ThinLTO/X86/memprof-inlined.ll
index 27eab8a5bcd20..e87168b4e3f92 100644
--- a/llvm/test/ThinLTO/X86/memprof-inlined.ll
+++ b/llvm/test/ThinLTO/X86/memprof-inlined.ll
@@ -1,7 +1,6 @@
 ;; Test callsite context graph generation for call graph with two memprof
 ;; contexts and partial inlining, requiring generation of a new fused node to
 ;; represent the inlined sequence while matching callsite nodes onto the graph.
-;; Also tests graph and IR cloning.
 ;;
 ;; Original code looks like:
 ;;
@@ -52,9 +51,7 @@
 ; RUN:	-r=%t.o,_Znam, \
 ; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:	-memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
-; RUN:  -stats -pass-remarks=memprof-context-disambiguation -save-temps \
-; RUN:	-o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN:  --check-prefix=STATS
+; RUN:	-o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
 ;; We should create clones for foo and bar for the call from main to allocate
@@ -62,24 +59,6 @@
 ; RUN:	cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
 
-;; Try again but with distributed ThinLTO
-; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
-; RUN:  -thinlto-distributed-indexes \
-; RUN:  -r=%t.o,main,plx \
-; RUN:  -r=%t.o,_ZdaPv, \
-; RUN:  -r=%t.o,sleep, \
-; RUN:  -r=%t.o,_Znam, \
-; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
-; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \
-; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
-; RUN:  -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN:  --check-prefix=STATS
-
-; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
-;; We should create clones for foo and bar for the call from main to allocate
-;; cold memory.
-; RUN:	cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
-
 source_filename = "inlined.ll"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -281,11 +260,6 @@ declare i32 @sleep()
 ; DUMP:         Clone of [[BAR]]
 
 
-; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
-; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
-; STATS: 2 memprof-context-disambiguation - Number of function clones created during whole program analysis
-
-
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
 ; DOT: 	Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3bazv -\> alloc}"];

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
index bd938754ce9d0..99a8d68a5b1d2 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
@@ -1,5 +1,5 @@
 ;; Test callsite context graph generation for simple call graph with
-;; two memprof contexts and no inlining, as well as graph and IR cloning.
+;; two memprof contexts and no inlining.
 ;;
 ;; Original code looks like:
 ;;
@@ -37,9 +37,7 @@
 ; RUN: opt -passes=memprof-context-disambiguation \
 ; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:	-memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
-; RUN:	-stats -pass-remarks=memprof-context-disambiguation \
-; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
-; RUN:	--check-prefix=STATS --check-prefix=REMARKS
+; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
 ;; We should have cloned bar, baz, and foo, for the cold memory allocation.
@@ -227,48 +225,6 @@ attributes #6 = { builtin }
 ; DUMP:		Clone of [[BAR]]
 
 
-; REMARKS: created clone _Z3barv.memprof.1
-; REMARKS: created clone _Z3bazv.memprof.1
-; REMARKS: created clone _Z3foov.memprof.1
-; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
-; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3bazv.memprof.1
-; REMARKS: call in clone _Z3bazv.memprof.1 assigned to call function clone _Z3barv.memprof.1
-; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold
-; REMARKS: call in clone main assigned to call function clone _Z3foov
-; REMARKS: call in clone _Z3foov assigned to call function clone _Z3bazv
-; REMARKS: call in clone _Z3bazv assigned to call function clone _Z3barv
-; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
-
-
-; IR: define {{.*}} @main
-;; The first call to foo does not allocate cold memory. It should call the
-;; original functions, which ultimately call the original allocation decorated
-;; with a "notcold" attribute.
-; IR:   call {{.*}} @_Z3foov()
-;; The second call to foo allocates cold memory. It should call cloned functions
-;; which ultimately call a cloned allocation decorated with a "cold" attribute.
-; IR:   call {{.*}} @_Z3foov.memprof.1()
-; IR: define internal {{.*}} @_Z3barv()
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
-; IR: define internal {{.*}} @_Z3bazv()
-; IR:   call {{.*}} @_Z3barv()
-; IR: define internal {{.*}} @_Z3foov()
-; IR:   call {{.*}} @_Z3bazv()
-; IR: define internal {{.*}} @_Z3barv.memprof.1()
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
-; IR: define internal {{.*}} @_Z3bazv.memprof.1()
-; IR:   call {{.*}} @_Z3barv.memprof.1()
-; IR: define internal {{.*}} @_Z3foov.memprof.1()
-; IR:   call {{.*}} @_Z3bazv.memprof.1()
-; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
-; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }
-
-
-; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
-; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
-; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
-
-
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
 ; DOT: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
index 1f23ad3c6a51b..143f892c18950 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
@@ -1,8 +1,7 @@
 ;; Test callsite context graph generation for call graph with with MIBs
 ;; that have pruned contexts that partially match multiple inlined
 ;; callsite contexts, requiring duplication of context ids and nodes
-;; while matching callsite nodes onto the graph. Also tests graph and IR
-;; cloning.
+;; while matching callsite nodes onto the graph.
 ;;
 ;; Original code looks like:
 ;;
@@ -59,9 +58,7 @@
 ; RUN: opt -passes=memprof-context-disambiguation \
 ; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
-; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
-; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
-; RUN:  --check-prefix=STATS --check-prefix=REMARKS
+; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:  cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
 ; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
@@ -269,39 +266,6 @@ attributes #6 = { builtin }
 ; DUMP: 		Edge from Callee [[D2]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
 ; DUMP:         Clone of [[D]]
 
-; REMARKS: created clone _Z1Dv.memprof.1
-; REMARKS: call in clone _Z1Ev assigned to call function clone _Z1Dv.memprof.1
-; REMARKS: call in clone _Z1Cv assigned to call function clone _Z1Dv.memprof.1
-; REMARKS: call in clone _Z1Bv assigned to call function clone _Z1Dv.memprof.1
-; REMARKS: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
-; REMARKS: call in clone _Z1Fv assigned to call function clone _Z1Dv
-; REMARKS: call in clone _Z1Dv marked with memprof allocation attribute notcold
-
-
-;; The allocation via F does not allocate cold memory. It should call the
-;; original D, which ultimately call the original allocation decorated
-;; with a "notcold" attribute.
-; IR: define internal {{.*}} @_Z1Dv()
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
-; IR: define internal {{.*}} @_Z1Fv()
-; IR:   call {{.*}} @_Z1Dv()
-;; The allocations via B and E allocate cold memory. They should call the
-;; cloned D, which ultimately call the cloned allocation decorated with a
-;; "cold" attribute.
-; IR: define internal {{.*}} @_Z1Bv()
-; IR:   call {{.*}} @_Z1Dv.memprof.1()
-; IR: define internal {{.*}} @_Z1Ev()
-; IR:   call {{.*}} @_Z1Dv.memprof.1()
-; IR: define internal {{.*}} @_Z1Dv.memprof.1()
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
-; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
-; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }
-
-
-; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
-; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
-; STATS: 1 memprof-context-disambiguation - Number of function clones created during whole program analysis
-
 
 ; DOTPRE: digraph "prestackupdate" {
 ; DOTPRE: 	label="prestackupdate";

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll b/llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll
deleted file mode 100644
index b94e9b855b747..0000000000000
--- a/llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll
+++ /dev/null
@@ -1,247 +0,0 @@
-;; Test context disambiguation for a callgraph containing multiple memprof
-;; contexts and no inlining, where we need to perform additional cloning
-;; during function assignment/cloning to handle the combination of contexts
-;; to 2 different allocations.
-;;
-;; void E(char **buf1, char **buf2) {
-;;   *buf1 = new char[10];
-;;   *buf2 = new char[10];
-;; }
-;;
-;; void B(char **buf1, char **buf2) {
-;;   E(buf1, buf2);
-;; }
-;;
-;; void C(char **buf1, char **buf2) {
-;;   E(buf1, buf2);
-;; }
-;;
-;; void D(char **buf1, char **buf2) {
-;;   E(buf1, buf2);
-;; }
-;; int main(int argc, char **argv) {
-;;   char *cold1, *cold2, *default1, *default2, *default3, *default4;
-;;   B(&default1, &default2);
-;;   C(&default3, &cold1);
-;;   D(&cold2, &default4);
-;;   memset(cold1, 0, 10);
-;;   memset(cold2, 0, 10);
-;;   memset(default1, 0, 10);
-;;   memset(default2, 0, 10);
-;;   memset(default3, 0, 10);
-;;   memset(default4, 0, 10);
-;;   delete[] default1;
-;;   delete[] default2;
-;;   delete[] default3;
-;;   delete[] default4;
-;;   sleep(10);
-;;   delete[] cold1;
-;;   delete[] cold2;
-;;   return 0;
-;; }
-;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
-;; memory freed after sleep(10) results in cold lifetimes.
-;;
-;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
-
-;; -stats requires asserts
-; REQUIRES: asserts
-
-; RUN: opt -passes=memprof-context-disambiguation \
-; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
-; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
-; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
-; RUN:  --check-prefix=STATS --check-prefix=REMARKS
-
-
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define internal void @_Z1EPPcS0_(ptr %buf1, ptr %buf2) #0 {
-entry:
-  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !0, !callsite !7
-  %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !8, !callsite !15
-  ret void
-}
-
-declare ptr @_Znam(i64) #1
-
-define internal void @_Z1BPPcS0_(ptr %0, ptr %1) {
-entry:
-  call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1), !callsite !16
-  ret void
-}
-
-; Function Attrs: noinline
-define internal void @_Z1CPPcS0_(ptr %0, ptr %1) #2 {
-entry:
-  call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1), !callsite !17
-  ret void
-}
-
-define internal void @_Z1DPPcS0_(ptr %0, ptr %1) #3 {
-entry:
-  call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1), !callsite !18
-  ret void
-}
-
-; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
-declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4
-
-declare i32 @sleep() #5
-
-; uselistorder directives
-uselistorder ptr @_Znam, { 1, 0 }
-
-attributes #0 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
-attributes #1 = { "no-trapping-math"="true" }
-attributes #2 = { noinline }
-attributes #3 = { "frame-pointer"="all" }
-attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
-attributes #5 = { "disable-tail-calls"="true" }
-attributes #6 = { builtin }
-
-!0 = !{!1, !3, !5}
-!1 = !{!2, !"cold"}
-!2 = !{i64 -3461278137325233666, i64 -7799663586031895603}
-!3 = !{!4, !"notcold"}
-!4 = !{i64 -3461278137325233666, i64 -3483158674395044949}
-!5 = !{!6, !"notcold"}
-!6 = !{i64 -3461278137325233666, i64 -2441057035866683071}
-!7 = !{i64 -3461278137325233666}
-!8 = !{!9, !11, !13}
-!9 = !{!10, !"notcold"}
-!10 = !{i64 -1415475215210681400, i64 -2441057035866683071}
-!11 = !{!12, !"cold"}
-!12 = !{i64 -1415475215210681400, i64 -3483158674395044949}
-!13 = !{!14, !"notcold"}
-!14 = !{i64 -1415475215210681400, i64 -7799663586031895603}
-!15 = !{i64 -1415475215210681400}
-!16 = !{i64 -2441057035866683071}
-!17 = !{i64 -3483158674395044949}
-!18 = !{i64 -7799663586031895603}
-
-
-;; Originally we create a single clone of each call to new from E, since each
-;; allocates cold memory for a single caller.
-
-; DUMP: CCG after cloning:
-; DUMP: Callsite Context Graph:
-; DUMP: Node [[ENEW1ORIG:0x[a-z0-9]+]]
-; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
-; DUMP: 	AllocTypes: NotCold
-; DUMP: 	ContextIds: 2 3
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
-; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3
-; DUMP: 	Clones: [[ENEW1CLONE:0x[a-z0-9]+]]
-
-; DUMP: Node [[D:0x[a-z0-9]+]]
-; DUMP:           call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1) (clone 0)
-; DUMP:         AllocTypes: NotColdCold
-; DUMP:         ContextIds: 1 6
-; DUMP:         CalleeEdges:
-; DUMP:                 Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1
-; DUMP:                 Edge from Callee [[ENEW2ORIG:0x[a-z0-9]+]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6
-; DUMP:         CallerEdges:
-
-; DUMP: Node [[C]]
-; DUMP: 	  call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1)	(clone 0)
-; DUMP: 	AllocTypes: NotColdCold
-; DUMP: 	ContextIds: 2 5
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[C]] AllocTypes: NotCold ContextIds: 2
-; DUMP: 		Edge from Callee [[ENEW2CLONE:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[B]]
-; DUMP: 	  call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1)	(clone 0)
-; DUMP: 	AllocTypes: NotCold
-; DUMP: 	ContextIds: 3 4
-; DUMP: 	CalleeEdges:
-; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 3
-; DUMP: 		Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4
-; DUMP: 	CallerEdges:
-
-; DUMP: Node [[ENEW2ORIG]]
-; DUMP: 	  %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
-; DUMP: 	AllocTypes: NotCold
-; DUMP: 	ContextIds: 4 6
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4
-; DUMP: 		Edge from Callee [[ENEW2ORIG]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6
-; DUMP: 	Clones: [[ENEW2CLONE]]
-
-; DUMP: Node [[ENEW1CLONE]]
-; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
-; DUMP: 	AllocTypes: Cold
-; DUMP: 	ContextIds: 1
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1
-; DUMP: 	Clone of [[ENEW1ORIG]]
-
-; DUMP: Node [[ENEW2CLONE]]
-; DUMP: 	  %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
-; DUMP: 	AllocTypes: Cold
-; DUMP: 	ContextIds: 5
-; DUMP: 	CalleeEdges:
-; DUMP: 	CallerEdges:
-; DUMP: 		Edge from Callee [[ENEW2CLONE]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5
-; DUMP: 	Clone of [[ENEW2ORIG]]
-
-
-;; We greedily create a clone of E that is initially used by the clones of the
-;; first call to new. However, we end up with an incompatible set of callers
-;; given the second call to new which has clones with a different combination of
-;; callers. Eventually, we create 2 more clones, and the first clone becomes dead.
-; REMARKS: created clone _Z1EPPcS0_.memprof.1
-; REMARKS: created clone _Z1EPPcS0_.memprof.2
-; REMARKS: created clone _Z1EPPcS0_.memprof.3
-; REMARKS: call in clone _Z1DPPcS0_ assigned to call function clone _Z1EPPcS0_.memprof.2
-; REMARKS: call in clone _Z1EPPcS0_.memprof.2 marked with memprof allocation attribute cold
-; REMARKS: call in clone _Z1CPPcS0_ assigned to call function clone _Z1EPPcS0_.memprof.3
-; REMARKS: call in clone _Z1EPPcS0_.memprof.3 marked with memprof allocation attribute notcold
-; REMARKS: call in clone _Z1BPPcS0_ assigned to call function clone _Z1EPPcS0_
-; REMARKS: call in clone _Z1EPPcS0_ marked with memprof allocation attribute notcold
-; REMARKS: call in clone _Z1EPPcS0_.memprof.2 marked with memprof allocation attribute notcold
-; REMARKS: call in clone _Z1EPPcS0_.memprof.3 marked with memprof allocation attribute cold
-; REMARKS: call in clone _Z1EPPcS0_ marked with memprof allocation attribute notcold
-
-
-;; Original version of E is used for the non-cold allocations, both from B.
-; IR: define internal {{.*}} @_Z1EPPcS0_(
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
-; IR: define internal {{.*}} @_Z1BPPcS0_(
-; IR:   call {{.*}} @_Z1EPPcS0_(
-;; C calls a clone of E with the first new allocating cold memory and the
-;; second allocating non-cold memory.
-; IR: define internal {{.*}} @_Z1CPPcS0_(
-; IR:   call {{.*}} @_Z1EPPcS0_.memprof.3(
-;; D calls a clone of E with the first new allocating non-cold memory and the
-;; second allocating cold memory.
-; IR: define internal {{.*}} @_Z1DPPcS0_(
-; IR:   call {{.*}} @_Z1EPPcS0_.memprof.2(
-;; Transient clone that will get removed as it ends up with no callers.
-;; Its calls to new never get updated with a memprof attribute as a result.
-; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.1(
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[DEFAULT:[0-9]+]]
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[DEFAULT]]
-; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.2(
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
-; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.3(
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD]]
-; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
-; IR: attributes #[[DEFAULT]] = { builtin }
-; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }
-
-
-; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
-; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
-; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
index f3216aa13d88f..49ca9407d9250 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
@@ -1,7 +1,7 @@
 ;; Tests callsite context graph generation for call graph containing indirect
 ;; calls. Currently this should result in conservative behavior, such that the
 ;; indirect call receives a null call in its graph node, to prevent subsequent
-;; cloning. Also tests graph and IR cloning.
+;; cloning.
 ;;
 ;; Original code looks like:
 ;;
@@ -57,9 +57,7 @@
 ; RUN: opt -passes=memprof-context-disambiguation \
 ; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
-; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
-; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
-; RUN:  --check-prefix=STATS --check-prefix=REMARKS
+; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
 ;; We should only create a single clone of foo, for the direct call
@@ -345,41 +343,6 @@ attributes #7 = { builtin }
 ; DUMP:		Clone of [[FOO]]
 
 
-; REMARKS: created clone _Z3foov.memprof.1
-; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
-; REMARKS: call in clone _Z3foov.memprof.1 marked with memprof allocation attribute cold
-; REMARKS: call in clone _ZN1A1xEv assigned to call function clone _Z3foov
-; REMARKS: call in clone _ZN1B1xEv assigned to call function clone _Z3foov
-; REMARKS: call in clone main assigned to call function clone _Z3foov
-; REMARKS: call in clone _Z3foov marked with memprof allocation attribute notcold
-
-
-; IR: define {{.*}} @main(
-; IR:   call {{.*}} @_Z3foov()
-;; Only the second call to foo, which allocates cold memory via direct calls,
-;; is replaced with a call to a clone that calls a cold allocation.
-; IR:   call {{.*}} @_Z3foov.memprof.1()
-; IR:   call {{.*}} @_Z3barP1A(
-; IR:   call {{.*}} @_Z3barP1A(
-; IR:   call {{.*}} @_Z3barP1A(
-; IR:   call {{.*}} @_Z3barP1A(
-; IR: define internal {{.*}} @_ZN1A1xEv(
-; IR:   call {{.*}} @_Z3foov()
-; IR: define internal {{.*}} @_ZN1B1xEv(
-; IR:   call {{.*}} @_Z3foov()
-; IR: define internal {{.*}} @_Z3foov()
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
-; IR: define internal {{.*}} @_Z3foov.memprof.1()
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
-; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
-; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }
-
-
-; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
-; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
-; STATS: 1 memprof-context-disambiguation - Number of function clones created during whole program analysis
-
-
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
 ; DOT: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2 3 4 5 6",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> _Znam}"];

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
index f1b74f13fb148..70a6f39980ede 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
@@ -1,7 +1,6 @@
 ;; Test callsite context graph generation for call graph with two memprof
 ;; contexts and partial inlining, requiring generation of a new fused node to
 ;; represent the inlined sequence while matching callsite nodes onto the graph.
-;; Also tests graph and IR cloning.
 ;;
 ;; Original code looks like:
 ;;
@@ -47,9 +46,7 @@
 ; RUN: opt -passes=memprof-context-disambiguation \
 ; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:	-memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
-; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
-; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
-; RUN:  --check-prefix=STATS --check-prefix=REMARKS
+; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=DUMP
 
 ; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
 ;; We should create clones for foo and bar for the call from main to allocate
@@ -257,42 +254,6 @@ attributes #7 = { builtin }
 ; DUMP:         Clone of [[BAR]]
 
 
-; REMARKS: created clone _Z3barv.memprof.1
-; REMARKS: created clone _Z3foov.memprof.1
-; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
-; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3barv.memprof.1
-; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold
-; REMARKS: call in clone main assigned to call function clone _Z3foov
-; REMARKS: call in clone _Z3foov assigned to call function clone _Z3barv
-; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
-; REMARKS: call in clone _Z3bazv marked with memprof allocation attribute notcold
-
-
-; IR: define internal {{.*}} @_Z3barv()
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
-; IR: define internal {{.*}} @_Z3foov()
-; IR:   call {{.*}} @_Z3barv()
-; IR: define {{.*}} @main()
-;; The first call to foo does not allocate cold memory. It should call the
-;; original functions, which ultimately call the original allocation decorated
-;; with a "notcold" attribute.
-; IR:   call {{.*}} @_Z3foov()
-;; The second call to foo allocates cold memory. It should call cloned functions
-;; which ultimately call a cloned allocation decorated with a "cold" attribute.
-; IR:   call {{.*}} @_Z3foov.memprof.1()
-; IR: define internal {{.*}} @_Z3barv.memprof.1()
-; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
-; IR: define internal {{.*}} @_Z3foov.memprof.1()
-; IR:   call {{.*}} @_Z3barv.memprof.1()
-; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
-; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }
-
-
-; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
-; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
-; STATS: 2 memprof-context-disambiguation - Number of function clones created during whole program analysis
-
-
 ; DOT: digraph "postbuild" {
 ; DOT: 	label="postbuild";
 ; DOT: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];


        


More information about the llvm-commits mailing list