[llvm] 9f8205d - [MemProf] Track and report profiled sizes through cloning (#98382)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 11 16:10:34 PDT 2024


Author: Teresa Johnson
Date: 2024-07-11T16:10:30-07:00
New Revision: 9f8205d9d8ddccd5c821c2a654805434706a43c2

URL: https://github.com/llvm/llvm-project/commit/9f8205d9d8ddccd5c821c2a654805434706a43c2
DIFF: https://github.com/llvm/llvm-project/commit/9f8205d9d8ddccd5c821c2a654805434706a43c2.diff

LOG: [MemProf] Track and report profiled sizes through cloning (#98382)

If requested, via the -memprof-report-hinted-sizes option, track the
total profiled size of each MIB through the thin link, then report on
the corresponding allocation coldness after all cloning is complete.

To save size, the total sizes are only written to the bitcode allocation
info records (as optional trailing entries) when the option is specified,
and the sizes are kept separate from the MIBs in the index.

Added: 
    

Modified: 
    llvm/include/llvm/Bitcode/LLVMBitCodes.h
    llvm/include/llvm/IR/ModuleSummaryIndex.h
    llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
    llvm/lib/Bitcode/Reader/BitcodeReader.cpp
    llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
    llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
    llvm/test/Bitcode/summary_version.ll
    llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
    llvm/test/ThinLTO/X86/memprof-basic.ll
    llvm/test/Transforms/MemProfContextDisambiguation/basic.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 5b5e08b5cbc3f..184bbe32df695 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -307,7 +307,8 @@ enum GlobalValueSummarySymtabCodes {
   // [valueid, n x stackidindex]
   FS_PERMODULE_CALLSITE_INFO = 26,
   // Summary of per-module allocation memprof metadata.
-  // [n x (alloc type, nummib, nummib x stackidindex)]
+  // [nummib, nummib x (alloc type, numstackids, numstackids x stackidindex),
+  // [nummib x total size]?]
   FS_PERMODULE_ALLOC_INFO = 27,
   // Summary of combined index memprof callsite metadata.
   // [valueid, numstackindices, numver,
@@ -316,7 +317,7 @@ enum GlobalValueSummarySymtabCodes {
   // Summary of combined index allocation memprof metadata.
   // [nummib, numver,
   //  nummib x (alloc type, numstackids, numstackids x stackidindex),
-  //  numver x version]
+  //  numver x version, [nummib x total size]?]
   FS_COMBINED_ALLOC_INFO = 29,
   FS_STACK_IDS = 30,
 };

diff  --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 31271ed388e54..00934cc1ce6f2 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -403,6 +403,10 @@ struct AllocInfo {
   // Vector of MIBs in this memprof metadata.
   std::vector<MIBInfo> MIBs;
 
+  // If requested, keep track of total profiled sizes for each MIB. This will be
+  // a vector of the same length and order as the MIBs vector, if non-empty.
+  std::vector<uint64_t> TotalSizes;
+
   AllocInfo(std::vector<MIBInfo> MIBs) : MIBs(std::move(MIBs)) {
     Versions.push_back(0);
   }
@@ -423,6 +427,16 @@ inline raw_ostream &operator<<(raw_ostream &OS, const AllocInfo &AE) {
   for (auto &M : AE.MIBs) {
     OS << "\t\t" << M << "\n";
   }
+  if (!AE.TotalSizes.empty()) {
+    OS << " TotalSizes per MIB:\n\t\t";
+    First = true;
+    for (uint64_t TS : AE.TotalSizes) {
+      if (!First)
+        OS << ", ";
+      First = false;
+      OS << TS << "\n";
+    }
+  }
   return OS;
 }
 
@@ -1431,7 +1445,7 @@ class ModuleSummaryIndex {
   // in the way some records are interpreted, like flags for instance.
   // Note that incrementing this may require changes in both BitcodeReader.cpp
   // and BitcodeWriter.cpp.
-  static constexpr uint64_t BitcodeSummaryVersion = 9;
+  static constexpr uint64_t BitcodeSummaryVersion = 10;
 
   // Regular LTO module name for ASM writer
   static constexpr const char *getRegularLTOModuleName() {

diff  --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 94ac0484f5ec7..e9490ccba8215 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -85,6 +85,8 @@ extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;
 
 extern cl::opt<unsigned> MaxNumVTableAnnotations;
 
+extern cl::opt<bool> MemProfReportHintedSizes;
+
 // Walk through the operands of a given User via worklist iteration and populate
 // the set of GlobalValue references encountered. Invoked either on an
 // Instruction or a GlobalVariable (which walks its initializer).
@@ -517,6 +519,7 @@ static void computeFunctionSummary(
       auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
       if (MemProfMD) {
         std::vector<MIBInfo> MIBs;
+        std::vector<uint64_t> TotalSizes;
         for (auto &MDOp : MemProfMD->operands()) {
           auto *MIBMD = cast<const MDNode>(MDOp);
           MDNode *StackNode = getMIBStackNode(MIBMD);
@@ -536,8 +539,17 @@ static void computeFunctionSummary(
           }
           MIBs.push_back(
               MIBInfo(getMIBAllocType(MIBMD), std::move(StackIdIndices)));
+          if (MemProfReportHintedSizes) {
+            auto TotalSize = getMIBTotalSize(MIBMD);
+            assert(TotalSize);
+            TotalSizes.push_back(TotalSize);
+          }
         }
         Allocs.push_back(AllocInfo(std::move(MIBs)));
+        if (MemProfReportHintedSizes) {
+          assert(Allocs.back().MIBs.size() == TotalSizes.size());
+          Allocs.back().TotalSizes = std::move(TotalSizes);
+        }
       } else if (!InstCallsite.empty()) {
         SmallVector<unsigned> StackIdIndices;
         for (auto StackId : InstCallsite)

diff  --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index f56b2b32ff98f..6203c6e5119d1 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -7994,7 +7994,12 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
     case bitc::FS_PERMODULE_ALLOC_INFO: {
       unsigned I = 0;
       std::vector<MIBInfo> MIBs;
-      while (I < Record.size()) {
+      unsigned NumMIBs = 0;
+      if (Version >= 10)
+        NumMIBs = Record[I++];
+      unsigned MIBsRead = 0;
+      while ((Version >= 10 && MIBsRead++ < NumMIBs) ||
+             (Version < 10 && I < Record.size())) {
         assert(Record.size() - I >= 2);
         AllocationType AllocType = (AllocationType)Record[I++];
         unsigned NumStackEntries = Record[I++];
@@ -8007,7 +8012,19 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
         }
         MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
       }
+      std::vector<uint64_t> TotalSizes;
+      // We either have no sizes or NumMIBs of them.
+      assert(I == Record.size() || Record.size() - I == NumMIBs);
+      if (I < Record.size()) {
+        MIBsRead = 0;
+        while (MIBsRead++ < NumMIBs)
+          TotalSizes.push_back(Record[I++]);
+      }
       PendingAllocs.push_back(AllocInfo(std::move(MIBs)));
+      if (!TotalSizes.empty()) {
+        assert(PendingAllocs.back().MIBs.size() == TotalSizes.size());
+        PendingAllocs.back().TotalSizes = std::move(TotalSizes);
+      }
       break;
     }
 
@@ -8034,8 +8051,21 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       SmallVector<uint8_t> Versions;
       for (unsigned J = 0; J < NumVersions; J++)
         Versions.push_back(Record[I++]);
+      std::vector<uint64_t> TotalSizes;
+      // We either have no sizes or NumMIBs of them.
+      assert(I == Record.size() || Record.size() - I == NumMIBs);
+      if (I < Record.size()) {
+        MIBsRead = 0;
+        while (MIBsRead++ < NumMIBs) {
+          TotalSizes.push_back(Record[I++]);
+        }
+      }
       PendingAllocs.push_back(
           AllocInfo(std::move(Versions), std::move(MIBs)));
+      if (!TotalSizes.empty()) {
+        assert(PendingAllocs.back().MIBs.size() == TotalSizes.size());
+        PendingAllocs.back().TotalSizes = std::move(TotalSizes);
+      }
       break;
     }
     }

diff  --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 3378931065f9b..b3ebe70e8c52f 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -4189,10 +4189,9 @@ static void writeFunctionHeapProfileRecords(
     // Per module alloc versions should always have a single entry of
     // value 0.
     assert(!PerModule || (AI.Versions.size() == 1 && AI.Versions[0] == 0));
-    if (!PerModule) {
-      Record.push_back(AI.MIBs.size());
+    Record.push_back(AI.MIBs.size());
+    if (!PerModule)
       Record.push_back(AI.Versions.size());
-    }
     for (auto &MIB : AI.MIBs) {
       Record.push_back((uint8_t)MIB.AllocType);
       Record.push_back(MIB.StackIdIndices.size());
@@ -4203,6 +4202,11 @@ static void writeFunctionHeapProfileRecords(
       for (auto V : AI.Versions)
         Record.push_back(V);
     }
+    assert(AI.TotalSizes.empty() || AI.TotalSizes.size() == AI.MIBs.size());
+    if (!AI.TotalSizes.empty()) {
+      for (auto Size : AI.TotalSizes)
+        Record.push_back(Size);
+    }
     Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
                                 : bitc::FS_COMBINED_ALLOC_INFO,
                       Record, AllocAbbrev);
@@ -4432,7 +4436,9 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
 
   Abbv = std::make_shared<BitCodeAbbrev>();
   Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
   // n x (alloc type, numstackids, numstackids x stackidindex)
+  // optional: nummib x total size
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
   unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
@@ -4576,6 +4582,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
   // nummib x (alloc type, numstackids, numstackids x stackidindex),
   // numver x version
+  // optional: nummib x total size
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
   unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
@@ -4675,7 +4682,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
     writeFunctionHeapProfileRecords(
         Stream, FS, CallsiteAbbrev, AllocAbbrev,
         /*PerModule*/ false,
-        /*GetValueId*/ [&](const ValueInfo &VI) -> unsigned {
+        /*GetValueId*/
+        [&](const ValueInfo &VI) -> unsigned {
           std::optional<unsigned> ValueID = GetValueId(VI);
           // This can happen in shared index files for distributed ThinLTO if
           // the callee function summary is not included. Record 0 which we
@@ -4685,7 +4693,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
             return 0;
           return *ValueID;
         },
-        /*GetStackIndex*/ [&](unsigned I) {
+        /*GetStackIndex*/
+        [&](unsigned I) {
           // Get the corresponding index into the list of StackIds actually
           // being written for this combined index (which may be a subset in
           // the case of distributed indexes).

diff  --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index c1e5ab1a2b561..ef9ddeaaab632 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -134,6 +134,8 @@ cl::opt<bool> SupportsHotColdNew(
     cl::desc("Linking with hot/cold operator new interfaces"));
 } // namespace llvm
 
+extern cl::opt<bool> MemProfReportHintedSizes;
+
 namespace {
 /// CRTP base for graphs built from either IR or ThinLTO summary index.
 ///
@@ -172,6 +174,7 @@ class CallsiteContextGraph {
 
   void dump() const;
   void print(raw_ostream &OS) const;
+  void printTotalSizes(raw_ostream &OS) const;
 
   friend raw_ostream &operator<<(raw_ostream &OS,
                                  const CallsiteContextGraph &CCG) {
@@ -439,7 +442,7 @@ class CallsiteContextGraph {
   void addStackNodesForMIB(ContextNode *AllocNode,
                            CallStack<NodeT, IteratorT> &StackContext,
                            CallStack<NodeT, IteratorT> &CallsiteContext,
-                           AllocationType AllocType);
+                           AllocationType AllocType, uint64_t TotalSize);
 
   /// Matches all callsite metadata (or summary) to the nodes created for
   /// allocation memprof MIB metadata, synthesizing new nodes to reflect any
@@ -611,6 +614,10 @@ class CallsiteContextGraph {
   /// Map from each context ID to the AllocationType assigned to that context.
   DenseMap<uint32_t, AllocationType> ContextIdToAllocationType;
 
+  /// Map from each context ID to the profiled aggregate allocation size,
+  /// optionally populated when requested (via MemProfReportHintedSizes).
+  DenseMap<uint32_t, uint64_t> ContextIdToTotalSize;
+
   /// Identifies the context node created for a stack id when adding the MIB
   /// contexts to the graph. This is used to locate the context nodes when
   /// trying to assign the corresponding callsites with those stack ids to these
@@ -1004,11 +1011,24 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
   return AllocNode;
 }
 
+static std::string getAllocTypeString(uint8_t AllocTypes) {
+  if (!AllocTypes)
+    return "None";
+  std::string Str;
+  if (AllocTypes & (uint8_t)AllocationType::NotCold)
+    Str += "NotCold";
+  if (AllocTypes & (uint8_t)AllocationType::Cold)
+    Str += "Cold";
+  return Str;
+}
+
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 template <class NodeT, class IteratorT>
 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
     ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
-    CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType) {
+    CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType,
+    uint64_t TotalSize) {
+  assert(!MemProfReportHintedSizes || TotalSize > 0);
   // Treating the hot alloc type as NotCold before the disambiguation for "hot"
   // is done.
   if (AllocType == AllocationType::Hot)
@@ -1016,6 +1036,11 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
 
   ContextIdToAllocationType[++LastContextId] = AllocType;
 
+  if (MemProfReportHintedSizes) {
+    assert(TotalSize);
+    ContextIdToTotalSize[LastContextId] = TotalSize;
+  }
+
   // Update alloc type and context ids for this MIB.
   AllocNode->AllocTypes |= (uint8_t)AllocType;
 
@@ -1060,6 +1085,10 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
     assert(ContextIdToAllocationType.count(OldId));
     // The new context has the same allocation type as original.
     ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
+    // For now set this to 0 so we don't duplicate sizes. Not clear how to divvy
+    // up the size. Assume that if we are able to duplicate context ids that we
+    // will be able to disambiguate all copies.
+    ContextIdToTotalSize[LastContextId] = 0;
   }
   return NewContextIds;
 }
@@ -1663,7 +1692,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
             CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
             addStackNodesForMIB<MDNode, MDNode::op_iterator>(
                 AllocNode, StackContext, CallsiteContext,
-                getMIBAllocType(MIBMD));
+                getMIBAllocType(MIBMD), getMIBTotalSize(MIBMD));
           }
           assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
           // Memprof and callsite metadata on memory allocations no longer
@@ -1735,12 +1764,20 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
           // stack ids on the allocation call during ModuleSummaryAnalysis.
           CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
               EmptyContext;
+          unsigned I = 0;
+          assert(!MemProfReportHintedSizes ||
+                 AN.TotalSizes.size() == AN.MIBs.size());
           // Now add all of the MIBs and their stack nodes.
           for (auto &MIB : AN.MIBs) {
             CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
                 StackContext(&MIB);
+            uint64_t TotalSize = 0;
+            if (MemProfReportHintedSizes)
+              TotalSize = AN.TotalSizes[I];
             addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
-                AllocNode, StackContext, EmptyContext, MIB.AllocType);
+                AllocNode, StackContext, EmptyContext, MIB.AllocType,
+                TotalSize);
+            I++;
           }
           assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
           // Initialize version 0 on the summary alloc node to the current alloc
@@ -2171,17 +2208,6 @@ bool IndexCallsiteContextGraph::calleeMatchesFunc(
   return true;
 }
 
-static std::string getAllocTypeString(uint8_t AllocTypes) {
-  if (!AllocTypes)
-    return "None";
-  std::string Str;
-  if (AllocTypes & (uint8_t)AllocationType::NotCold)
-    Str += "NotCold";
-  if (AllocTypes & (uint8_t)AllocationType::Cold)
-    Str += "Cold";
-  return Str;
-}
-
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
     const {
@@ -2261,6 +2287,30 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
   }
 }
 
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
+    raw_ostream &OS) const {
+  using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
+  for (const auto Node : nodes<GraphType>(this)) {
+    if (Node->isRemoved())
+      continue;
+    if (!Node->IsAllocation)
+      continue;
+    DenseSet<uint32_t> ContextIds = Node->getContextIds();
+    std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
+    std::sort(SortedIds.begin(), SortedIds.end());
+    for (auto Id : SortedIds) {
+      auto SizeI = ContextIdToTotalSize.find(Id);
+      assert(SizeI != ContextIdToTotalSize.end());
+      auto TypeI = ContextIdToAllocationType.find(Id);
+      assert(TypeI != ContextIdToAllocationType.end());
+      OS << getAllocTypeString((uint8_t)TypeI->second) << " context " << Id
+         << " with total size " << SizeI->second << " is "
+         << getAllocTypeString(Node->AllocTypes) << " after cloning\n";
+    }
+  }
+}
+
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
   using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
@@ -3797,6 +3847,9 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
   if (ExportToDot)
     exportToDot("clonefuncassign");
 
+  if (MemProfReportHintedSizes)
+    printTotalSizes(errs());
+
   return Changed;
 }
 

diff  --git a/llvm/test/Bitcode/summary_version.ll b/llvm/test/Bitcode/summary_version.ll
index 98feab6fe2f99..26c64f81a773f 100644
--- a/llvm/test/Bitcode/summary_version.ll
+++ b/llvm/test/Bitcode/summary_version.ll
@@ -2,7 +2,7 @@
 ; RUN: opt  -module-summary  %s -o - | llvm-bcanalyzer -dump | FileCheck %s
 
 ; CHECK: <GLOBALVAL_SUMMARY_BLOCK
-; CHECK: <VERSION op0=9/>
+; CHECK: <VERSION op0=10/>
 
 
 

diff  --git a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
index 19e228fd5355c..b3f1e770810d2 100644
--- a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
+++ b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
@@ -11,7 +11,7 @@
 ; RUN: llvm-dis -o - %t.o | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=DIS
 
 ; CHECK: <GLOBALVAL_SUMMARY_BLOCK
-; CHECK-NEXT:   <VERSION op0=9/>
+; CHECK-NEXT:   <VERSION op0=
 ; CHECK-NEXT:   <FLAGS op0=0/>
 ; The `VALUE_GUID` below represents the "_ZTV4Base" referenced by the instruction
 ; that loads vtable pointers.

diff  --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll
index 54e01e5fcdf95..6922dbfd36846 100644
--- a/llvm/test/ThinLTO/X86/memprof-basic.ll
+++ b/llvm/test/ThinLTO/X86/memprof-basic.ll
@@ -34,7 +34,7 @@
 ;; -stats requires asserts
 ; REQUIRES: asserts
 
-; RUN: opt -thinlto-bc %s >%t.o
+; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
 ; RUN:	-supports-hot-cold-new \
 ; RUN:	-r=%t.o,main,plx \
@@ -43,9 +43,11 @@
 ; RUN:	-r=%t.o,_Znam, \
 ; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:	-memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
+; RUN:	-memprof-report-hinted-sizes \
 ; RUN:	-stats -pass-remarks=memprof-context-disambiguation -save-temps \
 ; RUN:	-o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN:	--check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS
+; RUN:	--check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS \
+; RUN:  --check-prefix=SIZES
 
 ; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
 ;; We should have cloned bar, baz, and foo, for the cold memory allocation.
@@ -64,9 +66,10 @@
 ; RUN:	-r=%t.o,_Znam, \
 ; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:	-memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \
+; RUN:	-memprof-report-hinted-sizes \
 ; RUN:	-stats -pass-remarks=memprof-context-disambiguation \
 ; RUN:	-o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN:	--check-prefix=STATS
+; RUN:	--check-prefix=STATS --check-prefix=SIZES
 
 ; RUN:	cat %t2.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
 ;; We should have cloned bar, baz, and foo, for the cold memory allocation.
@@ -125,9 +128,9 @@ attributes #0 = { noinline optnone }
 !0 = !{i64 8632435727821051414}
 !1 = !{i64 -3421689549917153178}
 !2 = !{!3, !5}
-!3 = !{!4, !"notcold"}
+!3 = !{!4, !"notcold", i64 100}
 !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!5 = !{!6, !"cold"}
+!5 = !{!6, !"cold", i64 400}
 !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
 !7 = !{i64 9086428284934609951}
 !8 = !{i64 -5964873800580613432}
@@ -264,6 +267,8 @@ attributes #0 = { noinline optnone }
 ; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
 ; DUMP:		Clone of [[BAR]]
 
+; SIZES: NotCold context 1 with total size 100 is NotCold after cloning
+; SIZES: Cold context 2 with total size 400 is Cold after cloning
 
 ; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
 ; REMARKS: created clone _Z3barv.memprof.1

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
index 483582c6ced95..a82f872d51c7d 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
@@ -38,8 +38,9 @@
 ; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:	-memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
 ; RUN:	-stats -pass-remarks=memprof-context-disambiguation \
+; RUN:  -memprof-report-hinted-sizes \
 ; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
-; RUN:	--check-prefix=STATS --check-prefix=REMARKS
+; RUN:	--check-prefix=STATS --check-prefix=REMARKS --check-prefix=SIZES
 
 ; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
 ;; We should have cloned bar, baz, and foo, for the cold memory allocation.
@@ -105,9 +106,9 @@ attributes #6 = { builtin }
 !0 = !{i64 8632435727821051414}
 !1 = !{i64 -3421689549917153178}
 !2 = !{!3, !5}
-!3 = !{!4, !"notcold"}
+!3 = !{!4, !"notcold", i64 100}
 !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!5 = !{!6, !"cold"}
+!5 = !{!6, !"cold", i64 400}
 !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
 !7 = !{i64 9086428284934609951}
 !8 = !{i64 -5964873800580613432}
@@ -248,6 +249,8 @@ attributes #6 = { builtin }
 ; REMARKS: call in clone _Z3bazv assigned to call function clone _Z3barv
 ; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
 
+; SIZES: NotCold context 1 with total size 100 is NotCold after cloning
+; SIZES: Cold context 2 with total size 400 is Cold after cloning
 
 ; IR: define {{.*}} @main
 ;; The first call to foo does not allocate cold memory. It should call the


        


More information about the llvm-commits mailing list