[llvm] 9f8205d - [MemProf] Track and report profiled sizes through cloning (#98382)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 11 16:10:34 PDT 2024
Author: Teresa Johnson
Date: 2024-07-11T16:10:30-07:00
New Revision: 9f8205d9d8ddccd5c821c2a654805434706a43c2
URL: https://github.com/llvm/llvm-project/commit/9f8205d9d8ddccd5c821c2a654805434706a43c2
DIFF: https://github.com/llvm/llvm-project/commit/9f8205d9d8ddccd5c821c2a654805434706a43c2.diff
LOG: [MemProf] Track and report profiled sizes through cloning (#98382)
If requested via the -memprof-report-hinted-sizes option, track the
total profiled size of each MIB through the thin link, then report
the corresponding allocation coldness after all cloning is complete.
To save size, a different bitcode record layout is used for the
allocation info when the option is specified, and the sizes are kept
separate from the MIBs in the index.
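For illustration, with the option enabled each memprof MIB metadata
node carries a trailing total-size operand (in bytes), as can be seen
in the test updates below:
  !3 = !{!4, !"notcold", i64 100} ; call stack, alloc type, total profiled size
  !5 = !{!6, !"cold", i64 400}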
Added:
Modified:
llvm/include/llvm/Bitcode/LLVMBitCodes.h
llvm/include/llvm/IR/ModuleSummaryIndex.h
llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
llvm/lib/Bitcode/Reader/BitcodeReader.cpp
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
llvm/test/Bitcode/summary_version.ll
llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
llvm/test/ThinLTO/X86/memprof-basic.ll
llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 5b5e08b5cbc3f..184bbe32df695 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -307,7 +307,8 @@ enum GlobalValueSummarySymtabCodes {
// [valueid, n x stackidindex]
FS_PERMODULE_CALLSITE_INFO = 26,
// Summary of per-module allocation memprof metadata.
- // [n x (alloc type, nummib, nummib x stackidindex)]
+ // [nummib, nummib x (alloc type, numstackids, numstackids x stackidindex),
+ // [nummib x total size]?]
FS_PERMODULE_ALLOC_INFO = 27,
// Summary of combined index memprof callsite metadata.
// [valueid, numstackindices, numver,
@@ -316,7 +317,7 @@ enum GlobalValueSummarySymtabCodes {
// Summary of combined index allocation memprof metadata.
// [nummib, numver,
// nummib x (alloc type, numstackids, numstackids x stackidindex),
- // numver x version]
+ // numver x version, [nummib x total size]?]
FS_COMBINED_ALLOC_INFO = 29,
FS_STACK_IDS = 30,
};
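As a worked example of the new per-module layout (the stack id indices
and sizes here are illustrative): an allocation with two MIBs of one
stack id each, with hinted sizes enabled, would be encoded as
  [2, NotCold, 1, s0, Cold, 1, s1, 100, 400]
that is, nummib=2, then one (alloc type, numstackids, stackidindex...)
tuple per MIB, then the optional trailing nummib total sizes. Without
the option the record simply ends after the tuples.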
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 31271ed388e54..00934cc1ce6f2 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -403,6 +403,10 @@ struct AllocInfo {
// Vector of MIBs in this memprof metadata.
std::vector<MIBInfo> MIBs;
+ // If requested, keep track of total profiled sizes for each MIB. This will be
+ // a vector of the same length and order as the MIBs vector, if non-empty.
+ std::vector<uint64_t> TotalSizes;
+
AllocInfo(std::vector<MIBInfo> MIBs) : MIBs(std::move(MIBs)) {
Versions.push_back(0);
}
@@ -423,6 +427,16 @@ inline raw_ostream &operator<<(raw_ostream &OS, const AllocInfo &AE) {
for (auto &M : AE.MIBs) {
OS << "\t\t" << M << "\n";
}
+ if (!AE.TotalSizes.empty()) {
+ OS << " TotalSizes per MIB:\n\t\t";
+ First = true;
+ for (uint64_t TS : AE.TotalSizes) {
+ if (!First)
+ OS << ", ";
+ First = false;
+ OS << TS << "\n";
+ }
+ }
return OS;
}
@@ -1431,7 +1445,7 @@ class ModuleSummaryIndex {
// in the way some record are interpreted, like flags for instance.
// Note that incrementing this may require changes in both BitcodeReader.cpp
// and BitcodeWriter.cpp.
- static constexpr uint64_t BitcodeSummaryVersion = 9;
+ static constexpr uint64_t BitcodeSummaryVersion = 10;
// Regular LTO module name for ASM writer
static constexpr const char *getRegularLTOModuleName() {
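A minimal sketch of how the parallel vectors are meant to be populated
(mirroring the summary analysis change below; the concrete values are
made up):
  std::vector<MIBInfo> MIBs;
  MIBs.push_back(MIBInfo(AllocationType::NotCold, /*StackIdIndices=*/{0, 1}));
  MIBs.push_back(MIBInfo(AllocationType::Cold, {0, 2}));
  AllocInfo AI(std::move(MIBs));
  AI.TotalSizes = {100, 400}; // same length and order as AI.MIBs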
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 94ac0484f5ec7..e9490ccba8215 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -85,6 +85,8 @@ extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;
extern cl::opt<unsigned> MaxNumVTableAnnotations;
+extern cl::opt<bool> MemProfReportHintedSizes;
+
// Walk through the operands of a given User via worklist iteration and populate
// the set of GlobalValue references encountered. Invoked either on an
// Instruction or a GlobalVariable (which walks its initializer).
@@ -517,6 +519,7 @@ static void computeFunctionSummary(
auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
if (MemProfMD) {
std::vector<MIBInfo> MIBs;
+ std::vector<uint64_t> TotalSizes;
for (auto &MDOp : MemProfMD->operands()) {
auto *MIBMD = cast<const MDNode>(MDOp);
MDNode *StackNode = getMIBStackNode(MIBMD);
@@ -536,8 +539,17 @@ static void computeFunctionSummary(
}
MIBs.push_back(
MIBInfo(getMIBAllocType(MIBMD), std::move(StackIdIndices)));
+ if (MemProfReportHintedSizes) {
+ auto TotalSize = getMIBTotalSize(MIBMD);
+ assert(TotalSize);
+ TotalSizes.push_back(TotalSize);
+ }
}
Allocs.push_back(AllocInfo(std::move(MIBs)));
+ if (MemProfReportHintedSizes) {
+ assert(Allocs.back().MIBs.size() == TotalSizes.size());
+ Allocs.back().TotalSizes = std::move(TotalSizes);
+ }
} else if (!InstCallsite.empty()) {
SmallVector<unsigned> StackIdIndices;
for (auto StackId : InstCallsite)
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index f56b2b32ff98f..6203c6e5119d1 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -7994,7 +7994,12 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
case bitc::FS_PERMODULE_ALLOC_INFO: {
unsigned I = 0;
std::vector<MIBInfo> MIBs;
- while (I < Record.size()) {
+ unsigned NumMIBs = 0;
+ if (Version >= 10)
+ NumMIBs = Record[I++];
+ unsigned MIBsRead = 0;
+ while ((Version >= 10 && MIBsRead++ < NumMIBs) ||
+ (Version < 10 && I < Record.size())) {
assert(Record.size() - I >= 2);
AllocationType AllocType = (AllocationType)Record[I++];
unsigned NumStackEntries = Record[I++];
@@ -8007,7 +8012,19 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
}
MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
}
+ std::vector<uint64_t> TotalSizes;
+ // We either have no sizes or NumMIBs of them.
+ assert(I == Record.size() || Record.size() - I == NumMIBs);
+ if (I < Record.size()) {
+ MIBsRead = 0;
+ while (MIBsRead++ < NumMIBs)
+ TotalSizes.push_back(Record[I++]);
+ }
PendingAllocs.push_back(AllocInfo(std::move(MIBs)));
+ if (!TotalSizes.empty()) {
+ assert(PendingAllocs.back().MIBs.size() == TotalSizes.size());
+ PendingAllocs.back().TotalSizes = std::move(TotalSizes);
+ }
break;
}
@@ -8034,8 +8051,21 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
SmallVector<uint8_t> Versions;
for (unsigned J = 0; J < NumVersions; J++)
Versions.push_back(Record[I++]);
+ std::vector<uint64_t> TotalSizes;
+ // We either have no sizes or NumMIBs of them.
+ assert(I == Record.size() || Record.size() - I == NumMIBs);
+ if (I < Record.size()) {
+ MIBsRead = 0;
+ while (MIBsRead++ < NumMIBs) {
+ TotalSizes.push_back(Record[I++]);
+ }
+ }
PendingAllocs.push_back(
AllocInfo(std::move(Versions), std::move(MIBs)));
+ if (!TotalSizes.empty()) {
+ assert(PendingAllocs.back().MIBs.size() == TotalSizes.size());
+ PendingAllocs.back().TotalSizes = std::move(TotalSizes);
+ }
break;
}
}
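Concretely (stack id indices illustrative): a pre-version-10 per-module
record for one MIB with two stack ids is [AllocType, 2, s0, s1] and is
consumed entirely by the I < Record.size() loop; the version-10 form
gains the leading count, [1, AllocType, 2, s0, s1], optionally followed
by the per-MIB sizes, [1, AllocType, 2, s0, s1, 100]. This is why the
reader asserts that any operands remaining after the MIBs number
exactly NumMIBs.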
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 3378931065f9b..b3ebe70e8c52f 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -4189,10 +4189,9 @@ static void writeFunctionHeapProfileRecords(
// Per module alloc versions should always have a single entry of
// value 0.
assert(!PerModule || (AI.Versions.size() == 1 && AI.Versions[0] == 0));
- if (!PerModule) {
- Record.push_back(AI.MIBs.size());
+ Record.push_back(AI.MIBs.size());
+ if (!PerModule)
Record.push_back(AI.Versions.size());
- }
for (auto &MIB : AI.MIBs) {
Record.push_back((uint8_t)MIB.AllocType);
Record.push_back(MIB.StackIdIndices.size());
@@ -4203,6 +4202,11 @@ static void writeFunctionHeapProfileRecords(
for (auto V : AI.Versions)
Record.push_back(V);
}
+ assert(AI.TotalSizes.empty() || AI.TotalSizes.size() == AI.MIBs.size());
+ if (!AI.TotalSizes.empty()) {
+ for (auto Size : AI.TotalSizes)
+ Record.push_back(Size);
+ }
Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
: bitc::FS_COMBINED_ALLOC_INFO,
Record, AllocAbbrev);
@@ -4432,7 +4436,9 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
// n x (alloc type, numstackids, numstackids x stackidindex)
+ // optional: nummib x total size
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
@@ -4576,6 +4582,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
// nummib x (alloc type, numstackids, numstackids x stackidindex),
// numver x version
+ // optional: nummib x total size
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
@@ -4675,7 +4682,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
writeFunctionHeapProfileRecords(
Stream, FS, CallsiteAbbrev, AllocAbbrev,
/*PerModule*/ false,
- /*GetValueId*/ [&](const ValueInfo &VI) -> unsigned {
+ /*GetValueId*/
+ [&](const ValueInfo &VI) -> unsigned {
std::optional<unsigned> ValueID = GetValueId(VI);
// This can happen in shared index files for distributed ThinLTO if
// the callee function summary is not included. Record 0 which we
@@ -4685,7 +4693,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
return 0;
return *ValueID;
},
- /*GetStackIndex*/ [&](unsigned I) {
+ /*GetStackIndex*/
+ [&](unsigned I) {
// Get the corresponding index into the list of StackIds actually
// being written for this combined index (which may be a subset in
// the case of distributed indexes).
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index c1e5ab1a2b561..ef9ddeaaab632 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -134,6 +134,8 @@ cl::opt<bool> SupportsHotColdNew(
cl::desc("Linking with hot/cold operator new interfaces"));
} // namespace llvm
+extern cl::opt<bool> MemProfReportHintedSizes;
+
namespace {
/// CRTP base for graphs built from either IR or ThinLTO summary index.
///
@@ -172,6 +174,7 @@ class CallsiteContextGraph {
void dump() const;
void print(raw_ostream &OS) const;
+ void printTotalSizes(raw_ostream &OS) const;
friend raw_ostream &operator<<(raw_ostream &OS,
const CallsiteContextGraph &CCG) {
@@ -439,7 +442,7 @@ class CallsiteContextGraph {
void addStackNodesForMIB(ContextNode *AllocNode,
CallStack<NodeT, IteratorT> &StackContext,
CallStack<NodeT, IteratorT> &CallsiteContext,
- AllocationType AllocType);
+ AllocationType AllocType, uint64_t TotalSize);
/// Matches all callsite metadata (or summary) to the nodes created for
/// allocation memprof MIB metadata, synthesizing new nodes to reflect any
@@ -611,6 +614,10 @@ class CallsiteContextGraph {
/// Map from each context ID to the AllocationType assigned to that context.
DenseMap<uint32_t, AllocationType> ContextIdToAllocationType;
+ /// Map from each contextID to the profiled aggregate allocation size,
+ /// optionally populated when requested (via MemProfReportHintedSizes).
+ DenseMap<uint32_t, uint64_t> ContextIdToTotalSize;
+
/// Identifies the context node created for a stack id when adding the MIB
/// contexts to the graph. This is used to locate the context nodes when
/// trying to assign the corresponding callsites with those stack ids to these
@@ -1004,11 +1011,24 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
return AllocNode;
}
+static std::string getAllocTypeString(uint8_t AllocTypes) {
+ if (!AllocTypes)
+ return "None";
+ std::string Str;
+ if (AllocTypes & (uint8_t)AllocationType::NotCold)
+ Str += "NotCold";
+ if (AllocTypes & (uint8_t)AllocationType::Cold)
+ Str += "Cold";
+ return Str;
+}
+
template <typename DerivedCCG, typename FuncTy, typename CallTy>
template <class NodeT, class IteratorT>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
- CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType) {
+ CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType,
+ uint64_t TotalSize) {
+ assert(!MemProfReportHintedSizes || TotalSize > 0);
// Treating the hot alloc type as NotCold before the disambiguation for "hot"
// is done.
if (AllocType == AllocationType::Hot)
@@ -1016,6 +1036,11 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
ContextIdToAllocationType[++LastContextId] = AllocType;
+ if (MemProfReportHintedSizes) {
+ assert(TotalSize);
+ ContextIdToTotalSize[LastContextId] = TotalSize;
+ }
+
// Update alloc type and context ids for this MIB.
AllocNode->AllocTypes |= (uint8_t)AllocType;
@@ -1060,6 +1085,10 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
assert(ContextIdToAllocationType.count(OldId));
// The new context has the same allocation type as original.
ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
+ // For now set this to 0 so we don't duplicate sizes. Not clear how to divvy
+ // up the size. Assume that if we are able to duplicate context ids that we
+ // will be able to disambiguate all copies.
+ ContextIdToTotalSize[LastContextId] = 0;
}
return NewContextIds;
}
@@ -1663,7 +1692,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
addStackNodesForMIB<MDNode, MDNode::op_iterator>(
AllocNode, StackContext, CallsiteContext,
- getMIBAllocType(MIBMD));
+ getMIBAllocType(MIBMD), getMIBTotalSize(MIBMD));
}
assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
// Memprof and callsite metadata on memory allocations no longer
@@ -1735,12 +1764,20 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
// stack ids on the allocation call during ModuleSummaryAnalysis.
CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
EmptyContext;
+ unsigned I = 0;
+ assert(!MemProfReportHintedSizes ||
+ AN.TotalSizes.size() == AN.MIBs.size());
// Now add all of the MIBs and their stack nodes.
for (auto &MIB : AN.MIBs) {
CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
StackContext(&MIB);
+ uint64_t TotalSize = 0;
+ if (MemProfReportHintedSizes)
+ TotalSize = AN.TotalSizes[I];
addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
- AllocNode, StackContext, EmptyContext, MIB.AllocType);
+ AllocNode, StackContext, EmptyContext, MIB.AllocType,
+ TotalSize);
+ I++;
}
assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
// Initialize version 0 on the summary alloc node to the current alloc
@@ -2171,17 +2208,6 @@ bool IndexCallsiteContextGraph::calleeMatchesFunc(
return true;
}
-static std::string getAllocTypeString(uint8_t AllocTypes) {
- if (!AllocTypes)
- return "None";
- std::string Str;
- if (AllocTypes & (uint8_t)AllocationType::NotCold)
- Str += "NotCold";
- if (AllocTypes & (uint8_t)AllocationType::Cold)
- Str += "Cold";
- return Str;
-}
-
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
const {
@@ -2261,6 +2287,30 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
}
}
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
+ raw_ostream &OS) const {
+ using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
+ for (const auto Node : nodes<GraphType>(this)) {
+ if (Node->isRemoved())
+ continue;
+ if (!Node->IsAllocation)
+ continue;
+ DenseSet<uint32_t> ContextIds = Node->getContextIds();
+ std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
+ std::sort(SortedIds.begin(), SortedIds.end());
+ for (auto Id : SortedIds) {
+ auto SizeI = ContextIdToTotalSize.find(Id);
+ assert(SizeI != ContextIdToTotalSize.end());
+ auto TypeI = ContextIdToAllocationType.find(Id);
+ assert(TypeI != ContextIdToAllocationType.end());
+ OS << getAllocTypeString((uint8_t)TypeI->second) << " context " << Id
+ << " with total size " << SizeI->second << " is "
+ << getAllocTypeString(Node->AllocTypes) << " after cloning\n";
+ }
+ }
+}
+
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
@@ -3797,6 +3847,9 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
if (ExportToDot)
exportToDot("clonefuncassign");
+ if (MemProfReportHintedSizes)
+ printTotalSizes(errs());
+
return Changed;
}
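With -memprof-report-hinted-sizes, printTotalSizes emits one line per
(sorted) context id at the end of processing, in the form checked by
the SIZES prefixes in the updated tests below, e.g.:
  NotCold context 1 with total size 100 is NotCold after cloning
  Cold context 2 with total size 400 is Cold after cloning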
diff --git a/llvm/test/Bitcode/summary_version.ll b/llvm/test/Bitcode/summary_version.ll
index 98feab6fe2f99..26c64f81a773f 100644
--- a/llvm/test/Bitcode/summary_version.ll
+++ b/llvm/test/Bitcode/summary_version.ll
@@ -2,7 +2,7 @@
; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
-; CHECK: <VERSION op0=9/>
+; CHECK: <VERSION op0=10/>
diff --git a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
index 19e228fd5355c..b3f1e770810d2 100644
--- a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
+++ b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
@@ -11,7 +11,7 @@
; RUN: llvm-dis -o - %t.o | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=DIS
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
-; CHECK-NEXT: <VERSION op0=9/>
+; CHECK-NEXT: <VERSION op0=
; CHECK-NEXT: <FLAGS op0=0/>
; The `VALUE_GUID` below represents the "_ZTV4Base" referenced by the instruction
; that loads vtable pointers.
diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll
index 54e01e5fcdf95..6922dbfd36846 100644
--- a/llvm/test/ThinLTO/X86/memprof-basic.ll
+++ b/llvm/test/ThinLTO/X86/memprof-basic.ll
@@ -34,7 +34,7 @@
;; -stats requires asserts
; REQUIRES: asserts
-; RUN: opt -thinlto-bc %s >%t.o
+; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t.o,main,plx \
@@ -43,9 +43,11 @@
; RUN: -r=%t.o,_Znam, \
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
+; RUN: -memprof-report-hinted-sizes \
; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS
+; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS \
+; RUN: --check-prefix=SIZES
; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
;; We should have cloned bar, baz, and foo, for the cold memory allocation.
@@ -64,9 +66,10 @@
; RUN: -r=%t.o,_Znam, \
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \
+; RUN: -memprof-report-hinted-sizes \
; RUN: -stats -pass-remarks=memprof-context-disambiguation \
; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN: --check-prefix=STATS
+; RUN: --check-prefix=STATS --check-prefix=SIZES
; RUN: cat %t2.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
;; We should have cloned bar, baz, and foo, for the cold memory allocation.
@@ -125,9 +128,9 @@ attributes #0 = { noinline optnone }
!0 = !{i64 8632435727821051414}
!1 = !{i64 -3421689549917153178}
!2 = !{!3, !5}
-!3 = !{!4, !"notcold"}
+!3 = !{!4, !"notcold", i64 100}
!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!5 = !{!6, !"cold"}
+!5 = !{!6, !"cold", i64 400}
!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
!7 = !{i64 9086428284934609951}
!8 = !{i64 -5964873800580613432}
@@ -264,6 +267,8 @@ attributes #0 = { noinline optnone }
; DUMP: Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
; DUMP: Clone of [[BAR]]
+; SIZES: NotCold context 1 with total size 100 is NotCold after cloning
+; SIZES: Cold context 2 with total size 400 is Cold after cloning
; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
; REMARKS: created clone _Z3barv.memprof.1
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
index 483582c6ced95..a82f872d51c7d 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
@@ -38,8 +38,9 @@
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
; RUN: -stats -pass-remarks=memprof-context-disambiguation \
+; RUN: -memprof-report-hinted-sizes \
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
-; RUN: --check-prefix=STATS --check-prefix=REMARKS
+; RUN: --check-prefix=STATS --check-prefix=REMARKS --check-prefix=SIZES
; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
;; We should have cloned bar, baz, and foo, for the cold memory allocation.
@@ -105,9 +106,9 @@ attributes #6 = { builtin }
!0 = !{i64 8632435727821051414}
!1 = !{i64 -3421689549917153178}
!2 = !{!3, !5}
-!3 = !{!4, !"notcold"}
+!3 = !{!4, !"notcold", i64 100}
!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!5 = !{!6, !"cold"}
+!5 = !{!6, !"cold", i64 400}
!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
!7 = !{i64 9086428284934609951}
!8 = !{i64 -5964873800580613432}
@@ -248,6 +249,8 @@ attributes #6 = { builtin }
; REMARKS: call in clone _Z3bazv assigned to call function clone _Z3barv
; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
+; SIZES: NotCold context 1 with total size 100 is NotCold after cloning
+; SIZES: Cold context 2 with total size 400 is Cold after cloning
; IR: define {{.*}} @main
;; The first call to foo does not allocate cold memory. It should call the