[llvm] [MemProf] Track and report profiled sizes through cloning (PR #98382)
Teresa Johnson via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 11 14:01:46 PDT 2024
https://github.com/teresajohnson updated https://github.com/llvm/llvm-project/pull/98382
>From 73358b0b8640744b1247c17ae25450ff9d52642a Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Wed, 10 Jul 2024 10:02:03 -0700
Subject: [PATCH 1/4] [MemProf] Track and report profiled sizes through cloning
If requested, via the -memprof-report-hinted-sizes option, track the
total profiled size of each MIB through the thin link, then report on
the corresponding allocation coldness after all cloning is complete.
To save size, a different bitcode record type is used for the allocation
info when the option is specified, and the sizes are kept separate from
the MIBs in the index.
---
llvm/include/llvm/Bitcode/LLVMBitCodes.h | 10 +++
llvm/include/llvm/IR/ModuleSummaryIndex.h | 14 ++++
llvm/include/llvm/IR/ModuleSummaryIndexYAML.h | 3 +-
llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 16 +++-
llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp | 2 +
llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 34 +++++++-
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 59 +++++++++++---
.../IPO/MemProfContextDisambiguation.cpp | 81 +++++++++++++++----
llvm/test/ThinLTO/X86/memprof-basic.ll | 17 ++--
.../MemProfContextDisambiguation/basic.ll | 11 ++-
10 files changed, 205 insertions(+), 42 deletions(-)
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 5b5e08b5cbc3f..9f989aaf22779 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -319,6 +319,16 @@ enum GlobalValueSummarySymtabCodes {
// numver x version]
FS_COMBINED_ALLOC_INFO = 29,
FS_STACK_IDS = 30,
+ // Summary of per-module allocation memprof metadata when we are tracking
+ // total sizes.
+ // [n x (alloc type, total size, nummib, nummib x stackidindex)]
+ FS_PERMODULE_ALLOC_INFO_TOTAL_SIZES = 31,
+ // Summary of combined index allocation memprof metadata when we are tracking
+ // total sizes.
+ // [nummib, numver,
+ // nummib x (alloc type, total size, numstackids,
+ // numstackids x stackidindex), numver x version]
+ FS_COMBINED_ALLOC_INFO_TOTAL_SIZES = 32,
};
enum MetadataCodes {
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 31271ed388e54..950c0ed824dbb 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -403,6 +403,10 @@ struct AllocInfo {
// Vector of MIBs in this memprof metadata.
std::vector<MIBInfo> MIBs;
+ // If requested, keep track of total profiled sizes for each MIB. This will be
+ // a vector of the same length and order as the MIBs vector.
+ std::unique_ptr<std::vector<uint64_t>> TotalSizes;
+
AllocInfo(std::vector<MIBInfo> MIBs) : MIBs(std::move(MIBs)) {
Versions.push_back(0);
}
@@ -423,6 +427,16 @@ inline raw_ostream &operator<<(raw_ostream &OS, const AllocInfo &AE) {
for (auto &M : AE.MIBs) {
OS << "\t\t" << M << "\n";
}
+ if (AE.TotalSizes) {
+ OS << " TotalSizes per MIB:\n\t\t";
+ First = true;
+ for (auto &TS : *AE.TotalSizes) {
+ if (!First)
+ OS << ", ";
+ First = false;
+ OS << TS << "\n";
+ }
+ }
return OS;
}
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
index b2747d24c5396..26bf08f9f7439 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -224,6 +224,7 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
V.emplace(RefGUID, /*IsAnalysis=*/false);
Refs.push_back(ValueInfo(/*IsAnalysis=*/false, &*V.find(RefGUID)));
}
+ std::vector<AllocInfo> EmptyAllocInfo;
Elem.SummaryList.push_back(std::make_unique<FunctionSummary>(
GlobalValueSummary::GVFlags(
static_cast<GlobalValue::LinkageTypes>(FSum.Linkage),
@@ -238,7 +239,7 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
std::move(FSum.TypeTestAssumeConstVCalls),
std::move(FSum.TypeCheckedLoadConstVCalls),
ArrayRef<FunctionSummary::ParamAccess>{}, ArrayRef<CallsiteInfo>{},
- ArrayRef<AllocInfo>{}));
+ std::move(EmptyAllocInfo)));
}
}
static void output(IO &io, GlobalValueSummaryMapTy &V) {
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 94ac0484f5ec7..c3b6fd8bc20af 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -85,6 +85,8 @@ extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;
extern cl::opt<unsigned> MaxNumVTableAnnotations;
+extern cl::opt<bool> MemProfReportHintedSizes;
+
// Walk through the operands of a given User via worklist iteration and populate
// the set of GlobalValue references encountered. Invoked either on an
// Instruction or a GlobalVariable (which walks its initializer).
@@ -517,6 +519,7 @@ static void computeFunctionSummary(
auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
if (MemProfMD) {
std::vector<MIBInfo> MIBs;
+ std::vector<uint64_t> TotalSizes;
for (auto &MDOp : MemProfMD->operands()) {
auto *MIBMD = cast<const MDNode>(MDOp);
MDNode *StackNode = getMIBStackNode(MIBMD);
@@ -536,8 +539,18 @@ static void computeFunctionSummary(
}
MIBs.push_back(
MIBInfo(getMIBAllocType(MIBMD), std::move(StackIdIndices)));
+ if (MemProfReportHintedSizes) {
+ auto TotalSize = getMIBTotalSize(MIBMD);
+ assert(TotalSize);
+ TotalSizes.push_back(TotalSize);
+ }
}
Allocs.push_back(AllocInfo(std::move(MIBs)));
+ if (MemProfReportHintedSizes) {
+ assert(Allocs.back().MIBs.size() == TotalSizes.size());
+ Allocs.back().TotalSizes =
+ std::make_unique<std::vector<uint64_t>>(std::move(TotalSizes));
+ }
} else if (!InstCallsite.empty()) {
SmallVector<unsigned> StackIdIndices;
for (auto StackId : InstCallsite)
@@ -935,6 +948,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
CantBePromoted.insert(GV->getGUID());
// Create the appropriate summary type.
if (Function *F = dyn_cast<Function>(GV)) {
+ std::vector<AllocInfo> EmptyAllocInfo;
std::unique_ptr<FunctionSummary> Summary =
std::make_unique<FunctionSummary>(
GVFlags, /*InstCount=*/0,
@@ -957,7 +971,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
ArrayRef<FunctionSummary::ConstVCall>{},
ArrayRef<FunctionSummary::ConstVCall>{},
ArrayRef<FunctionSummary::ParamAccess>{},
- ArrayRef<CallsiteInfo>{}, ArrayRef<AllocInfo>{});
+ ArrayRef<CallsiteInfo>{}, std::move(EmptyAllocInfo));
Index.addGlobalValueSummary(*GV, std::move(Summary));
} else {
std::unique_ptr<GlobalVarSummary> Summary =
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index b7ed9cdf63145..82aa7b7e077a1 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -328,6 +328,8 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(FS, COMBINED_CALLSITE_INFO)
STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO)
STRINGIFY_CODE(FS, STACK_IDS)
+ STRINGIFY_CODE(FS, PERMODULE_ALLOC_INFO_TOTAL_SIZES)
+ STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO_TOTAL_SIZES)
}
case bitc::METADATA_ATTACHMENT_ID:
switch (CodeID) {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index f56b2b32ff98f..1cba859a3fade 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -7991,12 +7991,19 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
break;
}
- case bitc::FS_PERMODULE_ALLOC_INFO: {
+ case bitc::FS_PERMODULE_ALLOC_INFO:
+ case bitc::FS_PERMODULE_ALLOC_INFO_TOTAL_SIZES: {
+ bool HasTotalSizes = BitCode == bitc::FS_PERMODULE_ALLOC_INFO_TOTAL_SIZES;
unsigned I = 0;
std::vector<MIBInfo> MIBs;
+ std::vector<uint64_t> TotalSizes;
while (I < Record.size()) {
- assert(Record.size() - I >= 2);
+ assert(Record.size() - I >= (HasTotalSizes ? 3 : 2));
AllocationType AllocType = (AllocationType)Record[I++];
+ if (HasTotalSizes) {
+ TotalSizes.push_back(Record[I++]);
+ assert(TotalSizes.back());
+ }
unsigned NumStackEntries = Record[I++];
assert(Record.size() - I >= NumStackEntries);
SmallVector<unsigned> StackIdList;
@@ -8008,18 +8015,31 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
}
PendingAllocs.push_back(AllocInfo(std::move(MIBs)));
+ assert(HasTotalSizes != TotalSizes.empty());
+ if (HasTotalSizes) {
+ assert(PendingAllocs.back().MIBs.size() == TotalSizes.size());
+ PendingAllocs.back().TotalSizes =
+ std::make_unique<std::vector<uint64_t>>(std::move(TotalSizes));
+ }
break;
}
- case bitc::FS_COMBINED_ALLOC_INFO: {
+ case bitc::FS_COMBINED_ALLOC_INFO:
+ case bitc::FS_COMBINED_ALLOC_INFO_TOTAL_SIZES: {
+ bool HasTotalSizes = BitCode == bitc::FS_COMBINED_ALLOC_INFO_TOTAL_SIZES;
unsigned I = 0;
std::vector<MIBInfo> MIBs;
+ std::vector<uint64_t> TotalSizes;
unsigned NumMIBs = Record[I++];
unsigned NumVersions = Record[I++];
unsigned MIBsRead = 0;
while (MIBsRead++ < NumMIBs) {
- assert(Record.size() - I >= 2);
+ assert(Record.size() - I >= (HasTotalSizes ? 3 : 2));
AllocationType AllocType = (AllocationType)Record[I++];
+ if (HasTotalSizes) {
+ TotalSizes.push_back(Record[I++]);
+ assert(TotalSizes.back());
+ }
unsigned NumStackEntries = Record[I++];
assert(Record.size() - I >= NumStackEntries);
SmallVector<unsigned> StackIdList;
@@ -8036,6 +8056,12 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
Versions.push_back(Record[I++]);
PendingAllocs.push_back(
AllocInfo(std::move(Versions), std::move(MIBs)));
+ assert(HasTotalSizes != TotalSizes.empty());
+ if (HasTotalSizes) {
+ assert(PendingAllocs.back().MIBs.size() == TotalSizes.size());
+ PendingAllocs.back().TotalSizes =
+ std::make_unique<std::vector<uint64_t>>(std::move(TotalSizes));
+ }
break;
}
}
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 3378931065f9b..7b8f6371b5698 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -230,7 +230,8 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
void writePerModuleFunctionSummaryRecord(
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
- unsigned CallsiteAbbrev, unsigned AllocAbbrev, const Function &F);
+ unsigned CallsiteAbbrev, unsigned AllocAbbrev,
+ unsigned AllocTotalSizeAbbrev, const Function &F);
void writeModuleLevelReferences(const GlobalVariable &V,
SmallVector<uint64_t, 64> &NameVals,
unsigned FSModRefsAbbrev,
@@ -4158,7 +4159,7 @@ static void writeTypeIdCompatibleVtableSummaryRecord(
static void writeFunctionHeapProfileRecords(
BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev,
- unsigned AllocAbbrev, bool PerModule,
+ unsigned AllocAbbrev, unsigned AllocTotalSizeAbbrev, bool PerModule,
std::function<unsigned(const ValueInfo &VI)> GetValueID,
std::function<unsigned(unsigned)> GetStackIndex) {
SmallVector<uint64_t> Record;
@@ -4193,19 +4194,31 @@ static void writeFunctionHeapProfileRecords(
Record.push_back(AI.MIBs.size());
Record.push_back(AI.Versions.size());
}
+ unsigned I = 0;
+ assert(!AI.TotalSizes || AI.TotalSizes->size() == AI.MIBs.size());
for (auto &MIB : AI.MIBs) {
Record.push_back((uint8_t)MIB.AllocType);
+ if (AI.TotalSizes) {
+ Record.push_back((*AI.TotalSizes)[I]);
+ assert((*AI.TotalSizes)[I]);
+ }
Record.push_back(MIB.StackIdIndices.size());
for (auto Id : MIB.StackIdIndices)
Record.push_back(GetStackIndex(Id));
+ I++;
}
if (!PerModule) {
for (auto V : AI.Versions)
Record.push_back(V);
}
- Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
- : bitc::FS_COMBINED_ALLOC_INFO,
- Record, AllocAbbrev);
+ unsigned Code =
+ PerModule ? (AI.TotalSizes ? bitc::FS_PERMODULE_ALLOC_INFO_TOTAL_SIZES
+ : bitc::FS_PERMODULE_ALLOC_INFO)
+ : (AI.TotalSizes ? bitc::FS_COMBINED_ALLOC_INFO_TOTAL_SIZES
+ : bitc::FS_COMBINED_ALLOC_INFO);
+ unsigned AllocAbbrevToUse =
+ AI.TotalSizes ? AllocTotalSizeAbbrev : AllocAbbrev;
+ Stream.EmitRecord(Code, Record, AllocAbbrevToUse);
}
}
@@ -4214,7 +4227,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
unsigned ValueID, unsigned FSCallsRelBFAbbrev,
unsigned FSCallsProfileAbbrev, unsigned CallsiteAbbrev,
- unsigned AllocAbbrev, const Function &F) {
+ unsigned AllocAbbrev, unsigned AllocTotalSizeAbbrev, const Function &F) {
NameVals.push_back(ValueID);
FunctionSummary *FS = cast<FunctionSummary>(Summary);
@@ -4225,7 +4238,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
});
writeFunctionHeapProfileRecords(
- Stream, FS, CallsiteAbbrev, AllocAbbrev,
+ Stream, FS, CallsiteAbbrev, AllocAbbrev, AllocTotalSizeAbbrev,
/*PerModule*/ true,
/*GetValueId*/ [&](const ValueInfo &VI) { return getValueId(VI); },
/*GetStackIndex*/ [&](unsigned I) { return I; });
@@ -4437,6 +4450,13 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO_TOTAL_SIZES));
+ // n x (alloc type, total size, numstackids, numstackids x stackidindex)
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned AllocTotalSizeAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
SmallVector<uint64_t, 64> NameVals;
// Iterate over the list of functions instead of the Index to
// ensure the ordering is stable.
@@ -4454,9 +4474,10 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
continue;
}
auto *Summary = VI.getSummaryList()[0].get();
- writePerModuleFunctionSummaryRecord(
- NameVals, Summary, VE.getValueID(&F), FSCallsRelBFAbbrev,
- FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, F);
+ writePerModuleFunctionSummaryRecord(NameVals, Summary, VE.getValueID(&F),
+ FSCallsRelBFAbbrev,
+ FSCallsProfileAbbrev, CallsiteAbbrev,
+ AllocAbbrev, AllocTotalSizeAbbrev, F);
}
// Capture references from GlobalVariable initializers, which are outside
@@ -4586,6 +4607,16 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
return DecSummaries->count(GVS);
};
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO_TOTAL_SIZES));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
+ // nummib x (alloc type, total size, numstackids, numstackids x stackidindex),
+ // numver x version
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned AllocTotalSizeAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
// The aliases are emitted as a post-pass, and will point to the value
// id of the aliasee. Save them in a vector for post-processing.
SmallVector<AliasSummary *, 64> Aliases;
@@ -4673,9 +4704,10 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
getReferencedTypeIds(FS, ReferencedTypeIds);
writeFunctionHeapProfileRecords(
- Stream, FS, CallsiteAbbrev, AllocAbbrev,
+ Stream, FS, CallsiteAbbrev, AllocAbbrev, AllocTotalSizeAbbrev,
/*PerModule*/ false,
- /*GetValueId*/ [&](const ValueInfo &VI) -> unsigned {
+ /*GetValueId*/
+ [&](const ValueInfo &VI) -> unsigned {
std::optional<unsigned> ValueID = GetValueId(VI);
// This can happen in shared index files for distributed ThinLTO if
// the callee function summary is not included. Record 0 which we
@@ -4685,7 +4717,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
return 0;
return *ValueID;
},
- /*GetStackIndex*/ [&](unsigned I) {
+ /*GetStackIndex*/
+ [&](unsigned I) {
// Get the corresponding index into the list of StackIds actually
// being written for this combined index (which may be a subset in
// the case of distributed indexes).
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index c1e5ab1a2b561..f5713d31e263f 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -134,6 +134,8 @@ cl::opt<bool> SupportsHotColdNew(
cl::desc("Linking with hot/cold operator new interfaces"));
} // namespace llvm
+extern cl::opt<bool> MemProfReportHintedSizes;
+
namespace {
/// CRTP base for graphs built from either IR or ThinLTO summary index.
///
@@ -172,6 +174,7 @@ class CallsiteContextGraph {
void dump() const;
void print(raw_ostream &OS) const;
+ void printTotalSizes(raw_ostream &OS) const;
friend raw_ostream &operator<<(raw_ostream &OS,
const CallsiteContextGraph &CCG) {
@@ -439,7 +442,7 @@ class CallsiteContextGraph {
void addStackNodesForMIB(ContextNode *AllocNode,
CallStack<NodeT, IteratorT> &StackContext,
CallStack<NodeT, IteratorT> &CallsiteContext,
- AllocationType AllocType);
+ AllocationType AllocType, uint64_t TotalSize);
/// Matches all callsite metadata (or summary) to the nodes created for
/// allocation memprof MIB metadata, synthesizing new nodes to reflect any
@@ -611,6 +614,8 @@ class CallsiteContextGraph {
/// Map from each context ID to the AllocationType assigned to that context.
DenseMap<uint32_t, AllocationType> ContextIdToAllocationType;
+ std::map<uint32_t, uint64_t> ContextIdToTotalSize;
+
/// Identifies the context node created for a stack id when adding the MIB
/// contexts to the graph. This is used to locate the context nodes when
/// trying to assign the corresponding callsites with those stack ids to these
@@ -1004,11 +1009,24 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
return AllocNode;
}
+static std::string getAllocTypeString(uint8_t AllocTypes) {
+ if (!AllocTypes)
+ return "None";
+ std::string Str;
+ if (AllocTypes & (uint8_t)AllocationType::NotCold)
+ Str += "NotCold";
+ if (AllocTypes & (uint8_t)AllocationType::Cold)
+ Str += "Cold";
+ return Str;
+}
+
template <typename DerivedCCG, typename FuncTy, typename CallTy>
template <class NodeT, class IteratorT>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
- CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType) {
+ CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType,
+ uint64_t TotalSize) {
+ assert(!MemProfReportHintedSizes || TotalSize > 0);
// Treating the hot alloc type as NotCold before the disambiguation for "hot"
// is done.
if (AllocType == AllocationType::Hot)
@@ -1016,6 +1034,11 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
ContextIdToAllocationType[++LastContextId] = AllocType;
+ if (MemProfReportHintedSizes) {
+ assert(TotalSize);
+ ContextIdToTotalSize[LastContextId] = TotalSize;
+ }
+
// Update alloc type and context ids for this MIB.
AllocNode->AllocTypes |= (uint8_t)AllocType;
@@ -1060,6 +1083,10 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
assert(ContextIdToAllocationType.count(OldId));
// The new context has the same allocation type as original.
ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
+ // For now set this to 0 so we don't duplicate sizes. Not clear how to divvy
+ // up the size. Assume that if we are able to duplicate context ids that we
+ // will be able to disambiguate all copies.
+ ContextIdToTotalSize[LastContextId] = 0;
}
return NewContextIds;
}
@@ -1663,7 +1690,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
addStackNodesForMIB<MDNode, MDNode::op_iterator>(
AllocNode, StackContext, CallsiteContext,
- getMIBAllocType(MIBMD));
+ getMIBAllocType(MIBMD), getMIBTotalSize(MIBMD));
}
assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
// Memprof and callsite metadata on memory allocations no longer
@@ -1735,12 +1762,20 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
// stack ids on the allocation call during ModuleSummaryAnalysis.
CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
EmptyContext;
+ unsigned I = 0;
+ assert(!MemProfReportHintedSizes ||
+ (AN.TotalSizes && AN.TotalSizes->size() == AN.MIBs.size()));
// Now add all of the MIBs and their stack nodes.
for (auto &MIB : AN.MIBs) {
CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
StackContext(&MIB);
+ uint64_t TotalSize = 0;
+ if (MemProfReportHintedSizes)
+ TotalSize = (*AN.TotalSizes)[I];
addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
- AllocNode, StackContext, EmptyContext, MIB.AllocType);
+ AllocNode, StackContext, EmptyContext, MIB.AllocType,
+ TotalSize);
+ I++;
}
assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
// Initialize version 0 on the summary alloc node to the current alloc
@@ -2171,17 +2206,6 @@ bool IndexCallsiteContextGraph::calleeMatchesFunc(
return true;
}
-static std::string getAllocTypeString(uint8_t AllocTypes) {
- if (!AllocTypes)
- return "None";
- std::string Str;
- if (AllocTypes & (uint8_t)AllocationType::NotCold)
- Str += "NotCold";
- if (AllocTypes & (uint8_t)AllocationType::Cold)
- Str += "Cold";
- return Str;
-}
-
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
const {
@@ -2261,6 +2285,30 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
}
}
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
+ raw_ostream &OS) const {
+ using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
+ for (const auto Node : nodes<GraphType>(this)) {
+ if (Node->isRemoved())
+ continue;
+ if (!Node->IsAllocation)
+ continue;
+ auto ContextIds = Node->getContextIds();
+ std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
+ std::sort(SortedIds.begin(), SortedIds.end());
+ for (auto Id : SortedIds) {
+ auto SizeI = ContextIdToTotalSize.find(Id);
+ assert(SizeI != ContextIdToTotalSize.end());
+ auto TypeI = ContextIdToAllocationType.find(Id);
+ assert(TypeI != ContextIdToAllocationType.end());
+ OS << getAllocTypeString((uint8_t)TypeI->second) << " context " << Id
+ << " with total size " << SizeI->second << " is "
+ << getAllocTypeString(Node->AllocTypes) << " after cloning\n";
+ }
+ }
+}
+
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
@@ -3797,6 +3845,9 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
if (ExportToDot)
exportToDot("clonefuncassign");
+ if (MemProfReportHintedSizes)
+ printTotalSizes(errs());
+
return Changed;
}
diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll
index 54e01e5fcdf95..e080279a16412 100644
--- a/llvm/test/ThinLTO/X86/memprof-basic.ll
+++ b/llvm/test/ThinLTO/X86/memprof-basic.ll
@@ -34,7 +34,7 @@
;; -stats requires asserts
; REQUIRES: asserts
-; RUN: opt -thinlto-bc %s >%t.o
+; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t.o,main,plx \
@@ -43,9 +43,11 @@
; RUN: -r=%t.o,_Znam, \
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
+; RUN: -memprof-report-hinted-sizes \
; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS
+; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS \
+; RUN: --check-prefix=SIZES
; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
;; We should have cloned bar, baz, and foo, for the cold memory allocation.
@@ -64,9 +66,10 @@
; RUN: -r=%t.o,_Znam, \
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \
+; RUN: -memprof-report-hinted-sizes \
; RUN: -stats -pass-remarks=memprof-context-disambiguation \
; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN: --check-prefix=STATS
+; RUN: --check-prefix=STATS --check-prefix=SIZES
; RUN: cat %t2.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
;; We should have cloned bar, baz, and foo, for the cold memory allocation.
@@ -125,9 +128,9 @@ attributes #0 = { noinline optnone }
!0 = !{i64 8632435727821051414}
!1 = !{i64 -3421689549917153178}
!2 = !{!3, !5}
-!3 = !{!4, !"notcold"}
+!3 = !{!4, !"notcold", i64 100}
!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!5 = !{!6, !"cold"}
+!5 = !{!6, !"cold", i64 400}
!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
!7 = !{i64 9086428284934609951}
!8 = !{i64 -5964873800580613432}
@@ -265,6 +268,10 @@ attributes #0 = { noinline optnone }
; DUMP: Clone of [[BAR]]
+; SIZES: NotCold context 1 with total size 100 is NotCold after cloning
+; SIZES: Cold context 2 with total size 400 is Cold after cloning
+
+
; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
; REMARKS: created clone _Z3barv.memprof.1
; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
index 483582c6ced95..c685a413718a6 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
@@ -38,8 +38,9 @@
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
; RUN: -stats -pass-remarks=memprof-context-disambiguation \
+; RUN: -memprof-report-hinted-sizes \
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
-; RUN: --check-prefix=STATS --check-prefix=REMARKS
+; RUN: --check-prefix=STATS --check-prefix=REMARKS --check-prefix=SIZES
; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
;; We should have cloned bar, baz, and foo, for the cold memory allocation.
@@ -105,9 +106,9 @@ attributes #6 = { builtin }
!0 = !{i64 8632435727821051414}
!1 = !{i64 -3421689549917153178}
!2 = !{!3, !5}
-!3 = !{!4, !"notcold"}
+!3 = !{!4, !"notcold", i64 100}
!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!5 = !{!6, !"cold"}
+!5 = !{!6, !"cold", i64 400}
!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
!7 = !{i64 9086428284934609951}
!8 = !{i64 -5964873800580613432}
@@ -249,6 +250,10 @@ attributes #6 = { builtin }
; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
+; SIZES: NotCold context 1 with total size 100 is NotCold after cloning
+; SIZES: Cold context 2 with total size 400 is Cold after cloning
+
+
; IR: define {{.*}} @main
;; The first call to foo does not allocate cold memory. It should call the
;; original functions, which ultimately call the original allocation decorated
>From f320adfa050b3bec2e5094ef7d97f4cdc9d7c5e7 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Thu, 11 Jul 2024 08:52:20 -0700
Subject: [PATCH 2/4] Address comments
---
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 6 ++----
llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp | 6 ++++--
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 7b8f6371b5698..4463eb48b9811 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -4706,8 +4706,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
writeFunctionHeapProfileRecords(
Stream, FS, CallsiteAbbrev, AllocAbbrev, AllocTotalSizeAbbrev,
/*PerModule*/ false,
- /*GetValueId*/
- [&](const ValueInfo &VI) -> unsigned {
+ /*GetValueId*/ [&](const ValueInfo &VI) -> unsigned {
std::optional<unsigned> ValueID = GetValueId(VI);
// This can happen in shared index files for distributed ThinLTO if
// the callee function summary is not included. Record 0 which we
@@ -4717,8 +4716,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
return 0;
return *ValueID;
},
- /*GetStackIndex*/
- [&](unsigned I) {
+ /*GetStackIndex*/ [&](unsigned I) {
// Get the corresponding index into the list of StackIds actually
// being written for this combined index (which may be a subset in
// the case of distributed indexes).
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index f5713d31e263f..01138555a8fbc 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -614,7 +614,9 @@ class CallsiteContextGraph {
/// Map from each context ID to the AllocationType assigned to that context.
DenseMap<uint32_t, AllocationType> ContextIdToAllocationType;
- std::map<uint32_t, uint64_t> ContextIdToTotalSize;
+ /// Map from each contextID to the profiled aggregate allocation size,
+ /// optionally populated when requested (via MemProfReportHintedSizes).
+ DenseMap<uint32_t, uint64_t> ContextIdToTotalSize;
/// Identifies the context node created for a stack id when adding the MIB
/// contexts to the graph. This is used to locate the context nodes when
@@ -2294,7 +2296,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
continue;
if (!Node->IsAllocation)
continue;
- auto ContextIds = Node->getContextIds();
+ DenseSet<uint32_t> ContextIds = Node->getContextIds();
std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
std::sort(SortedIds.begin(), SortedIds.end());
for (auto Id : SortedIds) {
>From 8be1c3cc1eee302c15fe105ca1ac5f38bde2fcec Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Thu, 11 Jul 2024 13:59:18 -0700
Subject: [PATCH 3/4] Address comments
---
llvm/include/llvm/Bitcode/LLVMBitCodes.h | 15 +---
llvm/include/llvm/IR/ModuleSummaryIndex.h | 10 +--
llvm/include/llvm/IR/ModuleSummaryIndexYAML.h | 3 +-
llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 6 +-
llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp | 2 -
llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 58 ++++++++-------
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 72 +++++++------------
.../IPO/MemProfContextDisambiguation.cpp | 4 +-
llvm/test/Bitcode/summary_version.ll | 2 +-
.../thinlto-func-summary-vtableref-pgo.ll | 2 +-
10 files changed, 71 insertions(+), 103 deletions(-)
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 9f989aaf22779..4e6ef4c1a8c31 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -307,7 +307,8 @@ enum GlobalValueSummarySymtabCodes {
// [valueid, n x stackidindex]
FS_PERMODULE_CALLSITE_INFO = 26,
// Summary of per-module allocation memprof metadata.
- // [n x (alloc type, nummib, nummib x stackidindex)]
+ // [nummib, nummib x (alloc type, nummib, nummib x stackidindex),
+ // [nummib x total size]?]
FS_PERMODULE_ALLOC_INFO = 27,
// Summary of combined index memprof callsite metadata.
// [valueid, numstackindices, numver,
@@ -316,19 +317,9 @@ enum GlobalValueSummarySymtabCodes {
// Summary of combined index allocation memprof metadata.
// [nummib, numver,
// nummib x (alloc type, numstackids, numstackids x stackidindex),
- // numver x version]
+ // numver x version, [nummib x total size]?]
FS_COMBINED_ALLOC_INFO = 29,
FS_STACK_IDS = 30,
- // Summary of per-module allocation memprof metadata when we are tracking
- // total sizes.
- // [n x (alloc type, total size, nummib, nummib x stackidindex)]
- FS_PERMODULE_ALLOC_INFO_TOTAL_SIZES = 31,
- // Summary of combined index allocation memprof metadata when we are tracking
- // total sizes.
- // [nummib, numver,
- // nummib x (alloc type, total size, numstackids,
- // numstackids x stackidindex), numver x version]
- FS_COMBINED_ALLOC_INFO_TOTAL_SIZES = 32,
};
enum MetadataCodes {
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 950c0ed824dbb..949e60ffd1dbf 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -404,8 +404,8 @@ struct AllocInfo {
std::vector<MIBInfo> MIBs;
// If requested, keep track of total profiled sizes for each MIB. This will be
- // a vector of the same length and order as the MIBs vector.
- std::unique_ptr<std::vector<uint64_t>> TotalSizes;
+ // a vector of the same length and order as the MIBs vector, if non-empty.
+ std::vector<uint64_t> TotalSizes;
AllocInfo(std::vector<MIBInfo> MIBs) : MIBs(std::move(MIBs)) {
Versions.push_back(0);
@@ -427,10 +427,10 @@ inline raw_ostream &operator<<(raw_ostream &OS, const AllocInfo &AE) {
for (auto &M : AE.MIBs) {
OS << "\t\t" << M << "\n";
}
- if (AE.TotalSizes) {
+ if (!AE.TotalSizes.empty()) {
OS << " TotalSizes per MIB:\n\t\t";
First = true;
- for (auto &TS : *AE.TotalSizes) {
+ for (auto &TS : AE.TotalSizes) {
if (!First)
OS << ", ";
First = false;
@@ -1445,7 +1445,7 @@ class ModuleSummaryIndex {
// in the way some record are interpreted, like flags for instance.
// Note that incrementing this may require changes in both BitcodeReader.cpp
// and BitcodeWriter.cpp.
- static constexpr uint64_t BitcodeSummaryVersion = 9;
+ static constexpr uint64_t BitcodeSummaryVersion = 10;
// Regular LTO module name for ASM writer
static constexpr const char *getRegularLTOModuleName() {
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
index 26bf08f9f7439..b2747d24c5396 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -224,7 +224,6 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
V.emplace(RefGUID, /*IsAnalysis=*/false);
Refs.push_back(ValueInfo(/*IsAnalysis=*/false, &*V.find(RefGUID)));
}
- std::vector<AllocInfo> EmptyAllocInfo;
Elem.SummaryList.push_back(std::make_unique<FunctionSummary>(
GlobalValueSummary::GVFlags(
static_cast<GlobalValue::LinkageTypes>(FSum.Linkage),
@@ -239,7 +238,7 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
std::move(FSum.TypeTestAssumeConstVCalls),
std::move(FSum.TypeCheckedLoadConstVCalls),
ArrayRef<FunctionSummary::ParamAccess>{}, ArrayRef<CallsiteInfo>{},
- std::move(EmptyAllocInfo)));
+ ArrayRef<AllocInfo>{}));
}
}
static void output(IO &io, GlobalValueSummaryMapTy &V) {
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index c3b6fd8bc20af..e9490ccba8215 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -548,8 +548,7 @@ static void computeFunctionSummary(
Allocs.push_back(AllocInfo(std::move(MIBs)));
if (MemProfReportHintedSizes) {
assert(Allocs.back().MIBs.size() == TotalSizes.size());
- Allocs.back().TotalSizes =
- std::make_unique<std::vector<uint64_t>>(std::move(TotalSizes));
+ Allocs.back().TotalSizes = std::move(TotalSizes);
}
} else if (!InstCallsite.empty()) {
SmallVector<unsigned> StackIdIndices;
@@ -948,7 +947,6 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
CantBePromoted.insert(GV->getGUID());
// Create the appropriate summary type.
if (Function *F = dyn_cast<Function>(GV)) {
- std::vector<AllocInfo> EmptyAllocInfo;
std::unique_ptr<FunctionSummary> Summary =
std::make_unique<FunctionSummary>(
GVFlags, /*InstCount=*/0,
@@ -971,7 +969,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
ArrayRef<FunctionSummary::ConstVCall>{},
ArrayRef<FunctionSummary::ConstVCall>{},
ArrayRef<FunctionSummary::ParamAccess>{},
- ArrayRef<CallsiteInfo>{}, std::move(EmptyAllocInfo));
+ ArrayRef<CallsiteInfo>{}, ArrayRef<AllocInfo>{});
Index.addGlobalValueSummary(*GV, std::move(Summary));
} else {
std::unique_ptr<GlobalVarSummary> Summary =
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index 82aa7b7e077a1..b7ed9cdf63145 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -328,8 +328,6 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(FS, COMBINED_CALLSITE_INFO)
STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO)
STRINGIFY_CODE(FS, STACK_IDS)
- STRINGIFY_CODE(FS, PERMODULE_ALLOC_INFO_TOTAL_SIZES)
- STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO_TOTAL_SIZES)
}
case bitc::METADATA_ATTACHMENT_ID:
switch (CodeID) {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 1cba859a3fade..6203c6e5119d1 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -7991,19 +7991,17 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
break;
}
- case bitc::FS_PERMODULE_ALLOC_INFO:
- case bitc::FS_PERMODULE_ALLOC_INFO_TOTAL_SIZES: {
- bool HasTotalSizes = BitCode == bitc::FS_PERMODULE_ALLOC_INFO_TOTAL_SIZES;
+ case bitc::FS_PERMODULE_ALLOC_INFO: {
unsigned I = 0;
std::vector<MIBInfo> MIBs;
- std::vector<uint64_t> TotalSizes;
- while (I < Record.size()) {
- assert(Record.size() - I >= (HasTotalSizes ? 3 : 2));
+ unsigned NumMIBs = 0;
+ if (Version >= 10)
+ NumMIBs = Record[I++];
+ unsigned MIBsRead = 0;
+ while ((Version >= 10 && MIBsRead++ < NumMIBs) ||
+ (Version < 10 && I < Record.size())) {
+ assert(Record.size() - I >= 2);
AllocationType AllocType = (AllocationType)Record[I++];
- if (HasTotalSizes) {
- TotalSizes.push_back(Record[I++]);
- assert(TotalSizes.back());
- }
unsigned NumStackEntries = Record[I++];
assert(Record.size() - I >= NumStackEntries);
SmallVector<unsigned> StackIdList;
@@ -8014,32 +8012,31 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
}
MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
}
+ std::vector<uint64_t> TotalSizes;
+ // We either have no sizes or NumMIBs of them.
+ assert(I == Record.size() || Record.size() - I == NumMIBs);
+ if (I < Record.size()) {
+ MIBsRead = 0;
+ while (MIBsRead++ < NumMIBs)
+ TotalSizes.push_back(Record[I++]);
+ }
PendingAllocs.push_back(AllocInfo(std::move(MIBs)));
- assert(HasTotalSizes != TotalSizes.empty());
- if (HasTotalSizes) {
+ if (!TotalSizes.empty()) {
assert(PendingAllocs.back().MIBs.size() == TotalSizes.size());
- PendingAllocs.back().TotalSizes =
- std::make_unique<std::vector<uint64_t>>(std::move(TotalSizes));
+ PendingAllocs.back().TotalSizes = std::move(TotalSizes);
}
break;
}
- case bitc::FS_COMBINED_ALLOC_INFO:
- case bitc::FS_COMBINED_ALLOC_INFO_TOTAL_SIZES: {
- bool HasTotalSizes = BitCode == bitc::FS_COMBINED_ALLOC_INFO_TOTAL_SIZES;
+ case bitc::FS_COMBINED_ALLOC_INFO: {
unsigned I = 0;
std::vector<MIBInfo> MIBs;
- std::vector<uint64_t> TotalSizes;
unsigned NumMIBs = Record[I++];
unsigned NumVersions = Record[I++];
unsigned MIBsRead = 0;
while (MIBsRead++ < NumMIBs) {
- assert(Record.size() - I >= (HasTotalSizes ? 3 : 2));
+ assert(Record.size() - I >= 2);
AllocationType AllocType = (AllocationType)Record[I++];
- if (HasTotalSizes) {
- TotalSizes.push_back(Record[I++]);
- assert(TotalSizes.back());
- }
unsigned NumStackEntries = Record[I++];
assert(Record.size() - I >= NumStackEntries);
SmallVector<unsigned> StackIdList;
@@ -8054,13 +8051,20 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
SmallVector<uint8_t> Versions;
for (unsigned J = 0; J < NumVersions; J++)
Versions.push_back(Record[I++]);
+ std::vector<uint64_t> TotalSizes;
+ // We either have no sizes or NumMIBs of them.
+ assert(I == Record.size() || Record.size() - I == NumMIBs);
+ if (I < Record.size()) {
+ MIBsRead = 0;
+ while (MIBsRead++ < NumMIBs) {
+ TotalSizes.push_back(Record[I++]);
+ }
+ }
PendingAllocs.push_back(
AllocInfo(std::move(Versions), std::move(MIBs)));
- assert(HasTotalSizes != TotalSizes.empty());
- if (HasTotalSizes) {
+ if (!TotalSizes.empty()) {
assert(PendingAllocs.back().MIBs.size() == TotalSizes.size());
- PendingAllocs.back().TotalSizes =
- std::make_unique<std::vector<uint64_t>>(std::move(TotalSizes));
+ PendingAllocs.back().TotalSizes = std::move(TotalSizes);
}
break;
}
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 4463eb48b9811..b3ebe70e8c52f 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -230,8 +230,7 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
void writePerModuleFunctionSummaryRecord(
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
- unsigned CallsiteAbbrev, unsigned AllocAbbrev,
- unsigned AllocTotalSizeAbbrev, const Function &F);
+ unsigned CallsiteAbbrev, unsigned AllocAbbrev, const Function &F);
void writeModuleLevelReferences(const GlobalVariable &V,
SmallVector<uint64_t, 64> &NameVals,
unsigned FSModRefsAbbrev,
@@ -4159,7 +4158,7 @@ static void writeTypeIdCompatibleVtableSummaryRecord(
static void writeFunctionHeapProfileRecords(
BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev,
- unsigned AllocAbbrev, unsigned AllocTotalSizeAbbrev, bool PerModule,
+ unsigned AllocAbbrev, bool PerModule,
std::function<unsigned(const ValueInfo &VI)> GetValueID,
std::function<unsigned(unsigned)> GetStackIndex) {
SmallVector<uint64_t> Record;
@@ -4190,35 +4189,27 @@ static void writeFunctionHeapProfileRecords(
// Per module alloc versions should always have a single entry of
// value 0.
assert(!PerModule || (AI.Versions.size() == 1 && AI.Versions[0] == 0));
- if (!PerModule) {
- Record.push_back(AI.MIBs.size());
+ Record.push_back(AI.MIBs.size());
+ if (!PerModule)
Record.push_back(AI.Versions.size());
- }
- unsigned I = 0;
- assert(!AI.TotalSizes || AI.TotalSizes->size() == AI.MIBs.size());
for (auto &MIB : AI.MIBs) {
Record.push_back((uint8_t)MIB.AllocType);
- if (AI.TotalSizes) {
- Record.push_back((*AI.TotalSizes)[I]);
- assert((*AI.TotalSizes)[I]);
- }
Record.push_back(MIB.StackIdIndices.size());
for (auto Id : MIB.StackIdIndices)
Record.push_back(GetStackIndex(Id));
- I++;
}
if (!PerModule) {
for (auto V : AI.Versions)
Record.push_back(V);
}
- unsigned Code =
- PerModule ? (AI.TotalSizes ? bitc::FS_PERMODULE_ALLOC_INFO_TOTAL_SIZES
- : bitc::FS_PERMODULE_ALLOC_INFO)
- : (AI.TotalSizes ? bitc::FS_COMBINED_ALLOC_INFO_TOTAL_SIZES
- : bitc::FS_COMBINED_ALLOC_INFO);
- unsigned AllocAbbrevToUse =
- AI.TotalSizes ? AllocTotalSizeAbbrev : AllocAbbrev;
- Stream.EmitRecord(Code, Record, AllocAbbrevToUse);
+ assert(AI.TotalSizes.empty() || AI.TotalSizes.size() == AI.MIBs.size());
+ if (!AI.TotalSizes.empty()) {
+ for (auto Size : AI.TotalSizes)
+ Record.push_back(Size);
+ }
+ Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
+ : bitc::FS_COMBINED_ALLOC_INFO,
+ Record, AllocAbbrev);
}
}
@@ -4227,7 +4218,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
unsigned ValueID, unsigned FSCallsRelBFAbbrev,
unsigned FSCallsProfileAbbrev, unsigned CallsiteAbbrev,
- unsigned AllocAbbrev, unsigned AllocTotalSizeAbbrev, const Function &F) {
+ unsigned AllocAbbrev, const Function &F) {
NameVals.push_back(ValueID);
FunctionSummary *FS = cast<FunctionSummary>(Summary);
@@ -4238,7 +4229,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
});
writeFunctionHeapProfileRecords(
- Stream, FS, CallsiteAbbrev, AllocAbbrev, AllocTotalSizeAbbrev,
+ Stream, FS, CallsiteAbbrev, AllocAbbrev,
/*PerModule*/ true,
/*GetValueId*/ [&](const ValueInfo &VI) { return getValueId(VI); },
/*GetStackIndex*/ [&](unsigned I) { return I; });
@@ -4445,18 +4436,13 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
// n x (alloc type, numstackids, numstackids x stackidindex)
+ // optional: nummib x total size
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
- Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO_TOTAL_SIZES));
- // n x (alloc type, total size, numstackids, numstackids x stackidindex)
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned AllocTotalSizeAbbrev = Stream.EmitAbbrev(std::move(Abbv));
-
SmallVector<uint64_t, 64> NameVals;
// Iterate over the list of functions instead of the Index to
// ensure the ordering is stable.
@@ -4474,10 +4460,9 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
continue;
}
auto *Summary = VI.getSummaryList()[0].get();
- writePerModuleFunctionSummaryRecord(NameVals, Summary, VE.getValueID(&F),
- FSCallsRelBFAbbrev,
- FSCallsProfileAbbrev, CallsiteAbbrev,
- AllocAbbrev, AllocTotalSizeAbbrev, F);
+ writePerModuleFunctionSummaryRecord(
+ NameVals, Summary, VE.getValueID(&F), FSCallsRelBFAbbrev,
+ FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, F);
}
// Capture references from GlobalVariable initializers, which are outside
@@ -4597,6 +4582,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
// nummib x (alloc type, numstackids, numstackids x stackidindex),
// numver x version
+ // optional: nummib x total size
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
@@ -4607,16 +4593,6 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
return DecSummaries->count(GVS);
};
- Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO_TOTAL_SIZES));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
- // nummib x (alloc type, total size, numstackids, numstackids x stackidindex),
- // numver x version
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned AllocTotalSizeAbbrev = Stream.EmitAbbrev(std::move(Abbv));
-
// The aliases are emitted as a post-pass, and will point to the value
// id of the aliasee. Save them in a vector for post-processing.
SmallVector<AliasSummary *, 64> Aliases;
@@ -4704,9 +4680,10 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
getReferencedTypeIds(FS, ReferencedTypeIds);
writeFunctionHeapProfileRecords(
- Stream, FS, CallsiteAbbrev, AllocAbbrev, AllocTotalSizeAbbrev,
+ Stream, FS, CallsiteAbbrev, AllocAbbrev,
/*PerModule*/ false,
- /*GetValueId*/ [&](const ValueInfo &VI) -> unsigned {
+ /*GetValueId*/
+ [&](const ValueInfo &VI) -> unsigned {
std::optional<unsigned> ValueID = GetValueId(VI);
// This can happen in shared index files for distributed ThinLTO if
// the callee function summary is not included. Record 0 which we
@@ -4716,7 +4693,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
return 0;
return *ValueID;
},
- /*GetStackIndex*/ [&](unsigned I) {
+ /*GetStackIndex*/
+ [&](unsigned I) {
// Get the corresponding index into the list of StackIds actually
// being written for this combined index (which may be a subset in
// the case of distributed indexes).
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 01138555a8fbc..ef9ddeaaab632 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -1766,14 +1766,14 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
EmptyContext;
unsigned I = 0;
assert(!MemProfReportHintedSizes ||
- (AN.TotalSizes && AN.TotalSizes->size() == AN.MIBs.size()));
+ AN.TotalSizes.size() == AN.MIBs.size());
// Now add all of the MIBs and their stack nodes.
for (auto &MIB : AN.MIBs) {
CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
StackContext(&MIB);
uint64_t TotalSize = 0;
if (MemProfReportHintedSizes)
- TotalSize = (*AN.TotalSizes)[I];
+ TotalSize = AN.TotalSizes[I];
addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
AllocNode, StackContext, EmptyContext, MIB.AllocType,
TotalSize);
diff --git a/llvm/test/Bitcode/summary_version.ll b/llvm/test/Bitcode/summary_version.ll
index 98feab6fe2f99..26c64f81a773f 100644
--- a/llvm/test/Bitcode/summary_version.ll
+++ b/llvm/test/Bitcode/summary_version.ll
@@ -2,7 +2,7 @@
; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
-; CHECK: <VERSION op0=9/>
+; CHECK: <VERSION op0=10/>
diff --git a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
index 19e228fd5355c..b3f1e770810d2 100644
--- a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
+++ b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
@@ -11,7 +11,7 @@
; RUN: llvm-dis -o - %t.o | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=DIS
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
-; CHECK-NEXT: <VERSION op0=9/>
+; CHECK-NEXT: <VERSION op0=
; CHECK-NEXT: <FLAGS op0=0/>
; The `VALUE_GUID` below represents the "_ZTV4Base" referenced by the instruction
; that loads vtable pointers.
>From 3ad6a4ee65c6a76e227f2e38b081cf9f7ff362e0 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Thu, 11 Jul 2024 14:01:23 -0700
Subject: [PATCH 4/4] Fix typo in original comment
---
llvm/include/llvm/Bitcode/LLVMBitCodes.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 4e6ef4c1a8c31..184bbe32df695 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -307,7 +307,7 @@ enum GlobalValueSummarySymtabCodes {
// [valueid, n x stackidindex]
FS_PERMODULE_CALLSITE_INFO = 26,
// Summary of per-module allocation memprof metadata.
- // [nummib, nummib x (alloc type, nummib, nummib x stackidindex),
+ // [nummib, nummib x (alloc type, numstackids, numstackids x stackidindex),
// [nummib x total size]?]
FS_PERMODULE_ALLOC_INFO = 27,
// Summary of combined index memprof callsite metadata.
More information about the llvm-commits
mailing list