[llvm] ccb4702 - [MemProf] Use radix tree for alloc contexts in bitcode summaries (#117066)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 22 14:49:59 PST 2024
Author: Teresa Johnson
Date: 2024-11-22T14:49:55-08:00
New Revision: ccb4702038900d82d1041ff610788740f5cef723
URL: https://github.com/llvm/llvm-project/commit/ccb4702038900d82d1041ff610788740f5cef723
DIFF: https://github.com/llvm/llvm-project/commit/ccb4702038900d82d1041ff610788740f5cef723.diff
LOG: [MemProf] Use radix tree for alloc contexts in bitcode summaries (#117066)
Leverage the support added to represent allocation contexts in a more
compact way via a radix tree in the indexed profile to similarly reduce
sizes of the bitcode summaries.
For a large target, this reduced the size of the per-module summaries by
about 18% and that of the distributed combined index files by 28%.
Added:
llvm/test/ThinLTO/X86/Inputs/memprof-old-alloc-context-summary.bc
llvm/test/ThinLTO/X86/memprof-old-alloc-context-summary.ll
Modified:
llvm/include/llvm/Bitcode/LLVMBitCodes.h
llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
llvm/lib/Bitcode/Reader/BitcodeReader.cpp
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
llvm/lib/ProfileData/MemProf.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index a0fb32f67e3858..41909a8fc1d590 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -307,12 +307,12 @@ enum GlobalValueSummarySymtabCodes {
// [valueid, n x stackidindex]
FS_PERMODULE_CALLSITE_INFO = 26,
// Summary of per-module allocation memprof metadata.
- // [nummib, nummib x (alloc type, numstackids, numstackids x stackidindex),
+ // [nummib, nummib x (alloc type, context radix tree index),
// [nummib x (numcontext x total size)]?]
FS_PERMODULE_ALLOC_INFO = 27,
// Summary of combined index memprof callsite metadata.
- // [valueid, numstackindices, numver,
- // numstackindices x stackidindex, numver x version]
+ // [valueid, context radix tree index, numver,
+ // numver x version]
FS_COMBINED_CALLSITE_INFO = 28,
// Summary of combined index allocation memprof metadata.
// [nummib, numver,
@@ -331,6 +331,10 @@ enum GlobalValueSummarySymtabCodes {
// the entries must be in the exact same order as the corresponding sizes.
// [nummib x (numcontext x full stack id)]
FS_ALLOC_CONTEXT_IDS = 31,
+ // Linearized radix tree of allocation contexts. See the description above the
+ // CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format.
+ // [n x entry]
+ FS_CONTEXT_RADIX_TREE_ARRAY = 32,
};
enum MetadataCodes {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index 8f79ccdb9ff75f..032c0de3c7a00f 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -329,6 +329,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO)
STRINGIFY_CODE(FS, STACK_IDS)
STRINGIFY_CODE(FS, ALLOC_CONTEXT_IDS)
+ STRINGIFY_CODE(FS, CONTEXT_RADIX_TREE_ARRAY)
}
case bitc::METADATA_ATTACHMENT_ID:
switch (CodeID) {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 3e6abacac27261..11fbe6e6158eec 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -987,6 +987,10 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
/// ids from the lists in the callsite and alloc entries to the index.
std::vector<uint64_t> StackIds;
+ /// Linearized radix tree of allocation contexts. See the description above
+ /// the CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format.
+ std::vector<uint64_t> RadixArray;
+
public:
ModuleSummaryIndexBitcodeReader(
BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex,
@@ -1013,6 +1017,8 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
TypeIdCompatibleVtableInfo &TypeId);
std::vector<FunctionSummary::ParamAccess>
parseParamAccesses(ArrayRef<uint64_t> Record);
+ SmallVector<unsigned> parseAllocInfoContext(ArrayRef<uint64_t> Record,
+ unsigned &I);
template <bool AllowNullValueInfo = false>
std::pair<ValueInfo, GlobalValue::GUID>
@@ -7544,6 +7550,48 @@ void ModuleSummaryIndexBitcodeReader::parseTypeIdCompatibleVtableSummaryRecord(
parseTypeIdCompatibleVtableInfo(Record, Slot, TypeId);
}
+SmallVector<unsigned> ModuleSummaryIndexBitcodeReader::parseAllocInfoContext(
+ ArrayRef<uint64_t> Record, unsigned &I) {
+ SmallVector<unsigned> StackIdList;
+ // For backwards compatibility with old format before radix tree was
+ // used, simply see if we found a radix tree array record (and thus if
+ // the RadixArray is non-empty).
+ if (RadixArray.empty()) {
+ unsigned NumStackEntries = Record[I++];
+ assert(Record.size() - I >= NumStackEntries);
+ StackIdList.reserve(NumStackEntries);
+ for (unsigned J = 0; J < NumStackEntries; J++) {
+ assert(Record[I] < StackIds.size());
+ StackIdList.push_back(
+ TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]]));
+ }
+ } else {
+ unsigned RadixIndex = Record[I++];
+ // See the comments above CallStackRadixTreeBuilder in ProfileData/MemProf.h
+ // for a detailed description of the radix tree array format. Briefly, the
+ // first entry will be the number of frames, any negative values are the
+ // negative of the offset of the next frame, and otherwise the frames are in
+ // increasing linear order.
+ assert(RadixIndex < RadixArray.size());
+ unsigned NumStackIds = RadixArray[RadixIndex++];
+ StackIdList.reserve(NumStackIds);
+ while (NumStackIds--) {
+ assert(RadixIndex < RadixArray.size());
+ unsigned Elem = RadixArray[RadixIndex];
+ if (static_cast<std::make_signed_t<unsigned>>(Elem) < 0) {
+ RadixIndex = RadixIndex - Elem;
+ assert(RadixIndex < RadixArray.size());
+ Elem = RadixArray[RadixIndex];
+ // We shouldn't encounter a second offset in a row.
+ assert(static_cast<std::make_signed_t<unsigned>>(Elem) >= 0);
+ }
+ RadixIndex++;
+ StackIdList.push_back(TheIndex.addOrGetStackIdIndex(StackIds[Elem]));
+ }
+ }
+ return StackIdList;
+}
+
static void setSpecialRefs(SmallVectorImpl<ValueInfo> &Refs, unsigned ROCnt,
unsigned WOCnt) {
// Readonly and writeonly refs are in the end of the refs list.
@@ -8010,6 +8058,11 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
break;
}
+ case bitc::FS_CONTEXT_RADIX_TREE_ARRAY: { // [n x entry]
+ RadixArray = ArrayRef<uint64_t>(Record);
+ break;
+ }
+
case bitc::FS_PERMODULE_CALLSITE_INFO: {
unsigned ValueID = Record[0];
SmallVector<unsigned> StackIdList;
@@ -8065,14 +8118,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
(Version < 10 && I < Record.size())) {
assert(Record.size() - I >= 2);
AllocationType AllocType = (AllocationType)Record[I++];
- unsigned NumStackEntries = Record[I++];
- assert(Record.size() - I >= NumStackEntries);
- SmallVector<unsigned> StackIdList;
- for (unsigned J = 0; J < NumStackEntries; J++) {
- assert(Record[I] < StackIds.size());
- StackIdList.push_back(
- TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]]));
- }
+ auto StackIdList = parseAllocInfoContext(Record, I);
MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
}
// We either have nothing left or at least NumMIBs context size info
@@ -8123,14 +8169,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
while (MIBsRead++ < NumMIBs) {
assert(Record.size() - I >= 2);
AllocationType AllocType = (AllocationType)Record[I++];
- unsigned NumStackEntries = Record[I++];
- assert(Record.size() - I >= NumStackEntries);
- SmallVector<unsigned> StackIdList;
- for (unsigned J = 0; J < NumStackEntries; J++) {
- assert(Record[I] < StackIds.size());
- StackIdList.push_back(
- TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]]));
- }
+ auto StackIdList = parseAllocInfoContext(Record, I);
MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
}
assert(Record.size() - I >= NumVersions);
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 59e070a5110620..8f22a50a5e0245 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -60,6 +60,7 @@
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/IRSymtab.h"
+#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -83,6 +84,7 @@
#include <vector>
using namespace llvm;
+using namespace llvm::memprof;
static cl::opt<unsigned>
IndexThreshold("bitcode-mdindex-threshold", cl::Hidden, cl::init(25),
@@ -231,7 +233,8 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
unsigned CallsiteAbbrev, unsigned AllocAbbrev, unsigned ContextIdAbbvId,
- const Function &F);
+ const Function &F, DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
+ CallStackId &CallStackCount);
void writeModuleLevelReferences(const GlobalVariable &V,
SmallVector<uint64_t, 64> &NameVals,
unsigned FSModRefsAbbrev,
@@ -4195,12 +4198,58 @@ static void writeTypeIdCompatibleVtableSummaryRecord(
}
}
+// Adds the allocation contexts to the CallStacks map. We simply use the
+// size at the time the context was added as the CallStackId. This works because
+// when we look up the call stacks later on we process the function summaries
+// and their allocation records in the same exact order.
+static void collectMemProfCallStacks(
+ FunctionSummary *FS, std::function<LinearFrameId(unsigned)> GetStackIndex,
+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &CallStacks) {
+ // The interfaces in ProfileData/MemProf.h use a type alias for a stack frame
+ // id offset into the index of the full stack frames. The ModuleSummaryIndex
+ // currently uses unsigned. Make sure these stay in sync.
+ static_assert(std::is_same_v<LinearFrameId, unsigned>);
+ for (auto &AI : FS->allocs()) {
+ for (auto &MIB : AI.MIBs) {
+ SmallVector<unsigned> StackIdIndices;
+ StackIdIndices.reserve(MIB.StackIdIndices.size());
+ for (auto Id : MIB.StackIdIndices)
+ StackIdIndices.push_back(GetStackIndex(Id));
+ // The CallStackId is the size at the time this context was inserted.
+ CallStacks.insert({CallStacks.size(), StackIdIndices});
+ }
+ }
+}
+
+// Build the radix tree from the accumulated CallStacks, write out the resulting
+// linearized radix tree array, and return the map of call stack positions into
+// this array for use when writing the allocation records. The returned map is
+// indexed by a CallStackId which in this case is implicitly determined by the
+// order of function summaries and their allocation infos being written.
+static DenseMap<CallStackId, LinearCallStackId> writeMemoryProfileRadixTree(
+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &&CallStacks,
+ BitstreamWriter &Stream, unsigned RadixAbbrev) {
+ assert(!CallStacks.empty());
+ DenseMap<unsigned, FrameStat> FrameHistogram =
+ computeFrameHistogram<LinearFrameId>(CallStacks);
+ CallStackRadixTreeBuilder<LinearFrameId> Builder;
+ // We don't need a MemProfFrameIndexes map as we have already converted the
+ // full stack id hash to a linear offset into the StackIds array.
+ Builder.build(std::move(CallStacks), /*MemProfFrameIndexes=*/std::nullopt,
+ FrameHistogram);
+ Stream.EmitRecord(bitc::FS_CONTEXT_RADIX_TREE_ARRAY, Builder.getRadixArray(),
+ RadixAbbrev);
+ return Builder.takeCallStackPos();
+}
+
static void writeFunctionHeapProfileRecords(
BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev,
unsigned AllocAbbrev, unsigned ContextIdAbbvId, bool PerModule,
std::function<unsigned(const ValueInfo &VI)> GetValueID,
std::function<unsigned(unsigned)> GetStackIndex,
- bool WriteContextSizeInfoIndex) {
+ bool WriteContextSizeInfoIndex,
+ DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
+ CallStackId &CallStackCount) {
SmallVector<uint64_t> Record;
for (auto &CI : FS->callsites()) {
@@ -4234,9 +4283,9 @@ static void writeFunctionHeapProfileRecords(
Record.push_back(AI.Versions.size());
for (auto &MIB : AI.MIBs) {
Record.push_back((uint8_t)MIB.AllocType);
- Record.push_back(MIB.StackIdIndices.size());
- for (auto Id : MIB.StackIdIndices)
- Record.push_back(GetStackIndex(Id));
+ // Record the index into the radix tree array for this context.
+ assert(CallStackCount <= CallStackPos.size());
+ Record.push_back(CallStackPos[CallStackCount++]);
}
if (!PerModule) {
for (auto V : AI.Versions)
@@ -4282,7 +4331,9 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
unsigned ValueID, unsigned FSCallsRelBFAbbrev,
unsigned FSCallsProfileAbbrev, unsigned CallsiteAbbrev,
- unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F) {
+ unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F,
+ DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
+ CallStackId &CallStackCount) {
NameVals.push_back(ValueID);
FunctionSummary *FS = cast<FunctionSummary>(Summary);
@@ -4297,7 +4348,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
/*PerModule*/ true,
/*GetValueId*/ [&](const ValueInfo &VI) { return getValueId(VI); },
/*GetStackIndex*/ [&](unsigned I) { return I; },
- /*WriteContextSizeInfoIndex*/ true);
+ /*WriteContextSizeInfoIndex*/ true, CallStackPos, CallStackCount);
auto SpecialRefCnts = FS->specialRefCounts();
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
@@ -4530,12 +4581,54 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
- // n x (alloc type, numstackids, numstackids x stackidindex)
+ // n x (alloc type, context radix tree index)
// optional: nummib x (numcontext x total size)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
+ // n x entry
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ // First walk through all the functions and collect the allocation contexts in
+ // their associated summaries, for use in constructing a radix tree of
+ // contexts. Note that we need to do this in the same order as the functions
+ // are processed further below since the call stack positions in the resulting
+ // radix tree array are identified based on this order.
+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
+ for (const Function &F : M) {
+ // Summary emission does not support anonymous functions, they have to be
+ // renamed using the anonymous function renaming pass.
+ if (!F.hasName())
+ report_fatal_error("Unexpected anonymous function when writing summary");
+
+ ValueInfo VI = Index->getValueInfo(F.getGUID());
+ if (!VI || VI.getSummaryList().empty()) {
+ // Only declarations should not have a summary (a declaration might
+ // however have a summary if the def was in module level asm).
+ assert(F.isDeclaration());
+ continue;
+ }
+ auto *Summary = VI.getSummaryList()[0].get();
+ FunctionSummary *FS = cast<FunctionSummary>(Summary);
+ collectMemProfCallStacks(
+ FS, /*GetStackIndex*/ [](unsigned I) { return I; }, CallStacks);
+ }
+ // Finalize the radix tree, write it out, and get the map of positions in the
+ // linearized tree array.
+ DenseMap<CallStackId, LinearCallStackId> CallStackPos;
+ if (!CallStacks.empty()) {
+ CallStackPos =
+ writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev);
+ }
+
+ // Keep track of the current index into the CallStackPos map.
+ CallStackId CallStackCount = 0;
+
SmallVector<uint64_t, 64> NameVals;
// Iterate over the list of functions instead of the Index to
// ensure the ordering is stable.
@@ -4555,7 +4648,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
auto *Summary = VI.getSummaryList()[0].get();
writePerModuleFunctionSummaryRecord(
NameVals, Summary, VE.getValueID(&F), FSCallsRelBFAbbrev,
- FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F);
+ FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F,
+ CallStackPos, CallStackCount);
}
// Capture references from GlobalVariable initializers, which are outside
@@ -4692,13 +4786,20 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
- // nummib x (alloc type, numstackids, numstackids x stackidindex),
+ // nummib x (alloc type, context radix tree index),
// numver x version
// optional: nummib x total size
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
+ // n x entry
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool {
if (DecSummaries == nullptr)
return false;
@@ -4735,6 +4836,41 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals.clear();
};
+ // First walk through all the functions and collect the allocation contexts in
+ // their associated summaries, for use in constructing a radix tree of
+ // contexts. Note that we need to do this in the same order as the functions
+ // are processed further below since the call stack positions in the resulting
+ // radix tree array are identified based on this order.
+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
+ forEachSummary([&](GVInfo I, bool IsAliasee) {
+ GlobalValueSummary *S = I.second;
+ assert(S);
+ auto *FS = dyn_cast<FunctionSummary>(S);
+ if (!FS)
+ return;
+ collectMemProfCallStacks(
+ FS,
+ /*GetStackIndex*/
+ [&](unsigned I) {
+ // Get the corresponding index into the list of StackIds actually
+ // being written for this combined index (which may be a subset in
+ // the case of distributed indexes).
+ assert(StackIdIndicesToIndex.contains(I));
+ return StackIdIndicesToIndex[I];
+ },
+ CallStacks);
+ });
+ // Finalize the radix tree, write it out, and get the map of positions in the
+ // linearized tree array.
+ DenseMap<CallStackId, LinearCallStackId> CallStackPos;
+ if (!CallStacks.empty()) {
+ CallStackPos =
+ writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev);
+ }
+
+ // Keep track of the current index into the CallStackPos map.
+ CallStackId CallStackCount = 0;
+
DenseSet<GlobalValue::GUID> DefOrUseGUIDs;
forEachSummary([&](GVInfo I, bool IsAliasee) {
GlobalValueSummary *S = I.second;
@@ -4813,7 +4949,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
assert(StackIdIndicesToIndex.contains(I));
return StackIdIndicesToIndex[I];
},
- /*WriteContextSizeInfoIndex*/ false);
+ /*WriteContextSizeInfoIndex*/ false, CallStackPos, CallStackCount);
NameVals.push_back(*ValueId);
assert(ModuleIdMap.count(FS->modulePath()));
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 9615fdf77eb27e..70741ee4850bd8 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -510,6 +510,7 @@ void CallStackRadixTreeBuilder<FrameIdTy>::build(
// Explicitly instantiate class with the utilized FrameIdTy.
template class CallStackRadixTreeBuilder<FrameId>;
+template class CallStackRadixTreeBuilder<LinearFrameId>;
template <typename FrameIdTy>
llvm::DenseMap<FrameIdTy, FrameStat>
@@ -532,6 +533,10 @@ computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
template llvm::DenseMap<FrameId, FrameStat> computeFrameHistogram<FrameId>(
llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
&MemProfCallStackData);
+template llvm::DenseMap<LinearFrameId, FrameStat>
+computeFrameHistogram<LinearFrameId>(
+ llvm::MapVector<CallStackId, llvm::SmallVector<LinearFrameId>>
+ &MemProfCallStackData);
void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) {
for (const auto &AS : Record.AllocSites) {
diff --git a/llvm/test/ThinLTO/X86/Inputs/memprof-old-alloc-context-summary.bc b/llvm/test/ThinLTO/X86/Inputs/memprof-old-alloc-context-summary.bc
new file mode 100644
index 00000000000000..c98308f4637f84
Binary files /dev/null and b/llvm/test/ThinLTO/X86/Inputs/memprof-old-alloc-context-summary.bc differ
diff --git a/llvm/test/ThinLTO/X86/memprof-old-alloc-context-summary.ll b/llvm/test/ThinLTO/X86/memprof-old-alloc-context-summary.ll
new file mode 100644
index 00000000000000..20f95617915cc5
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-old-alloc-context-summary.ll
@@ -0,0 +1,28 @@
+;; Check that we can read the old *_ALLOC_INFO summary format that placed the
+;; stack id indexes directly in the alloc info summary, rather than encoding as
+;; a separate radix tree.
+;;
+;; The old bitcode was generated by the older compiler from `opt -thinlto-bc`
+;; on the following LLVM assembly:
+;;
+;; target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+;; target triple = "x86_64-unknown-linux-gnu"
+;;
+;; define internal ptr @_Z3barv() #0 {
+;; entry:
+;; %call = call ptr @_Znam(i64 0), !memprof !1, !callsite !6
+;; ret ptr null
+;; }
+;;
+;; declare ptr @_Znam(i64)
+;;
+;; !1 = !{!2, !4}
+;; !2 = !{!3, !"notcold"}
+;; !3 = !{i64 9086428284934609951, i64 8632435727821051414}
+;; !4 = !{!5, !"cold"}
+;; !5 = !{i64 9086428284934609951, i64 2732490490862098848}
+;; !6 = !{i64 9086428284934609951}
+
+; RUN: llvm-dis %S/Inputs/memprof-old-alloc-context-summary.bc -o - | FileCheck %s
+; CHECK: stackIds: (8632435727821051414)
+; CHECK-SAME: stackIds: (2732490490862098848)
More information about the llvm-commits
mailing list