[llvm] [memprof] Add Version2 of the indexed MemProf format (PR #89100)
Kazu Hirata via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 18 13:58:45 PDT 2024
https://github.com/kazutakahirata updated https://github.com/llvm/llvm-project/pull/89100
>From ac79223ae26d6a2d702b0067cf4dca114b5fc581 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Thu, 11 Apr 2024 10:24:26 -0700
Subject: [PATCH 1/4] [memprof] Add Version2 of the indexed MemProf format
This patch adds Version2 of the indexed MemProf format. The new
format comes with a hash table from CallStackId to actual call stacks
llvm::SmallVector<FrameId>. The rest of the format refers to call
stacks with CallStackId. This "values + references" model effectively
deduplicates call stacks. Without this patch, a large indexed memprof
file of mine shrinks from 4.4GB to 1.6GB, a 64% reduction.
This patch does not make Version2 generally available yet as I am
planning to make a few more changes to the format.
---
.../llvm/ProfileData/InstrProfReader.h | 4 +
.../llvm/ProfileData/InstrProfWriter.h | 10 ++
llvm/include/llvm/ProfileData/MemProf.h | 92 ++++++++++++++++++-
llvm/include/llvm/ProfileData/MemProfReader.h | 6 ++
llvm/lib/ProfileData/InstrProfReader.cpp | 66 +++++++++++--
llvm/lib/ProfileData/InstrProfWriter.cpp | 90 ++++++++++++++++--
.../tools/llvm-profdata/memprof-merge-v0.test | 3 +
llvm/tools/llvm-profdata/llvm-profdata.cpp | 15 ++-
8 files changed, 270 insertions(+), 16 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index f662febb9216bb..c45b9084744d63 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -560,6 +560,8 @@ using MemProfRecordHashTable =
OnDiskIterableChainedHashTable<memprof::RecordLookupTrait>;
using MemProfFrameHashTable =
OnDiskIterableChainedHashTable<memprof::FrameLookupTrait>;
+using MemProfCallStackHashTable =
+ OnDiskIterableChainedHashTable<memprof::CallStackLookupTrait>;
template <typename HashTableImpl>
class InstrProfReaderItaniumRemapper;
@@ -666,6 +668,8 @@ class IndexedInstrProfReader : public InstrProfReader {
std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
/// MemProf frame profile data on-disk indexed via frame id.
std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
+ /// MemProf call stack data on-disk indexed via call stack id.
+ std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable;
/// VTableNamePtr points to the beginning of compressed vtable names.
/// When a symtab is constructed from profiles by llvm-profdata, the list of
/// names could be decompressed based on `VTableNamePtr` and
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 4b42392bc1e061..66372a2253a1e7 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -60,6 +60,10 @@ class InstrProfWriter {
// inline.
llvm::MapVector<memprof::FrameId, memprof::Frame> MemProfFrameData;
+ // A map to hold call stack id to call stacks.
+ llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
+ MemProfCallStackData;
+
// List of binary ids.
std::vector<llvm::object::BuildID> BinaryIds;
@@ -114,6 +118,12 @@ class InstrProfWriter {
bool addMemProfFrame(const memprof::FrameId, const memprof::Frame &F,
function_ref<void(Error)> Warn);
+ /// Add a call stack identified by the hash of the contents of the call stack
+ /// in \p CallStack.
+ bool addMemProfCallStack(const memprof::CallStackId CSId,
+ const llvm::SmallVector<memprof::FrameId> &CallStack,
+ function_ref<void(Error)> Warn);
+
// Add a binary id to the binary ids list.
void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 7f3956bd739390..07f03fe1ac57f2 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -29,7 +29,7 @@ enum IndexedVersion : uint64_t {
};
constexpr uint64_t MinimumSupportedVersion = Version0;
-constexpr uint64_t MaximumSupportedVersion = Version1;
+constexpr uint64_t MaximumSupportedVersion = Version2;
// Verify that the minimum and maximum satisfy the obvious constraint.
static_assert(MinimumSupportedVersion <= MaximumSupportedVersion);
@@ -633,6 +633,96 @@ class FrameLookupTrait {
}
};
+// Trait for writing call stacks to the on-disk hash table.
+class CallStackWriterTrait {
+public:
+ using key_type = CallStackId;
+ using key_type_ref = CallStackId;
+
+ using data_type = llvm::SmallVector<FrameId>;
+ using data_type_ref = llvm::SmallVector<FrameId> &;
+
+ using hash_value_type = CallStackId;
+ using offset_type = uint64_t;
+
+ static hash_value_type ComputeHash(key_type_ref K) { return K; }
+
+ static std::pair<offset_type, offset_type>
+ EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
+ using namespace support;
+ endian::Writer LE(Out, llvm::endianness::little);
+ offset_type N = sizeof(K);
+ LE.write<offset_type>(N);
+ offset_type M = sizeof(CallStackId) * V.size();
+ LE.write<offset_type>(M);
+ return std::make_pair(N, M);
+ }
+
+ void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
+ using namespace support;
+ endian::Writer LE(Out, llvm::endianness::little);
+ LE.write<key_type>(K);
+ }
+
+ void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
+ offset_type /*Unused*/) {
+ using namespace support;
+ endian::Writer LE(Out, llvm::endianness::little);
+ // Emit the frames. We do not explicitly emit the length of the vector
+ // because it can be inferred from the data length.
+ for (FrameId F : V)
+ LE.write<FrameId>(F);
+ }
+};
+
+// Trait for reading call stack mappings from the on-disk hash table.
+class CallStackLookupTrait {
+public:
+ using data_type = const llvm::SmallVector<FrameId>;
+ using internal_key_type = CallStackId;
+ using external_key_type = CallStackId;
+ using hash_value_type = CallStackId;
+ using offset_type = uint64_t;
+
+ static bool EqualKey(internal_key_type A, internal_key_type B) {
+ return A == B;
+ }
+ static uint64_t GetInternalKey(internal_key_type K) { return K; }
+ static uint64_t GetExternalKey(external_key_type K) { return K; }
+
+ hash_value_type ComputeHash(internal_key_type K) { return K; }
+
+ static std::pair<offset_type, offset_type>
+ ReadKeyDataLength(const unsigned char *&D) {
+ using namespace support;
+
+ offset_type KeyLen =
+ endian::readNext<offset_type, llvm::endianness::little>(D);
+ offset_type DataLen =
+ endian::readNext<offset_type, llvm::endianness::little>(D);
+ return std::make_pair(KeyLen, DataLen);
+ }
+
+ uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
+ using namespace support;
+ return endian::readNext<external_key_type, llvm::endianness::little>(D);
+ }
+
+ data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) {
+ using namespace support;
+ llvm::SmallVector<FrameId> CS;
+ // Derive the number of frames from the data length.
+ uint64_t NumFrames = Length / sizeof(CallStackId);
+ assert(Length % sizeof(CallStackId) == 0);
+ CS.reserve(NumFrames);
+ for (size_t I = 0; I != NumFrames; ++I) {
+ FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D);
+ CS.push_back(F);
+ }
+ return CS;
+ }
+};
+
// Compute a CallStackId for a given call stack.
CallStackId hashCallStack(ArrayRef<FrameId> CS);
diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index 7fa8af184dc93b..444c58e8bdc8bc 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -51,6 +51,12 @@ class MemProfReader {
return IdToFrame;
}
+ // Return a const reference to the internal Id to call stacks.
+ const llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> &
+ getCallStacks() const {
+ return CSIdToCallStack;
+ }
+
// Return a const reference to the internal function profile data.
const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> &
getProfileData() const {
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 8574a96a1b06fc..ef509b821ff71b 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1249,14 +1249,14 @@ Error IndexedInstrProfReader::readHeader() {
// Read the first 64-bit word, which may be RecordTableOffset in
// memprof::MemProfVersion0 or the MemProf version number in
- // memprof::MemProfVersion1.
+ // memprof::MemProfVersion1 and above.
const uint64_t FirstWord =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
memprof::IndexedVersion Version = memprof::Version0;
- if (FirstWord == memprof::Version1) {
+ if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2) {
// Everything is good. We can proceed to deserialize the rest.
- Version = memprof::Version1;
+ Version = static_cast<memprof::IndexedVersion>(FirstWord);
} else if (FirstWord >= 24) {
// This is a heuristic/hack to detect memprof::MemProfVersion0,
// which does not have a version field in the header.
@@ -1286,6 +1286,18 @@ Error IndexedInstrProfReader::readHeader() {
const uint64_t FrameTableOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+ // The offset in the stream right before invoking
+ // CallStackTableGenerator.Emit.
+ uint64_t CallStackPayloadOffset = 0;
+ // The value returned from CallStackTableGenerator.Emit.
+ uint64_t CallStackTableOffset = 0;
+ if (Version >= memprof::Version2) {
+ CallStackPayloadOffset =
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+ CallStackTableOffset =
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+ }
+
// Read the schema.
auto SchemaOr = memprof::readMemProfSchema(Ptr);
if (!SchemaOr)
@@ -1296,7 +1308,7 @@ Error IndexedInstrProfReader::readHeader() {
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
/*Buckets=*/Start + RecordTableOffset,
/*Payload=*/Ptr,
- /*Base=*/Start, memprof::RecordLookupTrait(memprof::Version1, Schema)));
+ /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
// Initialize the frame table reader with the payload and bucket offsets.
MemProfFrameTable.reset(MemProfFrameHashTable::Create(
@@ -1304,12 +1316,22 @@ Error IndexedInstrProfReader::readHeader() {
/*Payload=*/Start + FramePayloadOffset,
/*Base=*/Start, memprof::FrameLookupTrait()));
+ if (Version >= memprof::Version2)
+ MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
+ /*Buckets=*/Start + CallStackTableOffset,
+ /*Payload=*/Start + CallStackPayloadOffset,
+ /*Base=*/Start, memprof::CallStackLookupTrait()));
+
#ifdef EXPENSIVE_CHECKS
// Go through all the records and verify that CSId has been correctly
// populated. Do this only under EXPENSIVE_CHECKS. Otherwise, we
// would defeat the purpose of OnDiskIterableChainedHashTable.
- for (const auto &Record : MemProfRecordTable->data())
- verifyIndexedMemProfRecord(Record);
+ // Note that we can compare CSId against actual call stacks only for
+ // Version0 and Version1 because IndexedAllocationInfo::CallStack and
+ // IndexedMemProfRecord::CallSites are not populated in Version2.
+ if (Version <= memprof::Version1)
+ for (const auto &Record : MemProfRecordTable->data())
+ verifyIndexedMemProfRecord(Record);
#endif
}
@@ -1502,7 +1524,29 @@ IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
return *FrIter;
};
- memprof::MemProfRecord Record(*Iter, IdToFrameCallback);
+ // Setup a callback to convert call stack ids to call stacks using the on-disk
+ // hash table.
+ std::optional<memprof::CallStackId> LastUnmappedCSId;
+ auto CSIdToCallStackCallback = [&](memprof::CallStackId CSId) {
+ llvm::SmallVector<memprof::Frame> Frames;
+ auto CSIter = MemProfCallStackTable->find(CSId);
+ if (CSIter == MemProfCallStackTable->end()) {
+ LastUnmappedFrameId = CSId;
+ } else {
+ const llvm::SmallVector<memprof::FrameId> &CS = *CSIter;
+ Frames.reserve(CS.size());
+ for (memprof::FrameId Id : CS)
+ Frames.push_back(IdToFrameCallback(Id));
+ }
+ return Frames;
+ };
+
+ const memprof::IndexedMemProfRecord IndexedRecord = *Iter;
+ memprof::MemProfRecord Record;
+ if (MemProfCallStackTable)
+ Record = IndexedRecord.toMemProfRecord(CSIdToCallStackCallback);
+ else
+ Record = memprof::MemProfRecord(IndexedRecord, IdToFrameCallback);
// Check that all frame ids were successfully converted to frames.
if (LastUnmappedFrameId) {
@@ -1510,6 +1554,14 @@ IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
"memprof frame not found for frame id " +
Twine(*LastUnmappedFrameId));
}
+
+ // Check that all call stack ids were successfully converted to call stacks.
+ if (LastUnmappedCSId) {
+ return make_error<InstrProfError>(
+ instrprof_error::hash_mismatch,
+ "memprof call stack not found for call stack id " +
+ Twine(*LastUnmappedCSId));
+ }
return Record;
}
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index ede042dd6b9c3c..6e01e52e5905ee 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -297,6 +297,23 @@ bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id,
return true;
}
+bool InstrProfWriter::addMemProfCallStack(
+ const memprof::CallStackId CSId,
+ const llvm::SmallVector<memprof::FrameId> &CallStack,
+ function_ref<void(Error)> Warn) {
+ auto Result = MemProfCallStackData.insert({CSId, CallStack});
+ // If a mapping already exists for the current call stack id and it does not
+ // match the new mapping provided then reset the existing contents and bail
+ // out. We don't support the merging of memprof data whose CallStack -> Id
+ // mapping across profiles is inconsistent.
+ if (!Result.second && Result.first->second != CallStack) {
+ Warn(make_error<InstrProfError>(instrprof_error::malformed,
+ "call stack to id mapping mismatch"));
+ return false;
+ }
+ return true;
+}
+
void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
llvm::append_range(BinaryIds, BIs);
}
@@ -378,6 +395,12 @@ void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
return;
}
+ MemProfCallStackData.reserve(IPW.MemProfCallStackData.size());
+ for (auto &[CSId, CallStack] : IPW.MemProfCallStackData) {
+ if (!addMemProfCallStack(CSId, CallStack, Warn))
+ return;
+ }
+
MemProfRecordData.reserve(IPW.MemProfRecordData.size());
for (auto &I : IPW.MemProfRecordData) {
addMemProfRecord(I.first, I.second);
@@ -427,8 +450,8 @@ static uint64_t writeMemProfRecords(
ProfOStream &OS,
llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
&MemProfRecordData,
- memprof::MemProfSchema *Schema) {
- memprof::RecordWriterTrait RecordWriter(memprof::Version1);
+ memprof::MemProfSchema *Schema, memprof::IndexedVersion Version) {
+ memprof::RecordWriterTrait RecordWriter(Version);
RecordWriter.Schema = Schema;
OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
RecordTableGenerator;
@@ -461,6 +484,20 @@ static uint64_t writeMemProfFrames(
return FrameTableGenerator.Emit(OS.OS);
}
+static uint64_t writeMemProfCallStacks(
+ ProfOStream &OS,
+ llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
+ &MemProfCallStackData) {
+ OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
+ CallStackTableGenerator;
+ for (auto &[CSId, CallStack] : MemProfCallStackData)
+ CallStackTableGenerator.insert(CSId, CallStack);
+ // Release the memory of this vector as it is no longer needed.
+ MemProfCallStackData.clear();
+
+ return CallStackTableGenerator.Emit(OS.OS);
+}
+
static Error writeMemProfV0(
ProfOStream &OS,
llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
@@ -475,7 +512,7 @@ static Error writeMemProfV0(
writeMemProfSchema(OS, Schema);
uint64_t RecordTableOffset =
- writeMemProfRecords(OS, MemProfRecordData, &Schema);
+ writeMemProfRecords(OS, MemProfRecordData, &Schema, memprof::Version0);
uint64_t FramePayloadOffset = OS.tell();
uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData);
@@ -501,7 +538,7 @@ static Error writeMemProfV1(
writeMemProfSchema(OS, Schema);
uint64_t RecordTableOffset =
- writeMemProfRecords(OS, MemProfRecordData, &Schema);
+ writeMemProfRecords(OS, MemProfRecordData, &Schema, memprof::Version1);
uint64_t FramePayloadOffset = OS.tell();
uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData);
@@ -512,6 +549,43 @@ static Error writeMemProfV1(
return Error::success();
}
+static Error writeMemProfV2(
+ ProfOStream &OS,
+ llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
+ &MemProfRecordData,
+ llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
+ llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
+ &MemProfCallStackData) {
+ OS.write(memprof::Version2);
+ uint64_t HeaderUpdatePos = OS.tell();
+ OS.write(0ULL); // Reserve space for the memprof record table offset.
+ OS.write(0ULL); // Reserve space for the memprof frame payload offset.
+ OS.write(0ULL); // Reserve space for the memprof frame table offset.
+ OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
+ OS.write(0ULL); // Reserve space for the memprof call stack table offset.
+
+ auto Schema = memprof::PortableMemInfoBlock::getSchema();
+ writeMemProfSchema(OS, Schema);
+
+ uint64_t RecordTableOffset =
+ writeMemProfRecords(OS, MemProfRecordData, &Schema, memprof::Version2);
+
+ uint64_t FramePayloadOffset = OS.tell();
+ uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData);
+
+ uint64_t CallStackPayloadOffset = OS.tell();
+ uint64_t CallStackTableOffset =
+ writeMemProfCallStacks(OS, MemProfCallStackData);
+
+ uint64_t Header[] = {
+ RecordTableOffset, FramePayloadOffset, FrameTableOffset,
+ CallStackPayloadOffset, CallStackTableOffset,
+ };
+ OS.patch({{HeaderUpdatePos, Header, std::size(Header)}});
+
+ return Error::success();
+}
+
// The MemProf profile data includes a simple schema
// with the format described below followed by the hashtable:
// uint64_t Version
@@ -530,6 +604,8 @@ static Error writeMemProf(
llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
&MemProfRecordData,
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
+ llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
+ &MemProfCallStackData,
memprof::IndexedVersion MemProfVersionRequested) {
switch (MemProfVersionRequested) {
@@ -538,8 +614,8 @@ static Error writeMemProf(
case memprof::Version1:
return writeMemProfV1(OS, MemProfRecordData, MemProfFrameData);
case memprof::Version2:
- // TODO: Implement. Fall through to the error handling below for now.
- break;
+ return writeMemProfV2(OS, MemProfRecordData, MemProfFrameData,
+ MemProfCallStackData);
}
return make_error<InstrProfError>(
@@ -658,7 +734,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
MemProfSectionStart = OS.tell();
if (auto E = writeMemProf(OS, MemProfRecordData, MemProfFrameData,
- MemProfVersionRequested))
+ MemProfCallStackData, MemProfVersionRequested))
return E;
}
diff --git a/llvm/test/tools/llvm-profdata/memprof-merge-v0.test b/llvm/test/tools/llvm-profdata/memprof-merge-v0.test
index 68132961eb78da..03ccbdd42efdad 100644
--- a/llvm/test/tools/llvm-profdata/memprof-merge-v0.test
+++ b/llvm/test/tools/llvm-profdata/memprof-merge-v0.test
@@ -13,6 +13,9 @@ RUN: llvm-profdata show %t.prof.v0 | FileCheck %s
RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=1 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v1
RUN: llvm-profdata show %t.prof.v1 | FileCheck %s
+RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=2 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v2
+RUN: llvm-profdata show %t.prof.v2 | FileCheck %s
+
For now we only check the validity of the instrumented profile since we don't
have a way to display the contents of the memprof indexed format yet.
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 66a4cea0353479..78daf9f7dc109a 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -305,7 +305,8 @@ cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
cl::desc("Specify the version of the memprof format to use"),
cl::init(memprof::Version0),
cl::values(clEnumValN(memprof::Version0, "0", "version 0"),
- clEnumValN(memprof::Version1, "1", "version 1")));
+ clEnumValN(memprof::Version1, "1", "version 1"),
+ clEnumValN(memprof::Version2, "2", "version 2")));
// Options specific to overlap subcommand.
cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
@@ -677,6 +678,18 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
if (!Succeeded)
return;
}
+
+ // Add the call stacks into the writer context.
+ const auto &CSIdToCallStacks = Reader->getCallStacks();
+ for (const auto &I : CSIdToCallStacks) {
+ bool Succeeded = WC->Writer.addMemProfCallStack(
+ /*Id=*/I.first, /*Frame=*/I.getSecond(), MemProfError);
+ // If we weren't able to add the call stacks then it doesn't make sense
+ // to try to add the records from this profile.
+ if (!Succeeded)
+ return;
+ }
+
const auto &FunctionProfileData = Reader->getProfileData();
// Add the memprof records into the writer context.
for (const auto &[GUID, Record] : FunctionProfileData) {
>From 90c2125bf08a212df29ce82e448f5285262d245f Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 17 Apr 2024 15:08:05 -0700
Subject: [PATCH 2/4] Address comments.
---
llvm/include/llvm/ProfileData/MemProf.h | 4 ++--
llvm/lib/ProfileData/InstrProfReader.cpp | 2 +-
llvm/lib/ProfileData/InstrProfWriter.cpp | 4 ++--
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 07f03fe1ac57f2..55548e3dde17cd 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -653,7 +653,7 @@ class CallStackWriterTrait {
endian::Writer LE(Out, llvm::endianness::little);
offset_type N = sizeof(K);
LE.write<offset_type>(N);
- offset_type M = sizeof(CallStackId) * V.size();
+ offset_type M = sizeof(FrameId) * V.size();
LE.write<offset_type>(M);
return std::make_pair(N, M);
}
@@ -712,7 +712,7 @@ class CallStackLookupTrait {
using namespace support;
llvm::SmallVector<FrameId> CS;
// Derive the number of frames from the data length.
- uint64_t NumFrames = Length / sizeof(CallStackId);
+ uint64_t NumFrames = Length / sizeof(FrameId);
assert(Length % sizeof(CallStackId) == 0);
CS.reserve(NumFrames);
for (size_t I = 0; I != NumFrames; ++I) {
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index ef509b821ff71b..1e42ae88adfe0a 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1531,7 +1531,7 @@ IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
llvm::SmallVector<memprof::Frame> Frames;
auto CSIter = MemProfCallStackTable->find(CSId);
if (CSIter == MemProfCallStackTable->end()) {
- LastUnmappedFrameId = CSId;
+ LastUnmappedCSId = CSId;
} else {
const llvm::SmallVector<memprof::FrameId> &CS = *CSIter;
Frames.reserve(CS.size());
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 6e01e52e5905ee..f70a49778e752b 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -301,12 +301,12 @@ bool InstrProfWriter::addMemProfCallStack(
const memprof::CallStackId CSId,
const llvm::SmallVector<memprof::FrameId> &CallStack,
function_ref<void(Error)> Warn) {
- auto Result = MemProfCallStackData.insert({CSId, CallStack});
+ auto [Iter, Inserted] = MemProfCallStackData.insert({CSId, CallStack});
// If a mapping already exists for the current call stack id and it does not
// match the new mapping provided then reset the existing contents and bail
// out. We don't support the merging of memprof data whose CallStack -> Id
// mapping across profiles is inconsistent.
- if (!Result.second && Result.first->second != CallStack) {
+ if (!Inserted && Iter->second != CallStack) {
Warn(make_error<InstrProfError>(instrprof_error::malformed,
"call stack to id mapping mismatch"));
return false;
>From 57fe152f3f1067c2e75785f03af94b37861dad0e Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 17 Apr 2024 15:57:01 -0700
Subject: [PATCH 3/4] Add MemProfFileTest.cpp
---
.../llvm/ProfileData/InstrProfWriter.h | 3 +
llvm/unittests/ProfileData/InstrProfTest.cpp | 87 ++++++++++++++++++-
2 files changed, 89 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 66372a2253a1e7..0e83076611c18f 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -200,6 +200,9 @@ class InstrProfWriter {
// Internal interface for testing purpose only.
void setValueProfDataEndianness(llvm::endianness Endianness);
void setOutputSparse(bool Sparse);
+ void setMemProfVersionRequested(memprof::IndexedVersion Version) {
+ MemProfVersionRequested = Version;
+ }
// Compute the overlap b/w this object and Other. Program level result is
// stored in Overlap and function level result is stored in FuncLevelOverlap.
void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap,
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 82701c47daf71c..08dfcedad02724 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -347,6 +347,9 @@ using ::llvm::memprof::IndexedMemProfRecord;
using ::llvm::memprof::MemInfoBlock;
using FrameIdMapTy =
llvm::DenseMap<::llvm::memprof::FrameId, ::llvm::memprof::Frame>;
+using CallStackIdMapTy =
+ llvm::DenseMap<::llvm::memprof::CallStackId,
+ ::llvm::SmallVector<::llvm::memprof::FrameId>>;
static FrameIdMapTy getFrameMapping() {
FrameIdMapTy Mapping;
@@ -359,6 +362,14 @@ static FrameIdMapTy getFrameMapping() {
return Mapping;
}
+static CallStackIdMapTy getCallStackMapping() {
+ CallStackIdMapTy Mapping;
+ Mapping.insert({0x111, {0, 1}});
+ Mapping.insert({0x222, {2, 3}});
+ Mapping.insert({0x333, {4, 5}});
+ return Mapping;
+}
+
IndexedMemProfRecord makeRecord(
std::initializer_list<std::initializer_list<::llvm::memprof::FrameId>>
AllocFrames,
@@ -374,6 +385,19 @@ IndexedMemProfRecord makeRecord(
return MR;
}
+IndexedMemProfRecord
+makeRecordV2(std::initializer_list<::llvm::memprof::CallStackId> AllocFrames,
+ std::initializer_list<::llvm::memprof::CallStackId> CallSiteFrames,
+ const MemInfoBlock &Block = MemInfoBlock()) {
+ llvm::memprof::IndexedMemProfRecord MR;
+ for (const auto &CSId : AllocFrames)
+ MR.AllocSites.emplace_back(::llvm::SmallVector<memprof::FrameId>(), CSId,
+ Block);
+ for (const auto &CSId : CallSiteFrames)
+ MR.CallSiteIds.push_back(CSId);
+ return MR;
+}
+
MATCHER_P(EqualsRecord, Want, "") {
const memprof::MemProfRecord &Got = arg;
@@ -408,7 +432,7 @@ MATCHER_P(EqualsRecord, Want, "") {
return true;
}
-TEST_F(InstrProfTest, test_memprof) {
+TEST_F(InstrProfTest, test_memprof_v0) {
ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
Succeeded());
@@ -450,6 +474,67 @@ TEST_F(InstrProfTest, test_memprof) {
EXPECT_THAT(WantRecord, EqualsRecord(Record));
}
+TEST_F(InstrProfTest, test_memprof_v2) {
+ Writer.setMemProfVersionRequested(memprof::Version2);
+
+ ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
+ Succeeded());
+
+ const IndexedMemProfRecord IndexedMR = makeRecordV2(
+ /*AllocFrames=*/{0x111, 0x222},
+ /*CallSiteFrames=*/{0x333});
+ const FrameIdMapTy IdToFrameMap = getFrameMapping();
+ const auto CSIdToCallStackMap = getCallStackMapping();
+ for (const auto &I : IdToFrameMap) {
+ Writer.addMemProfFrame(I.first, I.getSecond(), Err);
+ }
+ for (const auto &I : CSIdToCallStackMap) {
+ Writer.addMemProfCallStack(I.first, I.getSecond(), Err);
+ }
+ Writer.addMemProfRecord(/*Id=*/0x9999, IndexedMR);
+
+ auto Profile = Writer.writeBuffer();
+ readProfile(std::move(Profile));
+
+ auto RecordOr = Reader->getMemProfRecord(0x9999);
+ ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
+ const memprof::MemProfRecord &Record = RecordOr.get();
+
+ std::optional<memprof::FrameId> LastUnmappedFrameId;
+ auto IdToFrameCallback = [&](const memprof::FrameId Id) {
+ auto Iter = IdToFrameMap.find(Id);
+ if (Iter == IdToFrameMap.end()) {
+ LastUnmappedFrameId = Id;
+ return memprof::Frame(0, 0, 0, false);
+ }
+ return Iter->second;
+ };
+
+ std::optional<::llvm::memprof::CallStackId> LastUnmappedCSId;
+ auto CSIdToCallStackCallback = [&](::llvm::memprof::CallStackId CSId) {
+ llvm::SmallVector<memprof::Frame> Frames;
+ auto CSIter = CSIdToCallStackMap.find(CSId);
+ if (CSIter == CSIdToCallStackMap.end()) {
+ LastUnmappedCSId = CSId;
+ } else {
+ const ::llvm::SmallVector<::llvm::memprof::FrameId> &CS =
+ CSIter->getSecond();
+ Frames.reserve(CS.size());
+ for (::llvm::memprof::FrameId Id : CS)
+ Frames.push_back(IdToFrameCallback(Id));
+ }
+ return Frames;
+ };
+
+ const ::llvm::memprof::MemProfRecord WantRecord =
+ IndexedMR.toMemProfRecord(CSIdToCallStackCallback);
+ ASSERT_FALSE(LastUnmappedFrameId.has_value())
+ << "could not map frame id: " << *LastUnmappedFrameId;
+ ASSERT_FALSE(LastUnmappedCSId.has_value())
+ << "could not map call stack id: " << *LastUnmappedCSId;
+ EXPECT_THAT(WantRecord, EqualsRecord(Record));
+}
+
TEST_F(InstrProfTest, test_memprof_getrecord_error) {
ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
Succeeded());
>From 57881fa6d98e34fab0d86c97ca51a53024dedd3d Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Thu, 18 Apr 2024 13:48:12 -0700
Subject: [PATCH 4/4] Address comments.
---
llvm/include/llvm/ProfileData/InstrProfWriter.h | 2 +-
llvm/unittests/ProfileData/InstrProfTest.cpp | 6 ++++--
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 0e83076611c18f..b0ae8f364fcafb 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -197,7 +197,7 @@ class InstrProfWriter {
return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage);
}
- // Internal interface for testing purpose only.
+ // Internal interfaces for testing purpose only.
void setValueProfDataEndianness(llvm::endianness Endianness);
void setOutputSparse(bool Sparse);
void setMemProfVersionRequested(memprof::IndexedVersion Version) {
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 08dfcedad02724..16b8b50914cc6b 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -391,6 +391,8 @@ makeRecordV2(std::initializer_list<::llvm::memprof::CallStackId> AllocFrames,
const MemInfoBlock &Block = MemInfoBlock()) {
llvm::memprof::IndexedMemProfRecord MR;
for (const auto &CSId : AllocFrames)
+ // We don't populate IndexedAllocationInfo::CallStack because we use it only
+ // in Version0 and Version1.
MR.AllocSites.emplace_back(::llvm::SmallVector<memprof::FrameId>(), CSId,
Block);
for (const auto &CSId : CallSiteFrames)
@@ -528,9 +530,9 @@ TEST_F(InstrProfTest, test_memprof_v2) {
const ::llvm::memprof::MemProfRecord WantRecord =
IndexedMR.toMemProfRecord(CSIdToCallStackCallback);
- ASSERT_FALSE(LastUnmappedFrameId.has_value())
+ ASSERT_EQ(LastUnmappedFrameId, std::nullopt)
<< "could not map frame id: " << *LastUnmappedFrameId;
- ASSERT_FALSE(LastUnmappedCSId.has_value())
+ ASSERT_EQ(LastUnmappedCSId, std::nullopt)
<< "could not map call stack id: " << *LastUnmappedCSId;
EXPECT_THAT(WantRecord, EqualsRecord(Record));
}
More information about the llvm-commits
mailing list