[llvm] d89914f - [memprof] Add Version2 of IndexedMemProfRecord serialization (#87455)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 3 21:48:42 PDT 2024
Author: Kazu Hirata
Date: 2024-04-03T21:48:38-07:00
New Revision: d89914f30bc7c180fe349a5aa0f03438ae6c20a4
URL: https://github.com/llvm/llvm-project/commit/d89914f30bc7c180fe349a5aa0f03438ae6c20a4
DIFF: https://github.com/llvm/llvm-project/commit/d89914f30bc7c180fe349a5aa0f03438ae6c20a4.diff
LOG: [memprof] Add Version2 of IndexedMemProfRecord serialization (#87455)
I'm currently developing a new version of the indexed memprof format
where we deduplicate call stacks in IndexedAllocationInfo::CallStack
and IndexedMemProfRecord::CallSites. We refer to call stacks with
integer IDs, namely CallStackId, just as we refer to Frame with
FrameId. The deduplication will cut down the profile file size by 80%
in a large memprof file of mine.
As a step toward the goal, this patch teaches
IndexedMemProfRecord::{serialize,deserialize} to speak Version2. A
subsequent patch will add Version2 support to llvm-profdata.
The essense of the patch is to replace the serialization of a call
stack, a vector of FrameIDs, with that of a CallStackId. That is:
const IndexedAllocationInfo &N = ...;
...
LE.write<uint64_t>(N.CallStack.size());
for (const FrameId &Id : N.CallStack)
LE.write<FrameId>(Id);
becomes:
LE.write<CallStackId>(N.CSId);
Added:
Modified:
llvm/include/llvm/ProfileData/MemProf.h
llvm/lib/ProfileData/InstrProfReader.cpp
llvm/lib/ProfileData/InstrProfWriter.cpp
llvm/lib/ProfileData/MemProf.cpp
llvm/unittests/ProfileData/MemProfTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index ff00900a1466a8..110e6977026413 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -22,6 +22,8 @@ enum IndexedVersion : uint64_t {
Version0 = 0,
// Version 1: Added a version field to the header.
Version1 = 1,
+ // Version 2: Added a call stack table. Under development.
+ Version2 = 2,
};
constexpr uint64_t MinimumSupportedVersion = Version0;
@@ -289,23 +291,14 @@ struct IndexedAllocationInfo {
: CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB) {}
// Returns the size in bytes when this allocation info struct is serialized.
- size_t serializedSize() const {
- return sizeof(uint64_t) + // The number of frames to serialize.
- sizeof(FrameId) * CallStack.size() + // The callstack frame ids.
- PortableMemInfoBlock::serializedSize(); // The size of the payload.
- }
+ size_t serializedSize(IndexedVersion Version) const;
bool operator==(const IndexedAllocationInfo &Other) const {
if (Other.Info != Info)
return false;
- if (Other.CallStack.size() != CallStack.size())
+ if (Other.CSId != CSId)
return false;
-
- for (size_t J = 0; J < Other.CallStack.size(); J++) {
- if (Other.CallStack[J] != CallStack[J])
- return false;
- }
return true;
}
@@ -357,6 +350,9 @@ struct IndexedMemProfRecord {
// inline location list may include additional entries, users should pick
// the last entry in the list with the same function GUID.
llvm::SmallVector<llvm::SmallVector<FrameId>> CallSites;
+ // Conceptually the same as above. We are going to keep both CallSites and
+ // CallSiteIds while we are transitioning from CallSites to CallSiteIds.
+ llvm::SmallVector<CallStackId> CallSiteIds;
void clear() {
AllocSites.clear();
@@ -370,47 +366,31 @@ struct IndexedMemProfRecord {
CallSites.append(Other.CallSites);
}
- size_t serializedSize() const {
- size_t Result = sizeof(GlobalValue::GUID);
- for (const IndexedAllocationInfo &N : AllocSites)
- Result += N.serializedSize();
-
- // The number of callsites we have information for.
- Result += sizeof(uint64_t);
- for (const auto &Frames : CallSites) {
- // The number of frame ids to serialize.
- Result += sizeof(uint64_t);
- Result += Frames.size() * sizeof(FrameId);
- }
- return Result;
- }
+ size_t serializedSize(IndexedVersion Version) const;
bool operator==(const IndexedMemProfRecord &Other) const {
if (Other.AllocSites.size() != AllocSites.size())
return false;
- if (Other.CallSites.size() != CallSites.size())
- return false;
-
for (size_t I = 0; I < AllocSites.size(); I++) {
if (AllocSites[I] != Other.AllocSites[I])
return false;
}
- for (size_t I = 0; I < CallSites.size(); I++) {
- if (CallSites[I] != Other.CallSites[I])
- return false;
- }
+ if (Other.CallSiteIds != CallSiteIds)
+ return false;
return true;
}
// Serializes the memprof records in \p Records to the ostream \p OS based
// on the schema provided in \p Schema.
- void serialize(const MemProfSchema &Schema, raw_ostream &OS);
+ void serialize(const MemProfSchema &Schema, raw_ostream &OS,
+ IndexedVersion Version);
// Deserializes memprof records from the Buffer.
static IndexedMemProfRecord deserialize(const MemProfSchema &Schema,
- const unsigned char *Buffer);
+ const unsigned char *Buffer,
+ IndexedVersion Version);
// Returns the GUID for the function name after canonicalization. For
// memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
@@ -480,7 +460,8 @@ class RecordLookupTrait {
using offset_type = uint64_t;
RecordLookupTrait() = delete;
- RecordLookupTrait(const MemProfSchema &S) : Schema(S) {}
+ RecordLookupTrait(IndexedVersion V, const MemProfSchema &S)
+ : Version(V), Schema(S) {}
static bool EqualKey(uint64_t A, uint64_t B) { return A == B; }
static uint64_t GetInternalKey(uint64_t K) { return K; }
@@ -507,11 +488,13 @@ class RecordLookupTrait {
data_type ReadData(uint64_t K, const unsigned char *D,
offset_type /*Unused*/) {
- Record = IndexedMemProfRecord::deserialize(Schema, D);
+ Record = IndexedMemProfRecord::deserialize(Schema, D, Version);
return Record;
}
private:
+ // Holds the MemProf version.
+ IndexedVersion Version;
// Holds the memprof schema used to deserialize records.
MemProfSchema Schema;
// Holds the records from one function deserialized from the indexed format.
@@ -519,7 +502,7 @@ class RecordLookupTrait {
};
// Trait for writing IndexedMemProfRecord data to the on-disk hash table.
-class RecordWriterTrait {
+template <IndexedVersion Version> class RecordWriterTrait {
public:
using key_type = uint64_t;
using key_type_ref = uint64_t;
@@ -546,7 +529,7 @@ class RecordWriterTrait {
endian::Writer LE(Out, llvm::endianness::little);
offset_type N = sizeof(K);
LE.write<offset_type>(N);
- offset_type M = V.serializedSize();
+ offset_type M = V.serializedSize(Version);
LE.write<offset_type>(M);
return std::make_pair(N, M);
}
@@ -560,7 +543,7 @@ class RecordWriterTrait {
void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
offset_type /*Unused*/) {
assert(Schema != nullptr && "MemProf schema is not initialized!");
- V.serialize(*Schema, Out);
+ V.serialize(*Schema, Out, Version);
// Clear the IndexedMemProfRecord which results in clearing/freeing its
// vectors of allocs and callsites. This is owned by the associated on-disk
// hash table, but unused after this point. See also the comment added to
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 7ac5c561dc0809..884334ed070e8d 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1303,7 +1303,7 @@ Error IndexedInstrProfReader::readHeader() {
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
/*Buckets=*/Start + RecordTableOffset,
/*Payload=*/Ptr,
- /*Base=*/Start, memprof::RecordLookupTrait(Schema)));
+ /*Base=*/Start, memprof::RecordLookupTrait(memprof::Version1, Schema)));
// Initialize the frame table reader with the payload and bucket offsets.
MemProfFrameTable.reset(MemProfFrameHashTable::Create(
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index c2c94ba30c6583..a1bc180a53ca34 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -557,9 +557,11 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
OS.write(static_cast<uint64_t>(Id));
}
- auto RecordWriter = std::make_unique<memprof::RecordWriterTrait>();
+ auto RecordWriter =
+ std::make_unique<memprof::RecordWriterTrait<memprof::Version1>>();
RecordWriter->Schema = &Schema;
- OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
+ OnDiskChainedHashTableGenerator<
+ memprof::RecordWriterTrait<memprof::Version1>>
RecordTableGenerator;
for (auto &I : MemProfRecordData) {
// Insert the key (func hash) and value (memprof record).
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 6c419811d59e2f..ac0a8702c3f9cf 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -10,15 +10,88 @@
namespace llvm {
namespace memprof {
+namespace {
+size_t serializedSizeV0(const IndexedAllocationInfo &IAI) {
+ size_t Size = 0;
+ // The number of frames to serialize.
+ Size += sizeof(uint64_t);
+ // The callstack frame ids.
+ Size += sizeof(FrameId) * IAI.CallStack.size();
+ // The size of the payload.
+ Size += PortableMemInfoBlock::serializedSize();
+ return Size;
+}
-void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
- raw_ostream &OS) {
+size_t serializedSizeV2(const IndexedAllocationInfo &IAI) {
+ size_t Size = 0;
+ // The CallStackId
+ Size += sizeof(CallStackId);
+ // The size of the payload.
+ Size += PortableMemInfoBlock::serializedSize();
+ return Size;
+}
+} // namespace
+
+size_t IndexedAllocationInfo::serializedSize(IndexedVersion Version) const {
+ switch (Version) {
+ case Version0:
+ case Version1:
+ return serializedSizeV0(*this);
+ case Version2:
+ return serializedSizeV2(*this);
+ }
+ llvm_unreachable("unsupported MemProf version");
+}
+
+namespace {
+size_t serializedSizeV0(const IndexedMemProfRecord &Record) {
+ size_t Result = sizeof(GlobalValue::GUID);
+ for (const IndexedAllocationInfo &N : Record.AllocSites)
+ Result += N.serializedSize(Version0);
+
+ // The number of callsites we have information for.
+ Result += sizeof(uint64_t);
+ for (const auto &Frames : Record.CallSites) {
+ // The number of frame ids to serialize.
+ Result += sizeof(uint64_t);
+ Result += Frames.size() * sizeof(FrameId);
+ }
+ return Result;
+}
+
+size_t serializedSizeV2(const IndexedMemProfRecord &Record) {
+ size_t Result = sizeof(GlobalValue::GUID);
+ for (const IndexedAllocationInfo &N : Record.AllocSites)
+ Result += N.serializedSize(Version2);
+
+ // The number of callsites we have information for.
+ Result += sizeof(uint64_t);
+ // The CallStackId
+ Result += Record.CallSiteIds.size() * sizeof(CallStackId);
+ return Result;
+}
+} // namespace
+
+size_t IndexedMemProfRecord::serializedSize(IndexedVersion Version) const {
+ switch (Version) {
+ case Version0:
+ case Version1:
+ return serializedSizeV0(*this);
+ case Version2:
+ return serializedSizeV2(*this);
+ }
+ llvm_unreachable("unsupported MemProf version");
+}
+
+namespace {
+void serializeV0(const IndexedMemProfRecord &Record,
+ const MemProfSchema &Schema, raw_ostream &OS) {
using namespace support;
endian::Writer LE(OS, llvm::endianness::little);
- LE.write<uint64_t>(AllocSites.size());
- for (const IndexedAllocationInfo &N : AllocSites) {
+ LE.write<uint64_t>(Record.AllocSites.size());
+ for (const IndexedAllocationInfo &N : Record.AllocSites) {
LE.write<uint64_t>(N.CallStack.size());
for (const FrameId &Id : N.CallStack)
LE.write<FrameId>(Id);
@@ -26,17 +99,50 @@ void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
}
// Related contexts.
- LE.write<uint64_t>(CallSites.size());
- for (const auto &Frames : CallSites) {
+ LE.write<uint64_t>(Record.CallSites.size());
+ for (const auto &Frames : Record.CallSites) {
LE.write<uint64_t>(Frames.size());
for (const FrameId &Id : Frames)
LE.write<FrameId>(Id);
}
}
-IndexedMemProfRecord
-IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
- const unsigned char *Ptr) {
+void serializeV2(const IndexedMemProfRecord &Record,
+ const MemProfSchema &Schema, raw_ostream &OS) {
+ using namespace support;
+
+ endian::Writer LE(OS, llvm::endianness::little);
+
+ LE.write<uint64_t>(Record.AllocSites.size());
+ for (const IndexedAllocationInfo &N : Record.AllocSites) {
+ LE.write<CallStackId>(N.CSId);
+ N.Info.serialize(Schema, OS);
+ }
+
+ // Related contexts.
+ LE.write<uint64_t>(Record.CallSiteIds.size());
+ for (const auto &CSId : Record.CallSiteIds)
+ LE.write<CallStackId>(CSId);
+}
+} // namespace
+
+void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
+ raw_ostream &OS, IndexedVersion Version) {
+ switch (Version) {
+ case Version0:
+ case Version1:
+ serializeV0(*this, Schema, OS);
+ return;
+ case Version2:
+ serializeV2(*this, Schema, OS);
+ return;
+ }
+ llvm_unreachable("unsupported MemProf version");
+}
+
+namespace {
+IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema,
+ const unsigned char *Ptr) {
using namespace support;
IndexedMemProfRecord Record;
@@ -73,11 +179,57 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
Frames.push_back(Id);
}
Record.CallSites.push_back(Frames);
+ Record.CallSiteIds.push_back(hashCallStack(Frames));
}
return Record;
}
+IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
+ const unsigned char *Ptr) {
+ using namespace support;
+
+ IndexedMemProfRecord Record;
+
+ // Read the meminfo nodes.
+ const uint64_t NumNodes =
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ for (uint64_t I = 0; I < NumNodes; I++) {
+ IndexedAllocationInfo Node;
+ Node.CSId =
+ endian::readNext<CallStackId, llvm::endianness::little, unaligned>(Ptr);
+ Node.Info.deserialize(Schema, Ptr);
+ Ptr += PortableMemInfoBlock::serializedSize();
+ Record.AllocSites.push_back(Node);
+ }
+
+ // Read the callsite information.
+ const uint64_t NumCtxs =
+ endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+ for (uint64_t J = 0; J < NumCtxs; J++) {
+ CallStackId CSId =
+ endian::readNext<CallStackId, llvm::endianness::little, unaligned>(Ptr);
+ Record.CallSiteIds.push_back(CSId);
+ }
+
+ return Record;
+}
+} // namespace
+
+IndexedMemProfRecord
+IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
+ const unsigned char *Ptr,
+ IndexedVersion Version) {
+ switch (Version) {
+ case Version0:
+ case Version1:
+ return deserializeV0(Schema, Ptr);
+ case Version2:
+ return deserializeV2(Schema, Ptr);
+ }
+ llvm_unreachable("unsupported MemProf version");
+}
+
GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
// Canonicalize the function name to drop suffixes such as ".llvm.". Note
// we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 1cca44e9b03707..f1aa6f37aa399f 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -265,7 +265,9 @@ TEST(MemProf, PortableWrapper) {
EXPECT_EQ(3UL, ReadBlock.getAllocCpuId());
}
-TEST(MemProf, RecordSerializationRoundTrip) {
+// Version0 and Version1 serialize IndexedMemProfRecord in the same format, so
+// we share one test.
+TEST(MemProf, RecordSerializationRoundTripVersion0And1) {
const MemProfSchema Schema = getFullSchema();
MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000,
@@ -284,14 +286,47 @@ TEST(MemProf, RecordSerializationRoundTrip) {
Info);
}
Record.CallSites.assign(CallSites);
+ for (const auto &CS : CallSites)
+ Record.CallSiteIds.push_back(llvm::memprof::hashCallStack(CS));
std::string Buffer;
llvm::raw_string_ostream OS(Buffer);
- Record.serialize(Schema, OS);
+ Record.serialize(Schema, OS, llvm::memprof::Version0);
OS.flush();
const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize(
- Schema, reinterpret_cast<const unsigned char *>(Buffer.data()));
+ Schema, reinterpret_cast<const unsigned char *>(Buffer.data()),
+ llvm::memprof::Version0);
+
+ EXPECT_EQ(Record, GotRecord);
+}
+
+TEST(MemProf, RecordSerializationRoundTripVerion2) {
+ const MemProfSchema Schema = getFullSchema();
+
+ MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000,
+ /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3,
+ /*dealloc_cpu=*/4);
+
+ llvm::SmallVector<llvm::memprof::CallStackId> CallStackIds = {0x123, 0x456};
+
+ llvm::SmallVector<llvm::memprof::CallStackId> CallSiteIds = {0x333, 0x444};
+
+ IndexedMemProfRecord Record;
+ for (const auto &CSId : CallStackIds) {
+ // Use the same info block for both allocation sites.
+ Record.AllocSites.emplace_back(llvm::SmallVector<FrameId>(), CSId, Info);
+ }
+ Record.CallSiteIds.assign(CallSiteIds);
+
+ std::string Buffer;
+ llvm::raw_string_ostream OS(Buffer);
+ Record.serialize(Schema, OS, llvm::memprof::Version2);
+ OS.flush();
+
+ const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize(
+ Schema, reinterpret_cast<const unsigned char *>(Buffer.data()),
+ llvm::memprof::Version2);
EXPECT_EQ(Record, GotRecord);
}
More information about the llvm-commits
mailing list