[llvm] d89914f - [memprof] Add Version2 of IndexedMemProfRecord serialization (#87455)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 3 21:48:42 PDT 2024


Author: Kazu Hirata
Date: 2024-04-03T21:48:38-07:00
New Revision: d89914f30bc7c180fe349a5aa0f03438ae6c20a4

URL: https://github.com/llvm/llvm-project/commit/d89914f30bc7c180fe349a5aa0f03438ae6c20a4
DIFF: https://github.com/llvm/llvm-project/commit/d89914f30bc7c180fe349a5aa0f03438ae6c20a4.diff

LOG: [memprof] Add Version2 of IndexedMemProfRecord serialization (#87455)

I'm currently developing a new version of the indexed memprof format
where we deduplicate call stacks in IndexedAllocationInfo::CallStack
and IndexedMemProfRecord::CallSites.  We refer to call stacks with
integer IDs, namely CallStackId, just as we refer to Frame with
FrameId.  The deduplication will cut down the profile file size by 80%
in a large memprof file of mine.

As a step toward the goal, this patch teaches
IndexedMemProfRecord::{serialize,deserialize} to speak Version2.  A
subsequent patch will add Version2 support to llvm-profdata.

The essense of the patch is to replace the serialization of a call
stack, a vector of FrameIDs, with that of a CallStackId.  That is:

  const IndexedAllocationInfo &N = ...;
  ...
  LE.write<uint64_t>(N.CallStack.size());
  for (const FrameId &Id : N.CallStack)
    LE.write<FrameId>(Id);

becomes:

  LE.write<CallStackId>(N.CSId);

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/MemProf.h
    llvm/lib/ProfileData/InstrProfReader.cpp
    llvm/lib/ProfileData/InstrProfWriter.cpp
    llvm/lib/ProfileData/MemProf.cpp
    llvm/unittests/ProfileData/MemProfTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index ff00900a1466a8..110e6977026413 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -22,6 +22,8 @@ enum IndexedVersion : uint64_t {
   Version0 = 0,
   // Version 1: Added a version field to the header.
   Version1 = 1,
+  // Version 2: Added a call stack table.  Under development.
+  Version2 = 2,
 };
 
 constexpr uint64_t MinimumSupportedVersion = Version0;
@@ -289,23 +291,14 @@ struct IndexedAllocationInfo {
       : CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB) {}
 
   // Returns the size in bytes when this allocation info struct is serialized.
-  size_t serializedSize() const {
-    return sizeof(uint64_t) + // The number of frames to serialize.
-           sizeof(FrameId) * CallStack.size() +    // The callstack frame ids.
-           PortableMemInfoBlock::serializedSize(); // The size of the payload.
-  }
+  size_t serializedSize(IndexedVersion Version) const;
 
   bool operator==(const IndexedAllocationInfo &Other) const {
     if (Other.Info != Info)
       return false;
 
-    if (Other.CallStack.size() != CallStack.size())
+    if (Other.CSId != CSId)
       return false;
-
-    for (size_t J = 0; J < Other.CallStack.size(); J++) {
-      if (Other.CallStack[J] != CallStack[J])
-        return false;
-    }
     return true;
   }
 
@@ -357,6 +350,9 @@ struct IndexedMemProfRecord {
   // inline location list may include additional entries, users should pick
   // the last entry in the list with the same function GUID.
   llvm::SmallVector<llvm::SmallVector<FrameId>> CallSites;
+  // Conceptually the same as above.  We are going to keep both CallSites and
+  // CallSiteIds while we are transitioning from CallSites to CallSiteIds.
+  llvm::SmallVector<CallStackId> CallSiteIds;
 
   void clear() {
     AllocSites.clear();
@@ -370,47 +366,31 @@ struct IndexedMemProfRecord {
     CallSites.append(Other.CallSites);
   }
 
-  size_t serializedSize() const {
-    size_t Result = sizeof(GlobalValue::GUID);
-    for (const IndexedAllocationInfo &N : AllocSites)
-      Result += N.serializedSize();
-
-    // The number of callsites we have information for.
-    Result += sizeof(uint64_t);
-    for (const auto &Frames : CallSites) {
-      // The number of frame ids to serialize.
-      Result += sizeof(uint64_t);
-      Result += Frames.size() * sizeof(FrameId);
-    }
-    return Result;
-  }
+  size_t serializedSize(IndexedVersion Version) const;
 
   bool operator==(const IndexedMemProfRecord &Other) const {
     if (Other.AllocSites.size() != AllocSites.size())
       return false;
 
-    if (Other.CallSites.size() != CallSites.size())
-      return false;
-
     for (size_t I = 0; I < AllocSites.size(); I++) {
       if (AllocSites[I] != Other.AllocSites[I])
         return false;
     }
 
-    for (size_t I = 0; I < CallSites.size(); I++) {
-      if (CallSites[I] != Other.CallSites[I])
-        return false;
-    }
+    if (Other.CallSiteIds != CallSiteIds)
+      return false;
     return true;
   }
 
   // Serializes the memprof records in \p Records to the ostream \p OS based
   // on the schema provided in \p Schema.
-  void serialize(const MemProfSchema &Schema, raw_ostream &OS);
+  void serialize(const MemProfSchema &Schema, raw_ostream &OS,
+                 IndexedVersion Version);
 
   // Deserializes memprof records from the Buffer.
   static IndexedMemProfRecord deserialize(const MemProfSchema &Schema,
-                                          const unsigned char *Buffer);
+                                          const unsigned char *Buffer,
+                                          IndexedVersion Version);
 
   // Returns the GUID for the function name after canonicalization. For
   // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
@@ -480,7 +460,8 @@ class RecordLookupTrait {
   using offset_type = uint64_t;
 
   RecordLookupTrait() = delete;
-  RecordLookupTrait(const MemProfSchema &S) : Schema(S) {}
+  RecordLookupTrait(IndexedVersion V, const MemProfSchema &S)
+      : Version(V), Schema(S) {}
 
   static bool EqualKey(uint64_t A, uint64_t B) { return A == B; }
   static uint64_t GetInternalKey(uint64_t K) { return K; }
@@ -507,11 +488,13 @@ class RecordLookupTrait {
 
   data_type ReadData(uint64_t K, const unsigned char *D,
                      offset_type /*Unused*/) {
-    Record = IndexedMemProfRecord::deserialize(Schema, D);
+    Record = IndexedMemProfRecord::deserialize(Schema, D, Version);
     return Record;
   }
 
 private:
+  // Holds the MemProf version.
+  IndexedVersion Version;
   // Holds the memprof schema used to deserialize records.
   MemProfSchema Schema;
   // Holds the records from one function deserialized from the indexed format.
@@ -519,7 +502,7 @@ class RecordLookupTrait {
 };
 
 // Trait for writing IndexedMemProfRecord data to the on-disk hash table.
-class RecordWriterTrait {
+template <IndexedVersion Version> class RecordWriterTrait {
 public:
   using key_type = uint64_t;
   using key_type_ref = uint64_t;
@@ -546,7 +529,7 @@ class RecordWriterTrait {
     endian::Writer LE(Out, llvm::endianness::little);
     offset_type N = sizeof(K);
     LE.write<offset_type>(N);
-    offset_type M = V.serializedSize();
+    offset_type M = V.serializedSize(Version);
     LE.write<offset_type>(M);
     return std::make_pair(N, M);
   }
@@ -560,7 +543,7 @@ class RecordWriterTrait {
   void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
                 offset_type /*Unused*/) {
     assert(Schema != nullptr && "MemProf schema is not initialized!");
-    V.serialize(*Schema, Out);
+    V.serialize(*Schema, Out, Version);
     // Clear the IndexedMemProfRecord which results in clearing/freeing its
     // vectors of allocs and callsites. This is owned by the associated on-disk
     // hash table, but unused after this point. See also the comment added to

diff  --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 7ac5c561dc0809..884334ed070e8d 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1303,7 +1303,7 @@ Error IndexedInstrProfReader::readHeader() {
     MemProfRecordTable.reset(MemProfRecordHashTable::Create(
         /*Buckets=*/Start + RecordTableOffset,
         /*Payload=*/Ptr,
-        /*Base=*/Start, memprof::RecordLookupTrait(Schema)));
+        /*Base=*/Start, memprof::RecordLookupTrait(memprof::Version1, Schema)));
 
     // Initialize the frame table reader with the payload and bucket offsets.
     MemProfFrameTable.reset(MemProfFrameHashTable::Create(

diff  --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index c2c94ba30c6583..a1bc180a53ca34 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -557,9 +557,11 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
       OS.write(static_cast<uint64_t>(Id));
     }
 
-    auto RecordWriter = std::make_unique<memprof::RecordWriterTrait>();
+    auto RecordWriter =
+        std::make_unique<memprof::RecordWriterTrait<memprof::Version1>>();
     RecordWriter->Schema = &Schema;
-    OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
+    OnDiskChainedHashTableGenerator<
+        memprof::RecordWriterTrait<memprof::Version1>>
         RecordTableGenerator;
     for (auto &I : MemProfRecordData) {
       // Insert the key (func hash) and value (memprof record).

diff  --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 6c419811d59e2f..ac0a8702c3f9cf 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -10,15 +10,88 @@
 
 namespace llvm {
 namespace memprof {
+namespace {
+size_t serializedSizeV0(const IndexedAllocationInfo &IAI) {
+  size_t Size = 0;
+  // The number of frames to serialize.
+  Size += sizeof(uint64_t);
+  // The callstack frame ids.
+  Size += sizeof(FrameId) * IAI.CallStack.size();
+  // The size of the payload.
+  Size += PortableMemInfoBlock::serializedSize();
+  return Size;
+}
 
-void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
-                                     raw_ostream &OS) {
+size_t serializedSizeV2(const IndexedAllocationInfo &IAI) {
+  size_t Size = 0;
+  // The CallStackId
+  Size += sizeof(CallStackId);
+  // The size of the payload.
+  Size += PortableMemInfoBlock::serializedSize();
+  return Size;
+}
+} // namespace
+
+size_t IndexedAllocationInfo::serializedSize(IndexedVersion Version) const {
+  switch (Version) {
+  case Version0:
+  case Version1:
+    return serializedSizeV0(*this);
+  case Version2:
+    return serializedSizeV2(*this);
+  }
+  llvm_unreachable("unsupported MemProf version");
+}
+
+namespace {
+size_t serializedSizeV0(const IndexedMemProfRecord &Record) {
+  size_t Result = sizeof(GlobalValue::GUID);
+  for (const IndexedAllocationInfo &N : Record.AllocSites)
+    Result += N.serializedSize(Version0);
+
+  // The number of callsites we have information for.
+  Result += sizeof(uint64_t);
+  for (const auto &Frames : Record.CallSites) {
+    // The number of frame ids to serialize.
+    Result += sizeof(uint64_t);
+    Result += Frames.size() * sizeof(FrameId);
+  }
+  return Result;
+}
+
+size_t serializedSizeV2(const IndexedMemProfRecord &Record) {
+  size_t Result = sizeof(GlobalValue::GUID);
+  for (const IndexedAllocationInfo &N : Record.AllocSites)
+    Result += N.serializedSize(Version2);
+
+  // The number of callsites we have information for.
+  Result += sizeof(uint64_t);
+  // The CallStackId
+  Result += Record.CallSiteIds.size() * sizeof(CallStackId);
+  return Result;
+}
+} // namespace
+
+size_t IndexedMemProfRecord::serializedSize(IndexedVersion Version) const {
+  switch (Version) {
+  case Version0:
+  case Version1:
+    return serializedSizeV0(*this);
+  case Version2:
+    return serializedSizeV2(*this);
+  }
+  llvm_unreachable("unsupported MemProf version");
+}
+
+namespace {
+void serializeV0(const IndexedMemProfRecord &Record,
+                 const MemProfSchema &Schema, raw_ostream &OS) {
   using namespace support;
 
   endian::Writer LE(OS, llvm::endianness::little);
 
-  LE.write<uint64_t>(AllocSites.size());
-  for (const IndexedAllocationInfo &N : AllocSites) {
+  LE.write<uint64_t>(Record.AllocSites.size());
+  for (const IndexedAllocationInfo &N : Record.AllocSites) {
     LE.write<uint64_t>(N.CallStack.size());
     for (const FrameId &Id : N.CallStack)
       LE.write<FrameId>(Id);
@@ -26,17 +99,50 @@ void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
   }
 
   // Related contexts.
-  LE.write<uint64_t>(CallSites.size());
-  for (const auto &Frames : CallSites) {
+  LE.write<uint64_t>(Record.CallSites.size());
+  for (const auto &Frames : Record.CallSites) {
     LE.write<uint64_t>(Frames.size());
     for (const FrameId &Id : Frames)
       LE.write<FrameId>(Id);
   }
 }
 
-IndexedMemProfRecord
-IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
-                                  const unsigned char *Ptr) {
+void serializeV2(const IndexedMemProfRecord &Record,
+                 const MemProfSchema &Schema, raw_ostream &OS) {
+  using namespace support;
+
+  endian::Writer LE(OS, llvm::endianness::little);
+
+  LE.write<uint64_t>(Record.AllocSites.size());
+  for (const IndexedAllocationInfo &N : Record.AllocSites) {
+    LE.write<CallStackId>(N.CSId);
+    N.Info.serialize(Schema, OS);
+  }
+
+  // Related contexts.
+  LE.write<uint64_t>(Record.CallSiteIds.size());
+  for (const auto &CSId : Record.CallSiteIds)
+    LE.write<CallStackId>(CSId);
+}
+} // namespace
+
+void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
+                                     raw_ostream &OS, IndexedVersion Version) {
+  switch (Version) {
+  case Version0:
+  case Version1:
+    serializeV0(*this, Schema, OS);
+    return;
+  case Version2:
+    serializeV2(*this, Schema, OS);
+    return;
+  }
+  llvm_unreachable("unsupported MemProf version");
+}
+
+namespace {
+IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema,
+                                   const unsigned char *Ptr) {
   using namespace support;
 
   IndexedMemProfRecord Record;
@@ -73,11 +179,57 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
       Frames.push_back(Id);
     }
     Record.CallSites.push_back(Frames);
+    Record.CallSiteIds.push_back(hashCallStack(Frames));
   }
 
   return Record;
 }
 
+IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
+                                   const unsigned char *Ptr) {
+  using namespace support;
+
+  IndexedMemProfRecord Record;
+
+  // Read the meminfo nodes.
+  const uint64_t NumNodes =
+      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+  for (uint64_t I = 0; I < NumNodes; I++) {
+    IndexedAllocationInfo Node;
+    Node.CSId =
+        endian::readNext<CallStackId, llvm::endianness::little, unaligned>(Ptr);
+    Node.Info.deserialize(Schema, Ptr);
+    Ptr += PortableMemInfoBlock::serializedSize();
+    Record.AllocSites.push_back(Node);
+  }
+
+  // Read the callsite information.
+  const uint64_t NumCtxs =
+      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+  for (uint64_t J = 0; J < NumCtxs; J++) {
+    CallStackId CSId =
+        endian::readNext<CallStackId, llvm::endianness::little, unaligned>(Ptr);
+    Record.CallSiteIds.push_back(CSId);
+  }
+
+  return Record;
+}
+} // namespace
+
+IndexedMemProfRecord
+IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
+                                  const unsigned char *Ptr,
+                                  IndexedVersion Version) {
+  switch (Version) {
+  case Version0:
+  case Version1:
+    return deserializeV0(Schema, Ptr);
+  case Version2:
+    return deserializeV2(Schema, Ptr);
+  }
+  llvm_unreachable("unsupported MemProf version");
+}
+
 GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
   // Canonicalize the function name to drop suffixes such as ".llvm.". Note
   // we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop

diff  --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 1cca44e9b03707..f1aa6f37aa399f 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -265,7 +265,9 @@ TEST(MemProf, PortableWrapper) {
   EXPECT_EQ(3UL, ReadBlock.getAllocCpuId());
 }
 
-TEST(MemProf, RecordSerializationRoundTrip) {
+// Version0 and Version1 serialize IndexedMemProfRecord in the same format, so
+// we share one test.
+TEST(MemProf, RecordSerializationRoundTripVersion0And1) {
   const MemProfSchema Schema = getFullSchema();
 
   MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000,
@@ -284,14 +286,47 @@ TEST(MemProf, RecordSerializationRoundTrip) {
                                    Info);
   }
   Record.CallSites.assign(CallSites);
+  for (const auto &CS : CallSites)
+    Record.CallSiteIds.push_back(llvm::memprof::hashCallStack(CS));
 
   std::string Buffer;
   llvm::raw_string_ostream OS(Buffer);
-  Record.serialize(Schema, OS);
+  Record.serialize(Schema, OS, llvm::memprof::Version0);
   OS.flush();
 
   const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize(
-      Schema, reinterpret_cast<const unsigned char *>(Buffer.data()));
+      Schema, reinterpret_cast<const unsigned char *>(Buffer.data()),
+      llvm::memprof::Version0);
+
+  EXPECT_EQ(Record, GotRecord);
+}
+
+TEST(MemProf, RecordSerializationRoundTripVerion2) {
+  const MemProfSchema Schema = getFullSchema();
+
+  MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000,
+                    /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3,
+                    /*dealloc_cpu=*/4);
+
+  llvm::SmallVector<llvm::memprof::CallStackId> CallStackIds = {0x123, 0x456};
+
+  llvm::SmallVector<llvm::memprof::CallStackId> CallSiteIds = {0x333, 0x444};
+
+  IndexedMemProfRecord Record;
+  for (const auto &CSId : CallStackIds) {
+    // Use the same info block for both allocation sites.
+    Record.AllocSites.emplace_back(llvm::SmallVector<FrameId>(), CSId, Info);
+  }
+  Record.CallSiteIds.assign(CallSiteIds);
+
+  std::string Buffer;
+  llvm::raw_string_ostream OS(Buffer);
+  Record.serialize(Schema, OS, llvm::memprof::Version2);
+  OS.flush();
+
+  const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize(
+      Schema, reinterpret_cast<const unsigned char *>(Buffer.data()),
+      llvm::memprof::Version2);
 
   EXPECT_EQ(Record, GotRecord);
 }


        


More information about the llvm-commits mailing list