[llvm] [memprof] Omit the key/data lengths for the frame table (PR #89711)

Kazu Hirata via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 23 00:18:57 PDT 2024


https://github.com/kazutakahirata created https://github.com/llvm/llvm-project/pull/89711

The frame table has constant key/data lengths, so we don't need to
serialize or deserialize them for every key-data pair.  Omitting the
key/data lengths saves 0.21% of the indexed MemProf file size.

Note that it's OK to change the format because Version2 is still under
development.


From edd93f3bde7d36ddbda586fffb971724f2711872 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Mon, 22 Apr 2024 23:26:02 -0700
Subject: [PATCH] [memprof] Omit the key/data lengths for the frame table

The frame table has constant key/data lengths, so we don't need to
serialize or deserialize them for every key-data pair.  Omitting the
key/data lengths saves 0.21% of the indexed MemProf file size.

Note that it's OK to change the format because Version2 is still under
development.
---
 llvm/include/llvm/ProfileData/MemProf.h  | 36 +++++++++++++++++++-----
 llvm/lib/ProfileData/InstrProfReader.cpp |  2 +-
 llvm/lib/ProfileData/InstrProfWriter.cpp | 17 +++++++----
 3 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index f356e3a54a3645..7ba2a4b7a507fb 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -578,14 +578,21 @@ class FrameWriterTrait {
 
   static hash_value_type ComputeHash(key_type_ref K) { return K; }
 
-  static std::pair<offset_type, offset_type>
+  FrameWriterTrait() = delete;
+  FrameWriterTrait(IndexedVersion Version) : Version(Version) {}
+
+  std::pair<offset_type, offset_type>
   EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
     using namespace support;
     endian::Writer LE(Out, llvm::endianness::little);
     offset_type N = sizeof(K);
-    LE.write<offset_type>(N);
-    offset_type M = V.serializedSize();
-    LE.write<offset_type>(M);
+    offset_type M = Frame::serializedSize();
+    // Starting with Version2, we do not explicitly emit the key/data lengths
+    // because they are constants.
+    if (Version < Version2) {
+      LE.write<offset_type>(N);
+      LE.write<offset_type>(M);
+    }
     return std::make_pair(N, M);
   }
 
@@ -599,6 +606,10 @@ class FrameWriterTrait {
                 offset_type /*Unused*/) {
     V.serialize(Out);
   }
+
+private:
+  // Holds the MemProf version.
+  IndexedVersion Version;
 };
 
 // Trait for reading frame mappings from the on-disk hash table.
@@ -610,6 +621,9 @@ class FrameLookupTrait {
   using hash_value_type = FrameId;
   using offset_type = uint64_t;
 
+  FrameLookupTrait() = delete;
+  FrameLookupTrait(IndexedVersion Version) : Version(Version) {}
+
   static bool EqualKey(internal_key_type A, internal_key_type B) {
     return A == B;
   }
@@ -618,14 +632,18 @@ class FrameLookupTrait {
 
   hash_value_type ComputeHash(internal_key_type K) { return K; }
 
-  static std::pair<offset_type, offset_type>
+  std::pair<offset_type, offset_type>
   ReadKeyDataLength(const unsigned char *&D) {
     using namespace support;
 
     offset_type KeyLen =
-        endian::readNext<offset_type, llvm::endianness::little>(D);
+        Version < Version2
+            ? endian::readNext<offset_type, llvm::endianness::little>(D)
+            : sizeof(FrameId);
     offset_type DataLen =
-        endian::readNext<offset_type, llvm::endianness::little>(D);
+        Version < Version2
+            ? endian::readNext<offset_type, llvm::endianness::little>(D)
+            : Frame::serializedSize();
     return std::make_pair(KeyLen, DataLen);
   }
 
@@ -638,6 +656,10 @@ class FrameLookupTrait {
                      offset_type /*Unused*/) {
     return Frame::deserialize(D);
   }
+
+private:
+  // Holds the MemProf version.
+  IndexedVersion Version;
 };
 
 // Trait for writing call stacks to the on-disk hash table.
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index cefb6af12d0021..df0c43a0386958 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1272,7 +1272,7 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
   MemProfFrameTable.reset(MemProfFrameHashTable::Create(
       /*Buckets=*/Start + FrameTableOffset,
       /*Payload=*/Start + FramePayloadOffset,
-      /*Base=*/Start));
+      /*Base=*/Start, memprof::FrameLookupTrait(Version)));
 
   if (Version >= memprof::Version2)
     MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 4a6fc9d64b6900..1a7edf18801218 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -470,17 +470,19 @@ static uint64_t writeMemProfRecords(
 // Serialize MemProfFrameData.  Return FrameTableOffset.
 static uint64_t writeMemProfFrames(
     ProfOStream &OS,
-    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
+    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
+    memprof::IndexedVersion Version) {
+  memprof::FrameWriterTrait FrameWriter(Version);
   OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
       FrameTableGenerator;
   for (auto &[FrameId, Frame] : MemProfFrameData) {
     // Insert the key (frame id) and value (frame contents).
-    FrameTableGenerator.insert(FrameId, Frame);
+    FrameTableGenerator.insert(FrameId, Frame, FrameWriter);
   }
   // Release the memory of this MapVector as it is no longer needed.
   MemProfFrameData.clear();
 
-  return FrameTableGenerator.Emit(OS.OS);
+  return FrameTableGenerator.Emit(OS.OS, FrameWriter);
 }
 
 static uint64_t writeMemProfCallStacks(
@@ -514,7 +516,8 @@ static Error writeMemProfV0(
       writeMemProfRecords(OS, MemProfRecordData, &Schema, memprof::Version0);
 
   uint64_t FramePayloadOffset = OS.tell();
-  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData);
+  uint64_t FrameTableOffset =
+      writeMemProfFrames(OS, MemProfFrameData, memprof::Version0);
 
   uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset};
   OS.patch({{HeaderUpdatePos, Header, std::size(Header)}});
@@ -540,7 +543,8 @@ static Error writeMemProfV1(
       writeMemProfRecords(OS, MemProfRecordData, &Schema, memprof::Version1);
 
   uint64_t FramePayloadOffset = OS.tell();
-  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData);
+  uint64_t FrameTableOffset =
+      writeMemProfFrames(OS, MemProfFrameData, memprof::Version1);
 
   uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset};
   OS.patch({{HeaderUpdatePos, Header, std::size(Header)}});
@@ -570,7 +574,8 @@ static Error writeMemProfV2(
       writeMemProfRecords(OS, MemProfRecordData, &Schema, memprof::Version2);
 
   uint64_t FramePayloadOffset = OS.tell();
-  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData);
+  uint64_t FrameTableOffset =
+      writeMemProfFrames(OS, MemProfFrameData, memprof::Version2);
 
   uint64_t CallStackPayloadOffset = OS.tell();
   uint64_t CallStackTableOffset =



More information about the llvm-commits mailing list