[llvm] [memprof] Reorder MemProf sections in profile (PR #93640)

Kazu Hirata via llvm-commits llvm-commits at lists.llvm.org
Tue May 28 21:11:09 PDT 2024


https://github.com/kazutakahirata created https://github.com/llvm/llvm-project/pull/93640

This patch teaches the V3 format to serialize Frames, call stacks, and
IndexedMemProfRecords, in that order.

I'm planning to use linear IDs for Frames.  That is, Frames will be
numbered 0, 1, 2, and so on in the order we serialize them.  In turn,
we will serialize the call stacks in terms of those linear IDs.

Likewise, I'm planning to use linear IDs for call stacks and then
serialize IndexedMemProfRecords in terms of those linear IDs for call
stacks.

With the new order, we can successively free data structures as we
serialize them.  That is, once we serialize Frames, we can free the
Frames' data proper and just retain mappings from FrameIds to linear
IDs.  A similar story applies to call stacks.

>From bbcca83f622a64911f14dadc96577f486b8c6e53 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Fri, 3 May 2024 15:36:13 -0700
Subject: [PATCH] [memprof] Reorder MemProf sections in profile

This patch teaches the V3 format to serialize Frames, call stacks, and
IndexedMemProfRecords, in that order.

I'm planning to use linear IDs for Frames.  That is, Frames will be
numbered 0, 1, 2, and so on in the order we serialize them.  In turn,
we will serialize the call stacks in terms of those linear IDs.

Likewise, I'm planning to use linear IDs for call stacks and then
serialize IndexedMemProfRecords in terms of those linear IDs for call
stacks.

With the new order, we can successively free data structures as we
serialize them.  That is, once we serialize Frames, we can free the
Frames' data proper and just retain mappings from FrameIds to linear
IDs.  A similar story applies to call stacks.
---
 .../llvm/ProfileData/InstrProfReader.h        |   5 +
 llvm/lib/ProfileData/InstrProfReader.cpp      | 124 ++++++++++++++----
 llvm/lib/ProfileData/InstrProfWriter.cpp      |  22 ++--
 3 files changed, 111 insertions(+), 40 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 46aa1b6c2bfe7..8d475fb048624 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -660,6 +660,11 @@ class IndexedMemProfReader {
   /// MemProf call stack data on-disk indexed via call stack id.
   std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable;
 
+  Error deserializeV012(const unsigned char *Start, const unsigned char *Ptr,
+                        uint64_t FirstWord, memprof::IndexedVersion Version);
+  Error deserializeV3(const unsigned char *Start, const unsigned char *Ptr,
+                      memprof::IndexedVersion Version);
+
 public:
   IndexedMemProfReader() = default;
 
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 798236c295194..c8dd8b14fd87e 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1202,35 +1202,10 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
   }
 }
 
-Error IndexedMemProfReader::deserialize(const unsigned char *Start,
-                                        uint64_t MemProfOffset) {
-  const unsigned char *Ptr = Start + MemProfOffset;
-
-  // Read the first 64-bit word, which may be RecordTableOffset in
-  // memprof::MemProfVersion0 or the MemProf version number in
-  // memprof::MemProfVersion1 and above.
-  const uint64_t FirstWord =
-      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
-
-  if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2 ||
-      FirstWord == memprof::Version3) {
-    // Everything is good.  We can proceed to deserialize the rest.
-    Version = static_cast<memprof::IndexedVersion>(FirstWord);
-  } else if (FirstWord >= 24) {
-    // This is a heuristic/hack to detect memprof::MemProfVersion0,
-    // which does not have a version field in the header.
-    // In memprof::MemProfVersion0, FirstWord will be RecordTableOffset,
-    // which should be at least 24 because of the MemProf header size.
-    Version = memprof::Version0;
-  } else {
-    return make_error<InstrProfError>(
-        instrprof_error::unsupported_version,
-        formatv("MemProf version {} not supported; "
-                "requires version between {} and {}, inclusive",
-                FirstWord, memprof::MinimumSupportedVersion,
-                memprof::MaximumSupportedVersion));
-  }
-
+Error IndexedMemProfReader::deserializeV012(const unsigned char *Start,
+                                            const unsigned char *Ptr,
+                                            uint64_t FirstWord,
+                                            memprof::IndexedVersion Version) {
   // The value returned from RecordTableGenerator.Emit.
   const uint64_t RecordTableOffset =
       Version == memprof::Version0
@@ -1280,6 +1255,97 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
         /*Payload=*/Start + CallStackPayloadOffset,
         /*Base=*/Start));
 
+  return Error::success();
+}
+
+Error IndexedMemProfReader::deserializeV3(const unsigned char *Start,
+                                          const unsigned char *Ptr,
+                                          memprof::IndexedVersion Version) {
+  // The value returned from FrameTableGenerator.Emit.
+  const uint64_t FrameTableOffset =
+      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+  // The offset in the stream right before invoking
+  // CallStackTableGenerator.Emit.
+  const uint64_t CallStackPayloadOffset =
+      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+  // The value returned from CallStackTableGenerator.Emit.
+  const uint64_t CallStackTableOffset =
+      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+  // The offset in the stream right before invoking RecordTableGenerator.Emit.
+  const uint64_t RecordPayloadOffset =
+      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+  // The value returned from RecordTableGenerator.Emit.
+  const uint64_t RecordTableOffset =
+      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+
+  // Read the schema.
+  auto SchemaOr = memprof::readMemProfSchema(Ptr);
+  if (!SchemaOr)
+    return SchemaOr.takeError();
+  Schema = SchemaOr.get();
+
+  // Initialize the frame table reader with the payload and bucket offsets.
+  MemProfFrameTable.reset(MemProfFrameHashTable::Create(
+      /*Buckets=*/Start + FrameTableOffset,
+      /*Payload=*/Ptr,
+      /*Base=*/Start));
+
+  MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
+      /*Buckets=*/Start + CallStackTableOffset,
+      /*Payload=*/Start + CallStackPayloadOffset,
+      /*Base=*/Start));
+
+  // Now initialize the table reader with a pointer into data buffer.
+  MemProfRecordTable.reset(MemProfRecordHashTable::Create(
+      /*Buckets=*/Start + RecordTableOffset,
+      /*Payload=*/Start + RecordPayloadOffset,
+      /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
+
+  return Error::success();
+}
+
+Error IndexedMemProfReader::deserialize(const unsigned char *Start,
+                                        uint64_t MemProfOffset) {
+  const unsigned char *Ptr = Start + MemProfOffset;
+
+  // Read the first 64-bit word, which may be RecordTableOffset in
+  // memprof::MemProfVersion0 or the MemProf version number in
+  // memprof::MemProfVersion1 and above.
+  const uint64_t FirstWord =
+      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+
+  if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2 ||
+      FirstWord == memprof::Version3) {
+    // Everything is good.  We can proceed to deserialize the rest.
+    Version = static_cast<memprof::IndexedVersion>(FirstWord);
+  } else if (FirstWord >= 24) {
+    // This is a heuristic/hack to detect memprof::MemProfVersion0,
+    // which does not have a version field in the header.
+    // In memprof::MemProfVersion0, FirstWord will be RecordTableOffset,
+    // which should be at least 24 because of the MemProf header size.
+    Version = memprof::Version0;
+  } else {
+    return make_error<InstrProfError>(
+        instrprof_error::unsupported_version,
+        formatv("MemProf version {} not supported; "
+                "requires version between {} and {}, inclusive",
+                FirstWord, memprof::MinimumSupportedVersion,
+                memprof::MaximumSupportedVersion));
+  }
+
+  switch (Version) {
+  case memprof::Version0:
+  case memprof::Version1:
+  case memprof::Version2:
+    if (Error E = deserializeV012(Start, Ptr, FirstWord, Version))
+      return E;
+    break;
+  case memprof::Version3:
+    if (Error E = deserializeV3(Start, Ptr, Version))
+      return E;
+    break;
+  }
+
 #ifdef EXPENSIVE_CHECKS
   // Go through all the records and verify that CSId has been correctly
   // populated.  Do this only under EXPENSIVE_CHECKS.  Otherwise, we
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index b16714ae8b9a2..64c8d2acf276c 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -619,48 +619,48 @@ static Error writeMemProfV2(ProfOStream &OS,
 
 // Write out MemProf Version3 as follows:
 // uint64_t Version
-// uint64_t RecordTableOffset = RecordTableGenerator.Emit
-// uint64_t FramePayloadOffset = Offset for the frame payload
 // uint64_t FrameTableOffset = FrameTableGenerator.Emit
 // uint64_t CallStackPayloadOffset = Offset for the call stack payload
 // uint64_t CallStackTableOffset = CallStackTableGenerator.Emit
+// uint64_t RecordPayloadOffset = Offset for the record payload
+// uint64_t RecordTableOffset = RecordTableGenerator.Emit
 // uint64_t Num schema entries
 // uint64_t Schema entry 0
 // uint64_t Schema entry 1
 // ....
 // uint64_t Schema entry N - 1
-// OnDiskChainedHashTable MemProfRecordData
 // OnDiskChainedHashTable MemProfFrameData
 // OnDiskChainedHashTable MemProfCallStackData
+// OnDiskChainedHashTable MemProfRecordData
 static Error writeMemProfV3(ProfOStream &OS,
                             memprof::IndexedMemProfData &MemProfData,
                             bool MemProfFullSchema) {
   OS.write(memprof::Version3);
   uint64_t HeaderUpdatePos = OS.tell();
-  OS.write(0ULL); // Reserve space for the memprof record table offset.
-  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
   OS.write(0ULL); // Reserve space for the memprof frame table offset.
   OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
   OS.write(0ULL); // Reserve space for the memprof call stack table offset.
+  OS.write(0ULL); // Reserve space for the memprof record payload offset.
+  OS.write(0ULL); // Reserve space for the memprof record table offset.
 
   auto Schema = memprof::getHotColdSchema();
   if (MemProfFullSchema)
     Schema = memprof::getFullSchema();
   writeMemProfSchema(OS, Schema);
 
-  uint64_t RecordTableOffset = writeMemProfRecords(OS, MemProfData.RecordData,
-                                                   &Schema, memprof::Version3);
-
-  uint64_t FramePayloadOffset = OS.tell();
   uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.FrameData);
 
   uint64_t CallStackPayloadOffset = OS.tell();
   uint64_t CallStackTableOffset =
       writeMemProfCallStacks(OS, MemProfData.CallStackData);
 
+  uint64_t RecordPayloadOffset = OS.tell();
+  uint64_t RecordTableOffset = writeMemProfRecords(OS, MemProfData.RecordData,
+                                                   &Schema, memprof::Version3);
+
   uint64_t Header[] = {
-      RecordTableOffset,      FramePayloadOffset,   FrameTableOffset,
-      CallStackPayloadOffset, CallStackTableOffset,
+      FrameTableOffset,    CallStackPayloadOffset, CallStackTableOffset,
+      RecordPayloadOffset, RecordTableOffset,
   };
   OS.patch({{HeaderUpdatePos, Header, std::size(Header)}});
 



More information about the llvm-commits mailing list