[llvm] 99b9ab4 - [memprof] Reorder MemProf sections in profile (#93640)

Wed May 29 12:18:28 PDT 2024

Author: Kazu Hirata
Date: 2024-05-29T12:18:24-07:00
New Revision: 99b9ab45cd67648a7b6c2ba02041072fe4de346b

URL: https://github.com/llvm/llvm-project/commit/99b9ab45cd67648a7b6c2ba02041072fe4de346b
DIFF: https://github.com/llvm/llvm-project/commit/99b9ab45cd67648a7b6c2ba02041072fe4de346b.diff

LOG: [memprof] Reorder MemProf sections in profile (#93640)

This patch teaches the V3 format to serialize Frames, call stacks, and
IndexedMemProfRecords, in that order.

I'm planning to use linear IDs for Frames.  That is, Frames will be
numbered 0, 1, 2, and so on in the order we serialize them.  In turn,
we will serialize the call stacks in terms of those linear IDs.

Likewise, I'm planning to use linear IDs for call stacks and then
serialize IndexedMemProfRecords in terms of those linear IDs for call
stacks.

With the new order, we can successively free data structures as we
serialize them.  That is, once we serialize Frames, we can free the
Frames' data proper and just retain mappings from FrameIds to linear
IDs.  A similar story applies to call stacks.

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/InstrProfReader.h
    llvm/lib/ProfileData/InstrProfReader.cpp
    llvm/lib/ProfileData/InstrProfWriter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 46aa1b6c2bfe7..8d475fb048624 100644

--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -660,6 +660,11 @@ class IndexedMemProfReader {
   /// MemProf call stack data on-disk indexed via call stack id.
   std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable;
 
+  Error deserializeV012(const unsigned char *Start, const unsigned char *Ptr,
+                        uint64_t FirstWord, memprof::IndexedVersion Version);
+  Error deserializeV3(const unsigned char *Start, const unsigned char *Ptr,
+                      memprof::IndexedVersion Version);
+
 public:
   IndexedMemProfReader() = default;
 

diff  --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index a5ae0c6fa6244..1b36ca1a733a2 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1202,35 +1202,10 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
   }
 }
 
-Error IndexedMemProfReader::deserialize(const unsigned char *Start,
-                                        uint64_t MemProfOffset) {
-  const unsigned char *Ptr = Start + MemProfOffset;
-
-  // Read the first 64-bit word, which may be RecordTableOffset in
-  // memprof::MemProfVersion0 or the MemProf version number in
-  // memprof::MemProfVersion1 and above.
-  const uint64_t FirstWord =
-      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
-
-  if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2 ||
-      FirstWord == memprof::Version3) {
-    // Everything is good.  We can proceed to deserialize the rest.
-    Version = static_cast<memprof::IndexedVersion>(FirstWord);
-  } else if (FirstWord >= 24) {
-    // This is a heuristic/hack to detect memprof::MemProfVersion0,
-    // which does not have a version field in the header.
-    // In memprof::MemProfVersion0, FirstWord will be RecordTableOffset,
-    // which should be at least 24 because of the MemProf header size.
-    Version = memprof::Version0;
-  } else {
-    return make_error<InstrProfError>(
-        instrprof_error::unsupported_version,
-        formatv("MemProf version {} not supported; "
-                "requires version between {} and {}, inclusive",
-                FirstWord, memprof::MinimumSupportedVersion,
-                memprof::MaximumSupportedVersion));
-  }
-
+Error IndexedMemProfReader::deserializeV012(const unsigned char *Start,
+                                            const unsigned char *Ptr,
+                                            uint64_t FirstWord,
+                                            memprof::IndexedVersion Version) {
   // The value returned from RecordTableGenerator.Emit.
   const uint64_t RecordTableOffset =
       Version == memprof::Version0
@@ -1280,6 +1255,97 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
         /*Payload=*/Start + CallStackPayloadOffset,
         /*Base=*/Start));
 
+  return Error::success();
+}
+
+Error IndexedMemProfReader::deserializeV3(const unsigned char *Start,
+                                          const unsigned char *Ptr,
+                                          memprof::IndexedVersion Version) {
+  // The value returned from FrameTableGenerator.Emit.
+  const uint64_t FrameTableOffset =
+      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+  // The offset in the stream right before invoking
+  // CallStackTableGenerator.Emit.
+  const uint64_t CallStackPayloadOffset =
+      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+  // The value returned from CallStackTableGenerator.Emit.
+  const uint64_t CallStackTableOffset =
+      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+  // The offset in the stream right before invoking RecordTableGenerator.Emit.
+  const uint64_t RecordPayloadOffset =
+      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+  // The value returned from RecordTableGenerator.Emit.
+  const uint64_t RecordTableOffset =
+      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+
+  // Read the schema.
+  auto SchemaOr = memprof::readMemProfSchema(Ptr);
+  if (!SchemaOr)
+    return SchemaOr.takeError();
+  Schema = SchemaOr.get();
+
+  // Initialize the frame table reader with the payload and bucket offsets.
+  MemProfFrameTable.reset(MemProfFrameHashTable::Create(
+      /*Buckets=*/Start + FrameTableOffset,
+      /*Payload=*/Ptr,
+      /*Base=*/Start));
+
+  MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
+      /*Buckets=*/Start + CallStackTableOffset,
+      /*Payload=*/Start + CallStackPayloadOffset,
+      /*Base=*/Start));
+
+  // Now initialize the table reader with a pointer into data buffer.
+  MemProfRecordTable.reset(MemProfRecordHashTable::Create(
+      /*Buckets=*/Start + RecordTableOffset,
+      /*Payload=*/Start + RecordPayloadOffset,
+      /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
+
+  return Error::success();
+}
+
+Error IndexedMemProfReader::deserialize(const unsigned char *Start,
+                                        uint64_t MemProfOffset) {
+  const unsigned char *Ptr = Start + MemProfOffset;
+
+  // Read the first 64-bit word, which may be RecordTableOffset in
+  // memprof::MemProfVersion0 or the MemProf version number in
+  // memprof::MemProfVersion1 and above.
+  const uint64_t FirstWord =
+      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+
+  if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2 ||
+      FirstWord == memprof::Version3) {
+    // Everything is good.  We can proceed to deserialize the rest.
+    Version = static_cast<memprof::IndexedVersion>(FirstWord);
+  } else if (FirstWord >= 24) {
+    // This is a heuristic/hack to detect memprof::MemProfVersion0,
+    // which does not have a version field in the header.
+    // In memprof::MemProfVersion0, FirstWord will be RecordTableOffset,
+    // which should be at least 24 because of the MemProf header size.
+    Version = memprof::Version0;
+  } else {
+    return make_error<InstrProfError>(
+        instrprof_error::unsupported_version,
+        formatv("MemProf version {} not supported; "
+                "requires version between {} and {}, inclusive",
+                FirstWord, memprof::MinimumSupportedVersion,
+                memprof::MaximumSupportedVersion));
+  }
+
+  switch (Version) {
+  case memprof::Version0:
+  case memprof::Version1:
+  case memprof::Version2:
+    if (Error E = deserializeV012(Start, Ptr, FirstWord, Version))
+      return E;
+    break;
+  case memprof::Version3:
+    if (Error E = deserializeV3(Start, Ptr, Version))
+      return E;
+    break;
+  }
+
 #ifdef EXPENSIVE_CHECKS
   // Go through all the records and verify that CSId has been correctly
   // populated.  Do this only under EXPENSIVE_CHECKS.  Otherwise, we

diff  --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index e732882337d46..7e0c9a159d932 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -619,48 +619,48 @@ static Error writeMemProfV2(ProfOStream &OS,
 
 // Write out MemProf Version3 as follows:
 // uint64_t Version
-// uint64_t RecordTableOffset = RecordTableGenerator.Emit
-// uint64_t FramePayloadOffset = Offset for the frame payload
 // uint64_t FrameTableOffset = FrameTableGenerator.Emit
 // uint64_t CallStackPayloadOffset = Offset for the call stack payload
 // uint64_t CallStackTableOffset = CallStackTableGenerator.Emit
+// uint64_t RecordPayloadOffset = Offset for the record payload
+// uint64_t RecordTableOffset = RecordTableGenerator.Emit
 // uint64_t Num schema entries
 // uint64_t Schema entry 0
 // uint64_t Schema entry 1
 // ....
 // uint64_t Schema entry N - 1
-// OnDiskChainedHashTable MemProfRecordData
 // OnDiskChainedHashTable MemProfFrameData
 // OnDiskChainedHashTable MemProfCallStackData
+// OnDiskChainedHashTable MemProfRecordData
 static Error writeMemProfV3(ProfOStream &OS,
                             memprof::IndexedMemProfData &MemProfData,
                             bool MemProfFullSchema) {
   OS.write(memprof::Version3);
   uint64_t HeaderUpdatePos = OS.tell();
-  OS.write(0ULL); // Reserve space for the memprof record table offset.
-  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
   OS.write(0ULL); // Reserve space for the memprof frame table offset.
   OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
   OS.write(0ULL); // Reserve space for the memprof call stack table offset.
+  OS.write(0ULL); // Reserve space for the memprof record payload offset.
+  OS.write(0ULL); // Reserve space for the memprof record table offset.
 
   auto Schema = memprof::getHotColdSchema();
   if (MemProfFullSchema)
     Schema = memprof::getFullSchema();
   writeMemProfSchema(OS, Schema);
 
-  uint64_t RecordTableOffset = writeMemProfRecords(OS, MemProfData.RecordData,
-                                                   &Schema, memprof::Version3);
-
-  uint64_t FramePayloadOffset = OS.tell();
   uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.FrameData);
 
   uint64_t CallStackPayloadOffset = OS.tell();
   uint64_t CallStackTableOffset =
       writeMemProfCallStacks(OS, MemProfData.CallStackData);
 
+  uint64_t RecordPayloadOffset = OS.tell();
+  uint64_t RecordTableOffset = writeMemProfRecords(OS, MemProfData.RecordData,
+                                                   &Schema, memprof::Version3);
+
   uint64_t Header[] = {
-      RecordTableOffset,      FramePayloadOffset,   FrameTableOffset,
-      CallStackPayloadOffset, CallStackTableOffset,
+      FrameTableOffset,    CallStackPayloadOffset, CallStackTableOffset,
+      RecordPayloadOffset, RecordTableOffset,
   };
   OS.patch({{HeaderUpdatePos, Header, std::size(Header)}});