[llvm] [memprof] Add Version2 of the indexed MemProf format (PR #89100)

Kazu Hirata via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 18 13:58:45 PDT 2024


https://github.com/kazutakahirata updated https://github.com/llvm/llvm-project/pull/89100

>From ac79223ae26d6a2d702b0067cf4dca114b5fc581 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Thu, 11 Apr 2024 10:24:26 -0700
Subject: [PATCH 1/4] [memprof] Add Version2 of the indexed MemProf format

This patch adds Version2 of the indexed MemProf format.  The new
format comes with a hash table from CallStackId to actual call stacks
llvm::SmallVector<FrameId>.  The rest of the format refers to call
stacks with CallStackId.  This "values + references" model effectively
deduplicates call stacks.  Without this patch, a large indexed memprof
file of mine shrinks from 4.4GB to 1.6GB, a 64% reduction.

This patch does not make Version2 generally available yet as I am
planning to make a few more changes to the format.
---
 .../llvm/ProfileData/InstrProfReader.h        |  4 +
 .../llvm/ProfileData/InstrProfWriter.h        | 10 ++
 llvm/include/llvm/ProfileData/MemProf.h       | 92 ++++++++++++++++++-
 llvm/include/llvm/ProfileData/MemProfReader.h |  6 ++
 llvm/lib/ProfileData/InstrProfReader.cpp      | 66 +++++++++++--
 llvm/lib/ProfileData/InstrProfWriter.cpp      | 90 ++++++++++++++++--
 .../tools/llvm-profdata/memprof-merge-v0.test |  3 +
 llvm/tools/llvm-profdata/llvm-profdata.cpp    | 15 ++-
 8 files changed, 270 insertions(+), 16 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index f662febb9216bb..c45b9084744d63 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -560,6 +560,8 @@ using MemProfRecordHashTable =
     OnDiskIterableChainedHashTable<memprof::RecordLookupTrait>;
 using MemProfFrameHashTable =
     OnDiskIterableChainedHashTable<memprof::FrameLookupTrait>;
+using MemProfCallStackHashTable =
+    OnDiskIterableChainedHashTable<memprof::CallStackLookupTrait>;
 
 template <typename HashTableImpl>
 class InstrProfReaderItaniumRemapper;
@@ -666,6 +668,8 @@ class IndexedInstrProfReader : public InstrProfReader {
   std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
   /// MemProf frame profile data on-disk indexed via frame id.
   std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
+  /// MemProf call stack data on-disk indexed via call stack id.
+  std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable;
   /// VTableNamePtr points to the beginning of compressed vtable names.
   /// When a symtab is constructed from profiles by llvm-profdata, the list of
   /// names could be decompressed based on `VTableNamePtr` and
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 4b42392bc1e061..66372a2253a1e7 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -60,6 +60,10 @@ class InstrProfWriter {
   // inline.
   llvm::MapVector<memprof::FrameId, memprof::Frame> MemProfFrameData;
 
+  // A map to hold call stack id to call stacks.
+  llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
+      MemProfCallStackData;
+
   // List of binary ids.
   std::vector<llvm::object::BuildID> BinaryIds;
 
@@ -114,6 +118,12 @@ class InstrProfWriter {
   bool addMemProfFrame(const memprof::FrameId, const memprof::Frame &F,
                        function_ref<void(Error)> Warn);
 
+  /// Add a call stack identified by the hash of the contents of the call stack
+  /// in \p CallStack.
+  bool addMemProfCallStack(const memprof::CallStackId CSId,
+                           const llvm::SmallVector<memprof::FrameId> &CallStack,
+                           function_ref<void(Error)> Warn);
+
   // Add a binary id to the binary ids list.
   void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);
 
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 7f3956bd739390..07f03fe1ac57f2 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -29,7 +29,7 @@ enum IndexedVersion : uint64_t {
 };
 
 constexpr uint64_t MinimumSupportedVersion = Version0;
-constexpr uint64_t MaximumSupportedVersion = Version1;
+constexpr uint64_t MaximumSupportedVersion = Version2;
 
 // Verify that the minimum and maximum satisfy the obvious constraint.
 static_assert(MinimumSupportedVersion <= MaximumSupportedVersion);
@@ -633,6 +633,96 @@ class FrameLookupTrait {
   }
 };
 
+// Trait for writing call stacks to the on-disk hash table.
+class CallStackWriterTrait {
+public:
+  using key_type = CallStackId;
+  using key_type_ref = CallStackId;
+
+  using data_type = llvm::SmallVector<FrameId>;
+  using data_type_ref = llvm::SmallVector<FrameId> &;
+
+  using hash_value_type = CallStackId;
+  using offset_type = uint64_t;
+
+  static hash_value_type ComputeHash(key_type_ref K) { return K; }
+
+  static std::pair<offset_type, offset_type>
+  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
+    using namespace support;
+    endian::Writer LE(Out, llvm::endianness::little);
+    offset_type N = sizeof(K);
+    LE.write<offset_type>(N);
+    offset_type M = sizeof(CallStackId) * V.size();
+    LE.write<offset_type>(M);
+    return std::make_pair(N, M);
+  }
+
+  void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
+    using namespace support;
+    endian::Writer LE(Out, llvm::endianness::little);
+    LE.write<key_type>(K);
+  }
+
+  void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
+                offset_type /*Unused*/) {
+    using namespace support;
+    endian::Writer LE(Out, llvm::endianness::little);
+    // Emit the frames.  We do not explicitly emit the length of the vector
+    // because it can be inferred from the data length.
+    for (FrameId F : V)
+      LE.write<FrameId>(F);
+  }
+};
+
+// Trait for reading call stack mappings from the on-disk hash table.
+class CallStackLookupTrait {
+public:
+  using data_type = const llvm::SmallVector<FrameId>;
+  using internal_key_type = CallStackId;
+  using external_key_type = CallStackId;
+  using hash_value_type = CallStackId;
+  using offset_type = uint64_t;
+
+  static bool EqualKey(internal_key_type A, internal_key_type B) {
+    return A == B;
+  }
+  static uint64_t GetInternalKey(internal_key_type K) { return K; }
+  static uint64_t GetExternalKey(external_key_type K) { return K; }
+
+  hash_value_type ComputeHash(internal_key_type K) { return K; }
+
+  static std::pair<offset_type, offset_type>
+  ReadKeyDataLength(const unsigned char *&D) {
+    using namespace support;
+
+    offset_type KeyLen =
+        endian::readNext<offset_type, llvm::endianness::little>(D);
+    offset_type DataLen =
+        endian::readNext<offset_type, llvm::endianness::little>(D);
+    return std::make_pair(KeyLen, DataLen);
+  }
+
+  uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
+    using namespace support;
+    return endian::readNext<external_key_type, llvm::endianness::little>(D);
+  }
+
+  data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) {
+    using namespace support;
+    llvm::SmallVector<FrameId> CS;
+    // Derive the number of frames from the data length.
+    uint64_t NumFrames = Length / sizeof(CallStackId);
+    assert(Length % sizeof(CallStackId) == 0);
+    CS.reserve(NumFrames);
+    for (size_t I = 0; I != NumFrames; ++I) {
+      FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D);
+      CS.push_back(F);
+    }
+    return CS;
+  }
+};
+
 // Compute a CallStackId for a given call stack.
 CallStackId hashCallStack(ArrayRef<FrameId> CS);
 
diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index 7fa8af184dc93b..444c58e8bdc8bc 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -51,6 +51,12 @@ class MemProfReader {
     return IdToFrame;
   }
 
+  // Return a const reference to the internal Id to call stacks.
+  const llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> &
+  getCallStacks() const {
+    return CSIdToCallStack;
+  }
+
   // Return a const reference to the internal function profile data.
   const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> &
   getProfileData() const {
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 8574a96a1b06fc..ef509b821ff71b 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1249,14 +1249,14 @@ Error IndexedInstrProfReader::readHeader() {
 
     // Read the first 64-bit word, which may be RecordTableOffset in
     // memprof::MemProfVersion0 or the MemProf version number in
-    // memprof::MemProfVersion1.
+    // memprof::MemProfVersion1 and above.
     const uint64_t FirstWord =
         support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
 
     memprof::IndexedVersion Version = memprof::Version0;
-    if (FirstWord == memprof::Version1) {
+    if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2) {
       // Everything is good.  We can proceed to deserialize the rest.
-      Version = memprof::Version1;
+      Version = static_cast<memprof::IndexedVersion>(FirstWord);
     } else if (FirstWord >= 24) {
       // This is a heuristic/hack to detect memprof::MemProfVersion0,
       // which does not have a version field in the header.
@@ -1286,6 +1286,18 @@ Error IndexedInstrProfReader::readHeader() {
     const uint64_t FrameTableOffset =
         support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
 
+    // The offset in the stream right before invoking
+    // CallStackTableGenerator.Emit.
+    uint64_t CallStackPayloadOffset = 0;
+    // The value returned from CallStackTableGenerator.Emit.
+    uint64_t CallStackTableOffset = 0;
+    if (Version >= memprof::Version2) {
+      CallStackPayloadOffset =
+          support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+      CallStackTableOffset =
+          support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+    }
+
     // Read the schema.
     auto SchemaOr = memprof::readMemProfSchema(Ptr);
     if (!SchemaOr)
@@ -1296,7 +1308,7 @@ Error IndexedInstrProfReader::readHeader() {
     MemProfRecordTable.reset(MemProfRecordHashTable::Create(
         /*Buckets=*/Start + RecordTableOffset,
         /*Payload=*/Ptr,
-        /*Base=*/Start, memprof::RecordLookupTrait(memprof::Version1, Schema)));
+        /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
 
     // Initialize the frame table reader with the payload and bucket offsets.
     MemProfFrameTable.reset(MemProfFrameHashTable::Create(
@@ -1304,12 +1316,22 @@ Error IndexedInstrProfReader::readHeader() {
         /*Payload=*/Start + FramePayloadOffset,
         /*Base=*/Start, memprof::FrameLookupTrait()));
 
+    if (Version >= memprof::Version2)
+      MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
+          /*Buckets=*/Start + CallStackTableOffset,
+          /*Payload=*/Start + CallStackPayloadOffset,
+          /*Base=*/Start, memprof::CallStackLookupTrait()));
+
 #ifdef EXPENSIVE_CHECKS
     // Go through all the records and verify that CSId has been correctly
     // populated.  Do this only under EXPENSIVE_CHECKS.  Otherwise, we
     // would defeat the purpose of OnDiskIterableChainedHashTable.
-    for (const auto &Record : MemProfRecordTable->data())
-      verifyIndexedMemProfRecord(Record);
+    // Note that we can compare CSId against actual call stacks only for
+    // Version0 and Version1 because IndexedAllocationInfo::CallStack and
+    // IndexedMemProfRecord::CallSites are not populated in Version2.
+    if (Version <= memprof::Version1)
+      for (const auto &Record : MemProfRecordTable->data())
+        verifyIndexedMemProfRecord(Record);
 #endif
   }
 
@@ -1502,7 +1524,29 @@ IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
     return *FrIter;
   };
 
-  memprof::MemProfRecord Record(*Iter, IdToFrameCallback);
+  // Setup a callback to convert call stack ids to call stacks using the on-disk
+  // hash table.
+  std::optional<memprof::CallStackId> LastUnmappedCSId;
+  auto CSIdToCallStackCallback = [&](memprof::CallStackId CSId) {
+    llvm::SmallVector<memprof::Frame> Frames;
+    auto CSIter = MemProfCallStackTable->find(CSId);
+    if (CSIter == MemProfCallStackTable->end()) {
+      LastUnmappedFrameId = CSId;
+    } else {
+      const llvm::SmallVector<memprof::FrameId> &CS = *CSIter;
+      Frames.reserve(CS.size());
+      for (memprof::FrameId Id : CS)
+        Frames.push_back(IdToFrameCallback(Id));
+    }
+    return Frames;
+  };
+
+  const memprof::IndexedMemProfRecord IndexedRecord = *Iter;
+  memprof::MemProfRecord Record;
+  if (MemProfCallStackTable)
+    Record = IndexedRecord.toMemProfRecord(CSIdToCallStackCallback);
+  else
+    Record = memprof::MemProfRecord(IndexedRecord, IdToFrameCallback);
 
   // Check that all frame ids were successfully converted to frames.
   if (LastUnmappedFrameId) {
@@ -1510,6 +1554,14 @@ IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
                                       "memprof frame not found for frame id " +
                                           Twine(*LastUnmappedFrameId));
   }
+
+  // Check that all call stack ids were successfully converted to call stacks.
+  if (LastUnmappedCSId) {
+    return make_error<InstrProfError>(
+        instrprof_error::hash_mismatch,
+        "memprof call stack not found for call stack id " +
+            Twine(*LastUnmappedCSId));
+  }
   return Record;
 }
 
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index ede042dd6b9c3c..6e01e52e5905ee 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -297,6 +297,23 @@ bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id,
   return true;
 }
 
+bool InstrProfWriter::addMemProfCallStack(
+    const memprof::CallStackId CSId,
+    const llvm::SmallVector<memprof::FrameId> &CallStack,
+    function_ref<void(Error)> Warn) {
+  auto Result = MemProfCallStackData.insert({CSId, CallStack});
+  // If a mapping already exists for the current call stack id and it does not
+  // match the new mapping provided then reset the existing contents and bail
+  // out. We don't support the merging of memprof data whose CallStack -> Id
+  // mapping across profiles is inconsistent.
+  if (!Result.second && Result.first->second != CallStack) {
+    Warn(make_error<InstrProfError>(instrprof_error::malformed,
+                                    "call stack to id mapping mismatch"));
+    return false;
+  }
+  return true;
+}
+
 void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
   llvm::append_range(BinaryIds, BIs);
 }
@@ -378,6 +395,12 @@ void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
       return;
   }
 
+  MemProfCallStackData.reserve(IPW.MemProfCallStackData.size());
+  for (auto &[CSId, CallStack] : IPW.MemProfCallStackData) {
+    if (!addMemProfCallStack(CSId, CallStack, Warn))
+      return;
+  }
+
   MemProfRecordData.reserve(IPW.MemProfRecordData.size());
   for (auto &I : IPW.MemProfRecordData) {
     addMemProfRecord(I.first, I.second);
@@ -427,8 +450,8 @@ static uint64_t writeMemProfRecords(
     ProfOStream &OS,
     llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
         &MemProfRecordData,
-    memprof::MemProfSchema *Schema) {
-  memprof::RecordWriterTrait RecordWriter(memprof::Version1);
+    memprof::MemProfSchema *Schema, memprof::IndexedVersion Version) {
+  memprof::RecordWriterTrait RecordWriter(Version);
   RecordWriter.Schema = Schema;
   OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
       RecordTableGenerator;
@@ -461,6 +484,20 @@ static uint64_t writeMemProfFrames(
   return FrameTableGenerator.Emit(OS.OS);
 }
 
+static uint64_t writeMemProfCallStacks(
+    ProfOStream &OS,
+    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
+        &MemProfCallStackData) {
+  OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
+      CallStackTableGenerator;
+  for (auto &[CSId, CallStack] : MemProfCallStackData)
+    CallStackTableGenerator.insert(CSId, CallStack);
+  // Release the memory of this vector as it is no longer needed.
+  MemProfCallStackData.clear();
+
+  return CallStackTableGenerator.Emit(OS.OS);
+}
+
 static Error writeMemProfV0(
     ProfOStream &OS,
     llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
@@ -475,7 +512,7 @@ static Error writeMemProfV0(
   writeMemProfSchema(OS, Schema);
 
   uint64_t RecordTableOffset =
-      writeMemProfRecords(OS, MemProfRecordData, &Schema);
+      writeMemProfRecords(OS, MemProfRecordData, &Schema, memprof::Version0);
 
   uint64_t FramePayloadOffset = OS.tell();
   uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData);
@@ -501,7 +538,7 @@ static Error writeMemProfV1(
   writeMemProfSchema(OS, Schema);
 
   uint64_t RecordTableOffset =
-      writeMemProfRecords(OS, MemProfRecordData, &Schema);
+      writeMemProfRecords(OS, MemProfRecordData, &Schema, memprof::Version1);
 
   uint64_t FramePayloadOffset = OS.tell();
   uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData);
@@ -512,6 +549,43 @@ static Error writeMemProfV1(
   return Error::success();
 }
 
+static Error writeMemProfV2(
+    ProfOStream &OS,
+    llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
+        &MemProfRecordData,
+    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
+    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
+        &MemProfCallStackData) {
+  OS.write(memprof::Version2);
+  uint64_t HeaderUpdatePos = OS.tell();
+  OS.write(0ULL); // Reserve space for the memprof record table offset.
+  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
+  OS.write(0ULL); // Reserve space for the memprof frame table offset.
+  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
+  OS.write(0ULL); // Reserve space for the memprof call stack table offset.
+
+  auto Schema = memprof::PortableMemInfoBlock::getSchema();
+  writeMemProfSchema(OS, Schema);
+
+  uint64_t RecordTableOffset =
+      writeMemProfRecords(OS, MemProfRecordData, &Schema, memprof::Version2);
+
+  uint64_t FramePayloadOffset = OS.tell();
+  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData);
+
+  uint64_t CallStackPayloadOffset = OS.tell();
+  uint64_t CallStackTableOffset =
+      writeMemProfCallStacks(OS, MemProfCallStackData);
+
+  uint64_t Header[] = {
+      RecordTableOffset,      FramePayloadOffset,   FrameTableOffset,
+      CallStackPayloadOffset, CallStackTableOffset,
+  };
+  OS.patch({{HeaderUpdatePos, Header, std::size(Header)}});
+
+  return Error::success();
+}
+
 // The MemProf profile data includes a simple schema
 // with the format described below followed by the hashtable:
 // uint64_t Version
@@ -530,6 +604,8 @@ static Error writeMemProf(
     llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
         &MemProfRecordData,
     llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
+    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
+        &MemProfCallStackData,
     memprof::IndexedVersion MemProfVersionRequested) {
 
   switch (MemProfVersionRequested) {
@@ -538,8 +614,8 @@ static Error writeMemProf(
   case memprof::Version1:
     return writeMemProfV1(OS, MemProfRecordData, MemProfFrameData);
   case memprof::Version2:
-    // TODO: Implement.  Fall through to the error handling below for now.
-    break;
+    return writeMemProfV2(OS, MemProfRecordData, MemProfFrameData,
+                          MemProfCallStackData);
   }
 
   return make_error<InstrProfError>(
@@ -658,7 +734,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
     MemProfSectionStart = OS.tell();
     if (auto E = writeMemProf(OS, MemProfRecordData, MemProfFrameData,
-                              MemProfVersionRequested))
+                              MemProfCallStackData, MemProfVersionRequested))
       return E;
   }
 
diff --git a/llvm/test/tools/llvm-profdata/memprof-merge-v0.test b/llvm/test/tools/llvm-profdata/memprof-merge-v0.test
index 68132961eb78da..03ccbdd42efdad 100644
--- a/llvm/test/tools/llvm-profdata/memprof-merge-v0.test
+++ b/llvm/test/tools/llvm-profdata/memprof-merge-v0.test
@@ -13,6 +13,9 @@ RUN: llvm-profdata show %t.prof.v0 | FileCheck %s
 RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=1 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v1
 RUN: llvm-profdata show %t.prof.v1 | FileCheck %s
 
+RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=2 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v2
+RUN: llvm-profdata show %t.prof.v2 | FileCheck %s
+
 For now we only check the validity of the instrumented profile since we don't
 have a way to display the contents of the memprof indexed format yet.
 
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 66a4cea0353479..78daf9f7dc109a 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -305,7 +305,8 @@ cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
     cl::desc("Specify the version of the memprof format to use"),
     cl::init(memprof::Version0),
     cl::values(clEnumValN(memprof::Version0, "0", "version 0"),
-               clEnumValN(memprof::Version1, "1", "version 1")));
+               clEnumValN(memprof::Version1, "1", "version 1"),
+               clEnumValN(memprof::Version2, "2", "version 2")));
 
 // Options specific to overlap subcommand.
 cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
@@ -677,6 +678,18 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
       if (!Succeeded)
         return;
     }
+
+    // Add the call stacks into the writer context.
+    const auto &CSIdToCallStacks = Reader->getCallStacks();
+    for (const auto &I : CSIdToCallStacks) {
+      bool Succeeded = WC->Writer.addMemProfCallStack(
+          /*Id=*/I.first, /*Frame=*/I.getSecond(), MemProfError);
+      // If we weren't able to add the call stacks then it doesn't make sense
+      // to try to add the records from this profile.
+      if (!Succeeded)
+        return;
+    }
+
     const auto &FunctionProfileData = Reader->getProfileData();
     // Add the memprof records into the writer context.
     for (const auto &[GUID, Record] : FunctionProfileData) {

>From 90c2125bf08a212df29ce82e448f5285262d245f Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 17 Apr 2024 15:08:05 -0700
Subject: [PATCH 2/4] Address comments.

---
 llvm/include/llvm/ProfileData/MemProf.h  | 4 ++--
 llvm/lib/ProfileData/InstrProfReader.cpp | 2 +-
 llvm/lib/ProfileData/InstrProfWriter.cpp | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 07f03fe1ac57f2..55548e3dde17cd 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -653,7 +653,7 @@ class CallStackWriterTrait {
     endian::Writer LE(Out, llvm::endianness::little);
     offset_type N = sizeof(K);
     LE.write<offset_type>(N);
-    offset_type M = sizeof(CallStackId) * V.size();
+    offset_type M = sizeof(FrameId) * V.size();
     LE.write<offset_type>(M);
     return std::make_pair(N, M);
   }
@@ -712,7 +712,7 @@ class CallStackLookupTrait {
     using namespace support;
     llvm::SmallVector<FrameId> CS;
     // Derive the number of frames from the data length.
-    uint64_t NumFrames = Length / sizeof(CallStackId);
+    uint64_t NumFrames = Length / sizeof(FrameId);
     assert(Length % sizeof(CallStackId) == 0);
     CS.reserve(NumFrames);
     for (size_t I = 0; I != NumFrames; ++I) {
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index ef509b821ff71b..1e42ae88adfe0a 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1531,7 +1531,7 @@ IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
     llvm::SmallVector<memprof::Frame> Frames;
     auto CSIter = MemProfCallStackTable->find(CSId);
     if (CSIter == MemProfCallStackTable->end()) {
-      LastUnmappedFrameId = CSId;
+      LastUnmappedCSId = CSId;
     } else {
       const llvm::SmallVector<memprof::FrameId> &CS = *CSIter;
       Frames.reserve(CS.size());
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 6e01e52e5905ee..f70a49778e752b 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -301,12 +301,12 @@ bool InstrProfWriter::addMemProfCallStack(
     const memprof::CallStackId CSId,
     const llvm::SmallVector<memprof::FrameId> &CallStack,
     function_ref<void(Error)> Warn) {
-  auto Result = MemProfCallStackData.insert({CSId, CallStack});
+  auto [Iter, Inserted] = MemProfCallStackData.insert({CSId, CallStack});
   // If a mapping already exists for the current call stack id and it does not
   // match the new mapping provided then reset the existing contents and bail
   // out. We don't support the merging of memprof data whose CallStack -> Id
   // mapping across profiles is inconsistent.
-  if (!Result.second && Result.first->second != CallStack) {
+  if (!Inserted && Iter->second != CallStack) {
     Warn(make_error<InstrProfError>(instrprof_error::malformed,
                                     "call stack to id mapping mismatch"));
     return false;

>From 57fe152f3f1067c2e75785f03af94b37861dad0e Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 17 Apr 2024 15:57:01 -0700
Subject: [PATCH 3/4] Add MemProfFileTest.cpp

---
 .../llvm/ProfileData/InstrProfWriter.h        |  3 +
 llvm/unittests/ProfileData/InstrProfTest.cpp  | 87 ++++++++++++++++++-
 2 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 66372a2253a1e7..0e83076611c18f 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -200,6 +200,9 @@ class InstrProfWriter {
   // Internal interface for testing purpose only.
   void setValueProfDataEndianness(llvm::endianness Endianness);
   void setOutputSparse(bool Sparse);
+  void setMemProfVersionRequested(memprof::IndexedVersion Version) {
+    MemProfVersionRequested = Version;
+  }
   // Compute the overlap b/w this object and Other. Program level result is
   // stored in Overlap and function level result is stored in FuncLevelOverlap.
   void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap,
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 82701c47daf71c..08dfcedad02724 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -347,6 +347,9 @@ using ::llvm::memprof::IndexedMemProfRecord;
 using ::llvm::memprof::MemInfoBlock;
 using FrameIdMapTy =
     llvm::DenseMap<::llvm::memprof::FrameId, ::llvm::memprof::Frame>;
+using CallStackIdMapTy =
+    llvm::DenseMap<::llvm::memprof::CallStackId,
+                   ::llvm::SmallVector<::llvm::memprof::FrameId>>;
 
 static FrameIdMapTy getFrameMapping() {
   FrameIdMapTy Mapping;
@@ -359,6 +362,14 @@ static FrameIdMapTy getFrameMapping() {
   return Mapping;
 }
 
+static CallStackIdMapTy getCallStackMapping() {
+  CallStackIdMapTy Mapping;
+  Mapping.insert({0x111, {0, 1}});
+  Mapping.insert({0x222, {2, 3}});
+  Mapping.insert({0x333, {4, 5}});
+  return Mapping;
+}
+
 IndexedMemProfRecord makeRecord(
     std::initializer_list<std::initializer_list<::llvm::memprof::FrameId>>
         AllocFrames,
@@ -374,6 +385,19 @@ IndexedMemProfRecord makeRecord(
   return MR;
 }
 
+IndexedMemProfRecord
+makeRecordV2(std::initializer_list<::llvm::memprof::CallStackId> AllocFrames,
+             std::initializer_list<::llvm::memprof::CallStackId> CallSiteFrames,
+             const MemInfoBlock &Block = MemInfoBlock()) {
+  llvm::memprof::IndexedMemProfRecord MR;
+  for (const auto &CSId : AllocFrames)
+    MR.AllocSites.emplace_back(::llvm::SmallVector<memprof::FrameId>(), CSId,
+                               Block);
+  for (const auto &CSId : CallSiteFrames)
+    MR.CallSiteIds.push_back(CSId);
+  return MR;
+}
+
 MATCHER_P(EqualsRecord, Want, "") {
   const memprof::MemProfRecord &Got = arg;
 
@@ -408,7 +432,7 @@ MATCHER_P(EqualsRecord, Want, "") {
   return true;
 }
 
-TEST_F(InstrProfTest, test_memprof) {
+TEST_F(InstrProfTest, test_memprof_v0) {
   ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
                     Succeeded());
 
@@ -450,6 +474,67 @@ TEST_F(InstrProfTest, test_memprof) {
   EXPECT_THAT(WantRecord, EqualsRecord(Record));
 }
 
+TEST_F(InstrProfTest, test_memprof_v2) {
+  Writer.setMemProfVersionRequested(memprof::Version2);
+
+  ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
+                    Succeeded());
+
+  const IndexedMemProfRecord IndexedMR = makeRecordV2(
+      /*AllocFrames=*/{0x111, 0x222},
+      /*CallSiteFrames=*/{0x333});
+  const FrameIdMapTy IdToFrameMap = getFrameMapping();
+  const auto CSIdToCallStackMap = getCallStackMapping();
+  for (const auto &I : IdToFrameMap) {
+    Writer.addMemProfFrame(I.first, I.getSecond(), Err);
+  }
+  for (const auto &I : CSIdToCallStackMap) {
+    Writer.addMemProfCallStack(I.first, I.getSecond(), Err);
+  }
+  Writer.addMemProfRecord(/*Id=*/0x9999, IndexedMR);
+
+  auto Profile = Writer.writeBuffer();
+  readProfile(std::move(Profile));
+
+  auto RecordOr = Reader->getMemProfRecord(0x9999);
+  ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
+  const memprof::MemProfRecord &Record = RecordOr.get();
+
+  std::optional<memprof::FrameId> LastUnmappedFrameId;
+  auto IdToFrameCallback = [&](const memprof::FrameId Id) {
+    auto Iter = IdToFrameMap.find(Id);
+    if (Iter == IdToFrameMap.end()) {
+      LastUnmappedFrameId = Id;
+      return memprof::Frame(0, 0, 0, false);
+    }
+    return Iter->second;
+  };
+
+  std::optional<::llvm::memprof::CallStackId> LastUnmappedCSId;
+  auto CSIdToCallStackCallback = [&](::llvm::memprof::CallStackId CSId) {
+    llvm::SmallVector<memprof::Frame> Frames;
+    auto CSIter = CSIdToCallStackMap.find(CSId);
+    if (CSIter == CSIdToCallStackMap.end()) {
+      LastUnmappedCSId = CSId;
+    } else {
+      const ::llvm::SmallVector<::llvm::memprof::FrameId> &CS =
+          CSIter->getSecond();
+      Frames.reserve(CS.size());
+      for (::llvm::memprof::FrameId Id : CS)
+        Frames.push_back(IdToFrameCallback(Id));
+    }
+    return Frames;
+  };
+
+  const ::llvm::memprof::MemProfRecord WantRecord =
+      IndexedMR.toMemProfRecord(CSIdToCallStackCallback);
+  ASSERT_FALSE(LastUnmappedFrameId.has_value())
+      << "could not map frame id: " << *LastUnmappedFrameId;
+  ASSERT_FALSE(LastUnmappedCSId.has_value())
+      << "could not map call stack id: " << *LastUnmappedCSId;
+  EXPECT_THAT(WantRecord, EqualsRecord(Record));
+}
+
 TEST_F(InstrProfTest, test_memprof_getrecord_error) {
   ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
                     Succeeded());

>From 57881fa6d98e34fab0d86c97ca51a53024dedd3d Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Thu, 18 Apr 2024 13:48:12 -0700
Subject: [PATCH 4/4] Address comments.

---
 llvm/include/llvm/ProfileData/InstrProfWriter.h | 2 +-
 llvm/unittests/ProfileData/InstrProfTest.cpp    | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 0e83076611c18f..b0ae8f364fcafb 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -197,7 +197,7 @@ class InstrProfWriter {
     return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage);
   }
 
-  // Internal interface for testing purpose only.
+  // Internal interfaces for testing purpose only.
   void setValueProfDataEndianness(llvm::endianness Endianness);
   void setOutputSparse(bool Sparse);
   void setMemProfVersionRequested(memprof::IndexedVersion Version) {
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 08dfcedad02724..16b8b50914cc6b 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -391,6 +391,8 @@ makeRecordV2(std::initializer_list<::llvm::memprof::CallStackId> AllocFrames,
              const MemInfoBlock &Block = MemInfoBlock()) {
   llvm::memprof::IndexedMemProfRecord MR;
   for (const auto &CSId : AllocFrames)
+    // We don't populate IndexedAllocationInfo::CallStack because we use it only
+    // in Version0 and Version1.
     MR.AllocSites.emplace_back(::llvm::SmallVector<memprof::FrameId>(), CSId,
                                Block);
   for (const auto &CSId : CallSiteFrames)
@@ -528,9 +530,9 @@ TEST_F(InstrProfTest, test_memprof_v2) {
 
   const ::llvm::memprof::MemProfRecord WantRecord =
       IndexedMR.toMemProfRecord(CSIdToCallStackCallback);
-  ASSERT_FALSE(LastUnmappedFrameId.has_value())
+  ASSERT_EQ(LastUnmappedFrameId, std::nullopt)
       << "could not map frame id: " << *LastUnmappedFrameId;
-  ASSERT_FALSE(LastUnmappedCSId.has_value())
+  ASSERT_EQ(LastUnmappedCSId, std::nullopt)
       << "could not map call stack id: " << *LastUnmappedCSId;
   EXPECT_THAT(WantRecord, EqualsRecord(Record));
 }



More information about the llvm-commits mailing list