[llvm] [MemProf] Add v4 which contains CalleeGuids to CallSiteInfo. (PR #137394)

Snehasish Kumar via llvm-commits llvm-commits at lists.llvm.org
Thu May 1 19:21:47 PDT 2025


https://github.com/snehasish updated https://github.com/llvm/llvm-project/pull/137394

>From a796d02755c9505b54d67805dff98383b8c537aa Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Fri, 25 Apr 2025 13:28:20 -0700
Subject: [PATCH 1/4] [MemProf] Add v4 which contains CalleeGuids to
 CallSiteInfo.

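This patch adds CalleeGuids to the serialized CallSiteInfo records and
introduces version 4 of the indexed MemProf format to carry them.
Version 3 remains the default that llvm-profdata writes; version 4 is
selected with --memprof-version=4.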
---
 .../llvm/ProfileData/InstrProfReader.h        |  3 +-
 llvm/include/llvm/ProfileData/MemProf.h       |  4 +-
 llvm/include/llvm/ProfileData/MemProfYAML.h   | 60 +++++++++++-
 llvm/lib/ProfileData/IndexedMemProfData.cpp   | 77 +++++++++-------
 llvm/lib/ProfileData/InstrProfReader.cpp      | 21 ++---
 llvm/lib/ProfileData/MemProf.cpp              | 91 +++++++++++++++++++
 .../tools/llvm-profdata/memprof-yaml.test     | 12 +--
 llvm/tools/llvm-profdata/llvm-profdata.cpp    |  3 +-
 llvm/unittests/ProfileData/MemProfTest.cpp    | 47 +++++++++-
 9 files changed, 257 insertions(+), 61 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index f1010b312ee56..c250a9ede39bc 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -705,7 +705,8 @@ class IndexedMemProfReader {
   unsigned RadixTreeSize = 0;
 
   Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
-  Error deserializeV3(const unsigned char *Start, const unsigned char *Ptr);
+  Error deserializeRadixTreeBased(const unsigned char *Start,
+                                  const unsigned char *Ptr);
 
 public:
   IndexedMemProfReader() = default;
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index e07a3189e4259..06d17438fa70f 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -35,10 +35,12 @@ enum IndexedVersion : uint64_t {
   // Version 3: Added a radix tree for call stacks.  Switched to linear IDs for
   // frames and call stacks.
   Version3 = 3,
+  // Version 4: Added CalleeGuids to call site info.
+  Version4 = 4,
 };
 
 constexpr uint64_t MinimumSupportedVersion = Version2;
-constexpr uint64_t MaximumSupportedVersion = Version3;
+constexpr uint64_t MaximumSupportedVersion = Version4;
 
 // Verify that the minimum and maximum satisfy the obvious constraint.
 static_assert(MinimumSupportedVersion <= MaximumSupportedVersion);
diff --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h
index a3ca03eb92d2e..08dee253f615a 100644
--- a/llvm/include/llvm/ProfileData/MemProfYAML.h
+++ b/llvm/include/llvm/ProfileData/MemProfYAML.h
@@ -1,6 +1,7 @@
 #ifndef LLVM_PROFILEDATA_MEMPROFYAML_H_
 #define LLVM_PROFILEDATA_MEMPROFYAML_H_
 
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/YAMLTraits.h"
@@ -28,8 +29,9 @@ struct AllMemProfData {
 namespace yaml {
 template <> struct ScalarTraits<memprof::GUIDHex64> {
   static void output(const memprof::GUIDHex64 &Val, void *, raw_ostream &Out) {
-    // Print GUID as a 16-digit hexadecimal number.
-    Out << format("0x%016" PRIx64, (uint64_t)Val);
+    // Print GUID as a hexadecimal number with 0x prefix, no padding to keep
+    // test strings compact.
+    Out << format("0x%" PRIx64, (uint64_t)Val);
   }
   static StringRef input(StringRef Scalar, void *, memprof::GUIDHex64 &Val) {
     // Reject decimal GUIDs.
@@ -156,10 +158,43 @@ template <> struct MappingTraits<memprof::AllocationInfo> {
 // treat the GUID and the fields within MemProfRecord at the same level as if
 // the GUID were part of MemProfRecord.
 template <> struct MappingTraits<memprof::CallSiteInfo> {
+  // Helper class to normalize CalleeGuids to use GUIDHex64 for YAML I/O.
+  class CallSiteInfoWithHex64Guids {
+  public:
+    CallSiteInfoWithHex64Guids(IO &) {}
+    CallSiteInfoWithHex64Guids(IO &, const memprof::CallSiteInfo &CS)
+        : Frames(CS.Frames) {
+      // Convert uint64_t GUIDs to GUIDHex64 for serialization.
+      CalleeGuids.reserve(CS.CalleeGuids.size());
+      for (uint64_t Guid : CS.CalleeGuids)
+        CalleeGuids.push_back(memprof::GUIDHex64(Guid));
+    }
+
+    memprof::CallSiteInfo denormalize(IO &) {
+      memprof::CallSiteInfo CS;
+      CS.Frames = Frames;
+      // Convert GUIDHex64 back to uint64_t GUIDs after deserialization.
+      CS.CalleeGuids.reserve(CalleeGuids.size());
+      for (memprof::GUIDHex64 HexGuid : CalleeGuids)
+        CS.CalleeGuids.push_back(HexGuid.value);
+      return CS;
+    }
+
+    // Keep Frames as is, since MappingTraits<memprof::Frame> handles its
+    // Function GUID.
+    decltype(memprof::CallSiteInfo::Frames) Frames;
+    // Use a vector of GUIDHex64 for CalleeGuids to leverage its ScalarTraits.
+    SmallVector<memprof::GUIDHex64> CalleeGuids;
+  };
+
   static void mapping(IO &Io, memprof::CallSiteInfo &CS) {
-    Io.mapRequired("Frames", CS.Frames);
-    // Keep this optional to make it easier to write tests.
-    Io.mapOptional("CalleeGuids", CS.CalleeGuids);
+    // Use MappingNormalization to handle the conversion between
+    // memprof::CallSiteInfo and CallSiteInfoWithHex64Guids.
+    MappingNormalization<CallSiteInfoWithHex64Guids, memprof::CallSiteInfo>
+        Keys(Io, CS);
+    Io.mapRequired("Frames", Keys->Frames);
+    // Map the normalized CalleeGuids (which are now GUIDHex64).
+    Io.mapOptional("CalleeGuids", Keys->CalleeGuids);
   }
 };
 
@@ -176,6 +211,20 @@ template <> struct MappingTraits<memprof::AllMemProfData> {
     Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
   }
 };
+
+template <> struct SequenceTraits<SmallVector<memprof::GUIDHex64>> {
+  static size_t size(IO &io, SmallVector<memprof::GUIDHex64> &Seq) {
+    return Seq.size();
+  }
+  static memprof::GUIDHex64 &
+  element(IO &io, SmallVector<memprof::GUIDHex64> &Seq, size_t Index) {
+    if (Index >= Seq.size())
+      Seq.resize(Index + 1);
+    return Seq[Index];
+  }
+  static const bool flow = true;
+};
+
 } // namespace yaml
 } // namespace llvm
 
@@ -184,5 +233,6 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<memprof::Frame>)
 LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
 LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
 LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDHex64) // Used for CalleeGuids
 
 #endif // LLVM_PROFILEDATA_MEMPROFYAML_H_
diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp
index 5e78ffdb86d67..6026dee077fa9 100644
--- a/llvm/lib/ProfileData/IndexedMemProfData.cpp
+++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp
@@ -214,23 +214,13 @@ static Error writeMemProfV2(ProfOStream &OS,
   return Error::success();
 }
 
-// Write out MemProf Version3 as follows:
-// uint64_t Version
-// uint64_t CallStackPayloadOffset = Offset for the call stack payload
-// uint64_t RecordPayloadOffset = Offset for the record payload
-// uint64_t RecordTableOffset = RecordTableGenerator.Emit
-// uint64_t Num schema entries
-// uint64_t Schema entry 0
-// uint64_t Schema entry 1
-// ....
-// uint64_t Schema entry N - 1
-// Frames serialized one after another
-// Call stacks encoded as a radix tree
-// OnDiskChainedHashTable MemProfRecordData
-static Error writeMemProfV3(ProfOStream &OS,
-                            memprof::IndexedMemProfData &MemProfData,
-                            bool MemProfFullSchema) {
-  OS.write(memprof::Version3);
+static Error writeMemProfRadixTreeBased(
+    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+    memprof::IndexedVersion Version, bool MemProfFullSchema) {
+  assert((Version == memprof::Version3 || Version == memprof::Version4) &&
+         "Unsupported version for radix tree format");
+
+  OS.write(Version); // Write the specific version (V3 or V4)
   uint64_t HeaderUpdatePos = OS.tell();
   OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
   OS.write(0ULL); // Reserve space for the memprof record payload offset.
@@ -258,13 +248,12 @@ static Error writeMemProfV3(ProfOStream &OS,
                                      NumElements);
 
   uint64_t RecordPayloadOffset = OS.tell();
-  uint64_t RecordTableOffset =
-      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
-                          &MemProfCallStackIndexes);
+  uint64_t RecordTableOffset = writeMemProfRecords(
+      OS, MemProfData.Records, &Schema, Version, // Pass Version
+      &MemProfCallStackIndexes);
 
-  // IndexedMemProfReader::deserializeV3 computes the number of elements in the
-  // call stack array from the difference between CallStackPayloadOffset and
-  // RecordPayloadOffset.  Verify that the computation works.
+  // Verify that the computation for the number of elements in the call stack
+  // array works.
   assert(CallStackPayloadOffset +
              NumElements * sizeof(memprof::LinearFrameId) ==
          RecordPayloadOffset);
@@ -279,15 +268,34 @@ static Error writeMemProfV3(ProfOStream &OS,
   return Error::success();
 }
 
+// Write out MemProf Version3
+static Error writeMemProfV3(ProfOStream &OS,
+                            memprof::IndexedMemProfData &MemProfData,
+                            bool MemProfFullSchema) {
+  return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3,
+                                    MemProfFullSchema);
+}
+
+// Write out MemProf Version4
+static Error writeMemProfV4(ProfOStream &OS,
+                            memprof::IndexedMemProfData &MemProfData,
+                            bool MemProfFullSchema) {
+  return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version4,
+                                    MemProfFullSchema);
+}
+
 // Write out the MemProf data in a requested version.
-Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
-                   memprof::IndexedVersion MemProfVersionRequested,
-                   bool MemProfFullSchema) {
+Error writeMemProf(ProfOStream &OS,
+                          memprof::IndexedMemProfData &MemProfData,
+                          memprof::IndexedVersion MemProfVersionRequested,
+                          bool MemProfFullSchema) {
   switch (MemProfVersionRequested) {
   case memprof::Version2:
     return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
   case memprof::Version3:
     return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
+  case memprof::Version4:
+    return writeMemProfV4(OS, MemProfData, MemProfFullSchema);
   }
 
   return make_error<InstrProfError>(
@@ -350,8 +358,8 @@ Error IndexedMemProfReader::deserializeV2(const unsigned char *Start,
   return Error::success();
 }
 
-Error IndexedMemProfReader::deserializeV3(const unsigned char *Start,
-                                          const unsigned char *Ptr) {
+Error IndexedMemProfReader::deserializeRadixTreeBased(
+    const unsigned char *Start, const unsigned char *Ptr) {
   // The offset in the stream right before invoking
   // CallStackTableGenerator.Emit.
   const uint64_t CallStackPayloadOffset =
@@ -382,7 +390,7 @@ Error IndexedMemProfReader::deserializeV3(const unsigned char *Start,
   MemProfRecordTable.reset(MemProfRecordHashTable::Create(
       /*Buckets=*/Start + RecordTableOffset,
       /*Payload=*/Start + RecordPayloadOffset,
-      /*Base=*/Start, memprof::RecordLookupTrait(memprof::Version3, Schema)));
+      /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
 
   return Error::success();
 }
@@ -395,8 +403,10 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
   const uint64_t FirstWord =
       support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
 
-  if (FirstWord == memprof::Version2 || FirstWord == memprof::Version3) {
-    // Everything is good.  We can proceed to deserialize the rest.
+  // Check if the version is supported
+  if (FirstWord >= memprof::MinimumSupportedVersion &&
+      FirstWord <= memprof::MaximumSupportedVersion) {
+    // Everything is good. We can proceed to deserialize the rest.
     Version = static_cast<memprof::IndexedVersion>(FirstWord);
   } else {
     return make_error<InstrProfError>(
@@ -413,12 +423,13 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
       return E;
     break;
   case memprof::Version3:
-    if (Error E = deserializeV3(Start, Ptr))
+  case memprof::Version4:
+    // V3 and V4 share the same high-level structure (radix tree, linear IDs).
+    if (Error E = deserializeRadixTreeBased(Start, Ptr))
       return E;
     break;
   }
 
   return Error::success();
 }
-
 } // namespace llvm
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 295f2a633e6c7..e6c83430cd8e9 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1456,16 +1456,6 @@ getMemProfRecordV2(const memprof::IndexedMemProfRecord &IndexedRecord,
   return Record;
 }
 
-static Expected<memprof::MemProfRecord>
-getMemProfRecordV3(const memprof::IndexedMemProfRecord &IndexedRecord,
-                   const unsigned char *FrameBase,
-                   const unsigned char *CallStackBase) {
-  memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
-  memprof::LinearCallStackIdConverter CSIdConv(CallStackBase, FrameIdConv);
-  memprof::MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv);
-  return Record;
-}
-
 Expected<memprof::MemProfRecord>
 IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
   // TODO: Add memprof specific errors.
@@ -1485,13 +1475,20 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
     assert(MemProfCallStackTable && "MemProfCallStackTable must be available");
     return getMemProfRecordV2(IndexedRecord, *MemProfFrameTable,
                               *MemProfCallStackTable);
+  // Combine V3 and V4 cases as the record conversion logic is the same.
   case memprof::Version3:
+  case memprof::Version4:
     assert(!MemProfFrameTable && "MemProfFrameTable must not be available");
     assert(!MemProfCallStackTable &&
            "MemProfCallStackTable must not be available");
     assert(FrameBase && "FrameBase must be available");
     assert(CallStackBase && "CallStackBase must be available");
-    return getMemProfRecordV3(IndexedRecord, FrameBase, CallStackBase);
+    {
+      memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
+      memprof::LinearCallStackIdConverter CSIdConv(CallStackBase, FrameIdConv);
+      memprof::MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv);
+      return Record;
+    }
   }
 
   return make_error<InstrProfError>(
@@ -1505,7 +1502,7 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
 DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
 IndexedMemProfReader::getMemProfCallerCalleePairs() const {
   assert(MemProfRecordTable);
-  assert(Version == memprof::Version3);
+  assert(Version == memprof::Version3 || Version == memprof::Version4);
 
   memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
   memprof::CallerCalleePairExtractor Extractor(CallStackBase, FrameIdConv,
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 1e7d78005cd22..054b522d0ee93 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -48,7 +48,9 @@ size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema,
   switch (Version) {
   case Version2:
     return serializedSizeV2(*this, Schema);
+  // Combine V3 and V4 as the size calculation is the same
   case Version3:
+  case Version4:
     return serializedSizeV3(*this, Schema);
   }
   llvm_unreachable("unsupported MemProf version");
@@ -78,10 +80,26 @@ static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
   // The number of callsites we have information for.
   Result += sizeof(uint64_t);
   // The linear call stack ID.
+  // Note: V3 only stored the LinearCallStackId per call site.
   Result += Record.CallSites.size() * sizeof(LinearCallStackId);
   return Result;
 }
 
+static size_t serializedSizeV4(const IndexedMemProfRecord &Record,
+                               const MemProfSchema &Schema) {
+  // The number of alloc sites to serialize.
+  size_t Result = sizeof(uint64_t);
+  for (const IndexedAllocationInfo &N : Record.AllocSites)
+    Result += N.serializedSize(Schema, Version4);
+
+  // The number of callsites we have information for.
+  Result += sizeof(uint64_t);
+  for (const auto &CS : Record.CallSites)
+    Result += sizeof(LinearCallStackId) + sizeof(uint64_t) +
+              CS.CalleeGuids.size() * sizeof(GlobalValue::GUID);
+  return Result;
+}
+
 size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
                                             IndexedVersion Version) const {
   switch (Version) {
@@ -89,6 +107,8 @@ size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
     return serializedSizeV2(*this, Schema);
   case Version3:
     return serializedSizeV3(*this, Schema);
+  case Version4:
+    return serializedSizeV4(*this, Schema);
   }
   llvm_unreachable("unsupported MemProf version");
 }
@@ -134,6 +154,32 @@ static void serializeV3(
   }
 }
 
+static void serializeV4(
+    const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
+    raw_ostream &OS,
+    llvm::DenseMap<CallStackId, LinearCallStackId> &MemProfCallStackIndexes) {
+  using namespace support;
+
+  endian::Writer LE(OS, llvm::endianness::little);
+
+  LE.write<uint64_t>(Record.AllocSites.size());
+  for (const IndexedAllocationInfo &N : Record.AllocSites) {
+    assert(MemProfCallStackIndexes.contains(N.CSId));
+    LE.write<LinearCallStackId>(MemProfCallStackIndexes[N.CSId]);
+    N.Info.serialize(Schema, OS);
+  }
+
+  // Related contexts.
+  LE.write<uint64_t>(Record.CallSites.size());
+  for (const auto &CS : Record.CallSites) {
+    assert(MemProfCallStackIndexes.contains(CS.CSId));
+    LE.write<LinearCallStackId>(MemProfCallStackIndexes[CS.CSId]);
+    LE.write<uint64_t>(CS.CalleeGuids.size());
+    for (const auto &Guid : CS.CalleeGuids)
+      LE.write<GlobalValue::GUID>(Guid);
+  }
+}
+
 void IndexedMemProfRecord::serialize(
     const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version,
     llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
@@ -145,6 +191,9 @@ void IndexedMemProfRecord::serialize(
   case Version3:
     serializeV3(*this, Schema, OS, *MemProfCallStackIndexes);
     return;
+  case Version4:
+    serializeV4(*this, Schema, OS, *MemProfCallStackIndexes);
+    return;
   }
   llvm_unreachable("unsupported MemProf version");
 }
@@ -217,6 +266,46 @@ static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
   return Record;
 }
 
+static IndexedMemProfRecord deserializeV4(const MemProfSchema &Schema,
+                                          const unsigned char *Ptr) {
+  using namespace support;
+
+  IndexedMemProfRecord Record;
+
+  // Read the meminfo nodes.
+  const uint64_t NumNodes =
+      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+  Record.AllocSites.reserve(NumNodes);
+  for (uint64_t I = 0; I < NumNodes; I++) {
+    IndexedAllocationInfo Node;
+    Node.CSId =
+        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
+    Node.Info.deserialize(Schema, Ptr);
+    Ptr += PortableMemInfoBlock::serializedSize(Schema);
+    Record.AllocSites.push_back(Node);
+  }
+
+  // Read the callsite information.
+  const uint64_t NumCtxs =
+      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+  Record.CallSites.reserve(NumCtxs);
+  for (uint64_t J = 0; J < NumCtxs; J++) {
+    static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
+    LinearCallStackId CSId =
+        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
+    const uint64_t NumGuids =
+        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+    SmallVector<GlobalValue::GUID, 1> Guids;
+    Guids.reserve(NumGuids);
+    for (uint64_t K = 0; K < NumGuids; ++K)
+      Guids.push_back(
+          endian::readNext<GlobalValue::GUID, llvm::endianness::little>(Ptr));
+    Record.CallSites.emplace_back(CSId, std::move(Guids));
+  }
+
+  return Record;
+}
+
 IndexedMemProfRecord
 IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
                                   const unsigned char *Ptr,
@@ -226,6 +315,8 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
     return deserializeV2(Schema, Ptr);
   case Version3:
     return deserializeV3(Schema, Ptr);
+  case Version4:
+    return deserializeV4(Schema, Ptr);
   }
   llvm_unreachable("unsupported MemProf version");
 }
diff --git a/llvm/test/tools/llvm-profdata/memprof-yaml.test b/llvm/test/tools/llvm-profdata/memprof-yaml.test
index a72ef5925a844..1a9875d08444a 100644
--- a/llvm/test/tools/llvm-profdata/memprof-yaml.test
+++ b/llvm/test/tools/llvm-profdata/memprof-yaml.test
@@ -1,10 +1,8 @@
 ; RUN: split-file %s %t
-; RUN: llvm-profdata merge %t/memprof-in.yaml -o %t/memprof-out.indexed
+; COM: The text format only supports the latest version.
+; RUN: llvm-profdata merge --memprof-version=4 %t/memprof-in.yaml -o %t/memprof-out.indexed
 ; RUN: llvm-profdata show --memory %t/memprof-out.indexed > %t/memprof-out.yaml
-; RUN: cmp %t/memprof-in.yaml %t/memprof-out.yaml
-
-; This test is expected to fail until the profile format is updated to handle CalleeGuids.
-; XFAIL: *
+; RUN: diff --ignore-space-change %t/memprof-in.yaml %t/memprof-out.yaml
 
 ; Verify that the YAML output is identical to the YAML input.
 ;--- memprof-in.yaml
@@ -32,9 +30,9 @@ HeapProfileRecords:
       - Frames:
         - { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
         - { Function: 0x6666666666666666, LineOffset: 66, Column: 60, IsInlineFrame: false }
-        CalleeGuids: [0x100, 0x200]
+        CalleeGuids: [ 0x100, 0x200 ]
       - Frames:
         - { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true }
         - { Function: 0x8888888888888888, LineOffset: 88, Column: 80, IsInlineFrame: false }
-        CalleeGuids: [0x300]
+        CalleeGuids: [ 0x300 ]
 ...
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 9a5d3f91d6256..885e06df6c390 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -336,7 +336,8 @@ static cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
     cl::desc("Specify the version of the memprof format to use"),
     cl::init(memprof::Version3),
     cl::values(clEnumValN(memprof::Version2, "2", "version 2"),
-               clEnumValN(memprof::Version3, "3", "version 3")));
+               clEnumValN(memprof::Version3, "3", "version 3"),
+               clEnumValN(memprof::Version4, "4", "version 4")));
 
 static cl::opt<bool> MemProfFullSchema(
     "memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand),
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 3e430aa4eae58..ea36727df1bee 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -292,6 +292,51 @@ TEST(MemProf, RecordSerializationRoundTripVerion2) {
   EXPECT_EQ(Record, GotRecord);
 }
 
+TEST(MemProf, RecordSerializationRoundTripVersion4) {
+  const auto Schema = getFullSchema();
+
+  MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000,
+                    /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3,
+                    /*dealloc_cpu=*/4, /*Histogram=*/0, /*HistogramSize=*/0);
+
+  llvm::SmallVector<CallStackId> CallStackIds = {0x123, 0x456};
+
+  llvm::SmallVector<IndexedCallSiteInfo> CallSites;
+  CallSites.push_back(
+      IndexedCallSiteInfo(0x333, {0xaaa, 0xbbb})); // CSId with GUIDs
+  CallSites.push_back(IndexedCallSiteInfo(0x444)); // CSId without GUIDs
+
+  IndexedMemProfRecord Record;
+  for (const auto &CSId : CallStackIds) {
+    // Use the same info block for both allocation sites.
+    Record.AllocSites.emplace_back(CSId, Info);
+  }
+  Record.CallSites = std::move(CallSites);
+
+  std::string Buffer;
+  llvm::raw_string_ostream OS(Buffer);
+  // Need a dummy map for V4 serialization
+  llvm::DenseMap<CallStackId, LinearCallStackId> DummyMap = {
+      {0x123, 1}, {0x456, 2}, {0x333, 3}, {0x444, 4}};
+  Record.serialize(Schema, OS, Version4, &DummyMap);
+
+  const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize(
+      Schema, reinterpret_cast<const unsigned char *>(Buffer.data()), Version4);
+
+  // Create the expected record using the linear IDs from the dummy map.
+  IndexedMemProfRecord ExpectedRecord;
+  for (const auto &CSId : CallStackIds) {
+    ExpectedRecord.AllocSites.emplace_back(DummyMap[CSId], Info);
+  }
+  for (const auto &CSInfo :
+       Record.CallSites) { // Use original Record's CallSites to get GUIDs
+    ExpectedRecord.CallSites.emplace_back(DummyMap[CSInfo.CSId],
+                                          CSInfo.CalleeGuids);
+  }
+
+  EXPECT_EQ(ExpectedRecord, GotRecord);
+}
+
 TEST(MemProf, RecordSerializationRoundTripVersion2HotColdSchema) {
   const auto Schema = getHotColdSchema();
 
@@ -791,7 +836,7 @@ TEST(MemProf, YAMLWriterFrame) {
 
   std::string Out = serializeInYAML(F);
   EXPECT_EQ(Out, R"YAML(---
-{ Function: 0x0123456789abcdef, LineOffset: 22, Column: 33, IsInlineFrame: true }
+{ Function: 0x123456789abcdef, LineOffset: 22, Column: 33, IsInlineFrame: true }
 ...
 )YAML");
 }

>From 6d756d6f4f0b6160ab87e32b7429df8b760385d4 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Fri, 25 Apr 2025 13:39:00 -0700
Subject: [PATCH 2/4] Fix formatting.

---
 llvm/lib/ProfileData/IndexedMemProfData.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp
index 6026dee077fa9..c9386bef7f259 100644
--- a/llvm/lib/ProfileData/IndexedMemProfData.cpp
+++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp
@@ -285,10 +285,9 @@ static Error writeMemProfV4(ProfOStream &OS,
 }
 
 // Write out the MemProf data in a requested version.
-Error writeMemProf(ProfOStream &OS,
-                          memprof::IndexedMemProfData &MemProfData,
-                          memprof::IndexedVersion MemProfVersionRequested,
-                          bool MemProfFullSchema) {
+Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+                   memprof::IndexedVersion MemProfVersionRequested,
+                   bool MemProfFullSchema) {
   switch (MemProfVersionRequested) {
   case memprof::Version2:
     return writeMemProfV2(OS, MemProfData, MemProfFullSchema);

>From 3e4bc1d59340a6fdcff074354970c7ec23811bed Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Sat, 26 Apr 2025 15:35:49 -0700
Subject: [PATCH 3/4] Hoist serialized size computation outside the loop.

---
 llvm/lib/ProfileData/IndexedMemProfData.cpp | 3 +--
 llvm/lib/ProfileData/MemProf.cpp            | 3 ++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp
index c9386bef7f259..3d20f7a7a5778 100644
--- a/llvm/lib/ProfileData/IndexedMemProfData.cpp
+++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp
@@ -249,8 +249,7 @@ static Error writeMemProfRadixTreeBased(
 
   uint64_t RecordPayloadOffset = OS.tell();
   uint64_t RecordTableOffset = writeMemProfRecords(
-      OS, MemProfData.Records, &Schema, Version, // Pass Version
-      &MemProfCallStackIndexes);
+      OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes);
 
   // Verify that the computation for the number of elements in the call stack
   // array works.
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 054b522d0ee93..e497bbff67d2e 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -276,12 +276,13 @@ static IndexedMemProfRecord deserializeV4(const MemProfSchema &Schema,
   const uint64_t NumNodes =
       endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
   Record.AllocSites.reserve(NumNodes);
+  const size_t SerializedSize = PortableMemInfoBlock::serializedSize(Schema);
   for (uint64_t I = 0; I < NumNodes; I++) {
     IndexedAllocationInfo Node;
     Node.CSId =
         endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
     Node.Info.deserialize(Schema, Ptr);
-    Ptr += PortableMemInfoBlock::serializedSize(Schema);
+    Ptr += SerializedSize;
     Record.AllocSites.push_back(Node);
   }
 

>From 479426141203637d08031a3397e9de1aef0182af Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Thu, 1 May 2025 19:19:40 -0700
Subject: [PATCH 4/4] Use -b instead of --ignore-space-change to appease the
 Windows CI.

---
 llvm/test/tools/llvm-profdata/memprof-yaml.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/tools/llvm-profdata/memprof-yaml.test b/llvm/test/tools/llvm-profdata/memprof-yaml.test
index 1a9875d08444a..9766cc50f37d7 100644
--- a/llvm/test/tools/llvm-profdata/memprof-yaml.test
+++ b/llvm/test/tools/llvm-profdata/memprof-yaml.test
@@ -2,7 +2,7 @@
 ; COM: The text format only supports the latest version.
 ; RUN: llvm-profdata merge --memprof-version=4 %t/memprof-in.yaml -o %t/memprof-out.indexed
 ; RUN: llvm-profdata show --memory %t/memprof-out.indexed > %t/memprof-out.yaml
-; RUN: diff --ignore-space-change %t/memprof-in.yaml %t/memprof-out.yaml
+; RUN: diff -b %t/memprof-in.yaml %t/memprof-out.yaml
 
 ; Verify that the YAML output is identical to the YAML input.
 ;--- memprof-in.yaml



More information about the llvm-commits mailing list