[llvm] [MemProf] Add v4 which contains CalleeGuids to CallSiteInfo. (PR #137394)
Snehasish Kumar via llvm-commits
llvm-commits at lists.llvm.org
Thu May 1 19:21:47 PDT 2025
https://github.com/snehasish updated https://github.com/llvm/llvm-project/pull/137394
>From a796d02755c9505b54d67805dff98383b8c537aa Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Fri, 25 Apr 2025 13:28:20 -0700
Subject: [PATCH 1/4] [MemProf] Add v4 which contains CalleeGuids to
CallSiteInfo.
This patch adds CalleeGuids to the serialized CallSiteInfo record and bumps the indexed MemProf format version to 4.
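For reference, each V4 record serializes its call sites as a linear call stack ID followed by a count of callee GUIDs and then the GUIDs themselves, all little-endian. Below is a minimal standalone sketch of that per-call-site layout, assuming LinearCallStackId stays a 32-bit linear ID and GUIDs are 64-bit; the names (CallSiteV4, writeLE, serializeCallSiteV4) are illustrative only and not part of the patch:

  #include <cstddef>
  #include <cstdint>
  #include <vector>

  struct CallSiteV4 {
    uint32_t LinearCSId;               // LinearCallStackId into the radix tree
    std::vector<uint64_t> CalleeGuids; // GlobalValue::GUID values
  };

  // Append Value to Buf in little-endian byte order.
  template <typename T> void writeLE(std::vector<uint8_t> &Buf, T Value) {
    for (size_t I = 0; I < sizeof(T); ++I)
      Buf.push_back(static_cast<uint8_t>(Value >> (8 * I)));
  }

  // One call site: linear call stack ID, GUID count, then the GUIDs.
  void serializeCallSiteV4(std::vector<uint8_t> &Buf, const CallSiteV4 &CS) {
    writeLE(Buf, CS.LinearCSId);
    writeLE(Buf, static_cast<uint64_t>(CS.CalleeGuids.size()));
    for (uint64_t Guid : CS.CalleeGuids)
      writeLE(Buf, Guid);
  }

V3 stored only the linear call stack ID per call site, so V4 differs on disk only in the trailing GUID count and GUID list; alloc sites are serialized as before.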
---
.../llvm/ProfileData/InstrProfReader.h | 3 +-
llvm/include/llvm/ProfileData/MemProf.h | 4 +-
llvm/include/llvm/ProfileData/MemProfYAML.h | 60 +++++++++++-
llvm/lib/ProfileData/IndexedMemProfData.cpp | 77 +++++++++-------
llvm/lib/ProfileData/InstrProfReader.cpp | 21 ++---
llvm/lib/ProfileData/MemProf.cpp | 91 +++++++++++++++++++
.../tools/llvm-profdata/memprof-yaml.test | 12 +--
llvm/tools/llvm-profdata/llvm-profdata.cpp | 3 +-
llvm/unittests/ProfileData/MemProfTest.cpp | 47 +++++++++-
9 files changed, 257 insertions(+), 61 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index f1010b312ee56..c250a9ede39bc 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -705,7 +705,8 @@ class IndexedMemProfReader {
unsigned RadixTreeSize = 0;
Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
- Error deserializeV3(const unsigned char *Start, const unsigned char *Ptr);
+ Error deserializeRadixTreeBased(const unsigned char *Start,
+ const unsigned char *Ptr);
public:
IndexedMemProfReader() = default;
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index e07a3189e4259..06d17438fa70f 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -35,10 +35,12 @@ enum IndexedVersion : uint64_t {
// Version 3: Added a radix tree for call stacks. Switched to linear IDs for
// frames and call stacks.
Version3 = 3,
+ // Version 4: Added CalleeGuids to call site info.
+ Version4 = 4,
};
constexpr uint64_t MinimumSupportedVersion = Version2;
-constexpr uint64_t MaximumSupportedVersion = Version3;
+constexpr uint64_t MaximumSupportedVersion = Version4;
// Verify that the minimum and maximum satisfy the obvious constraint.
static_assert(MinimumSupportedVersion <= MaximumSupportedVersion);
diff --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h
index a3ca03eb92d2e..08dee253f615a 100644
--- a/llvm/include/llvm/ProfileData/MemProfYAML.h
+++ b/llvm/include/llvm/ProfileData/MemProfYAML.h
@@ -1,6 +1,7 @@
#ifndef LLVM_PROFILEDATA_MEMPROFYAML_H_
#define LLVM_PROFILEDATA_MEMPROFYAML_H_
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/YAMLTraits.h"
@@ -28,8 +29,9 @@ struct AllMemProfData {
namespace yaml {
template <> struct ScalarTraits<memprof::GUIDHex64> {
static void output(const memprof::GUIDHex64 &Val, void *, raw_ostream &Out) {
- // Print GUID as a 16-digit hexadecimal number.
- Out << format("0x%016" PRIx64, (uint64_t)Val);
+ // Print GUID as a hexadecimal number with 0x prefix, no padding to keep
+ // test strings compact.
+ Out << format("0x%" PRIx64, (uint64_t)Val);
}
static StringRef input(StringRef Scalar, void *, memprof::GUIDHex64 &Val) {
// Reject decimal GUIDs.
@@ -156,10 +158,43 @@ template <> struct MappingTraits<memprof::AllocationInfo> {
// treat the GUID and the fields within MemProfRecord at the same level as if
// the GUID were part of MemProfRecord.
template <> struct MappingTraits<memprof::CallSiteInfo> {
+ // Helper class to normalize CalleeGuids to use GUIDHex64 for YAML I/O.
+ class CallSiteInfoWithHex64Guids {
+ public:
+ CallSiteInfoWithHex64Guids(IO &) {}
+ CallSiteInfoWithHex64Guids(IO &, const memprof::CallSiteInfo &CS)
+ : Frames(CS.Frames) {
+ // Convert uint64_t GUIDs to GUIDHex64 for serialization.
+ CalleeGuids.reserve(CS.CalleeGuids.size());
+ for (uint64_t Guid : CS.CalleeGuids)
+ CalleeGuids.push_back(memprof::GUIDHex64(Guid));
+ }
+
+ memprof::CallSiteInfo denormalize(IO &) {
+ memprof::CallSiteInfo CS;
+ CS.Frames = Frames;
+ // Convert GUIDHex64 back to uint64_t GUIDs after deserialization.
+ CS.CalleeGuids.reserve(CalleeGuids.size());
+ for (memprof::GUIDHex64 HexGuid : CalleeGuids)
+ CS.CalleeGuids.push_back(HexGuid.value);
+ return CS;
+ }
+
+ // Keep Frames as is, since MappingTraits<memprof::Frame> handles its
+ // Function GUID.
+ decltype(memprof::CallSiteInfo::Frames) Frames;
+ // Use a vector of GUIDHex64 for CalleeGuids to leverage its ScalarTraits.
+ SmallVector<memprof::GUIDHex64> CalleeGuids;
+ };
+
static void mapping(IO &Io, memprof::CallSiteInfo &CS) {
- Io.mapRequired("Frames", CS.Frames);
- // Keep this optional to make it easier to write tests.
- Io.mapOptional("CalleeGuids", CS.CalleeGuids);
+ // Use MappingNormalization to handle the conversion between
+ // memprof::CallSiteInfo and CallSiteInfoWithHex64Guids.
+ MappingNormalization<CallSiteInfoWithHex64Guids, memprof::CallSiteInfo>
+ Keys(Io, CS);
+ Io.mapRequired("Frames", Keys->Frames);
+ // Map the normalized CalleeGuids (which are now GUIDHex64).
+ Io.mapOptional("CalleeGuids", Keys->CalleeGuids);
}
};
@@ -176,6 +211,20 @@ template <> struct MappingTraits<memprof::AllMemProfData> {
Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
}
};
+
+template <> struct SequenceTraits<SmallVector<memprof::GUIDHex64>> {
+ static size_t size(IO &io, SmallVector<memprof::GUIDHex64> &Seq) {
+ return Seq.size();
+ }
+ static memprof::GUIDHex64 &
+ element(IO &io, SmallVector<memprof::GUIDHex64> &Seq, size_t Index) {
+ if (Index >= Seq.size())
+ Seq.resize(Index + 1);
+ return Seq[Index];
+ }
+ static const bool flow = true;
+};
+
} // namespace yaml
} // namespace llvm
@@ -184,5 +233,6 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<memprof::Frame>)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDHex64) // Used for CalleeGuids
#endif // LLVM_PROFILEDATA_MEMPROFYAML_H_
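As a usage note, the traits above plug into LLVM's generic YAML I/O. A minimal sketch of emitting AllMemProfData (mirroring what the unit tests and llvm-profdata show --memory do); emitYAML is an illustrative helper, not part of the patch:

  #include <string>
  #include "llvm/ProfileData/MemProfYAML.h"
  #include "llvm/Support/YAMLTraits.h"
  #include "llvm/Support/raw_ostream.h"

  // Emit AllMemProfData as YAML. With the normalization above, CalleeGuids
  // are printed via ScalarTraits<GUIDHex64> as 0x-prefixed hex in flow style.
  std::string emitYAML(llvm::memprof::AllMemProfData &Data) {
    std::string Out;
    llvm::raw_string_ostream OS(Out);
    llvm::yaml::Output Yout(OS);
    Yout << Data;
    OS.flush();
    return Out;
  }

This is what lets the memprof-yaml.test round trip below compare input and output modulo whitespace.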
diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp
index 5e78ffdb86d67..6026dee077fa9 100644
--- a/llvm/lib/ProfileData/IndexedMemProfData.cpp
+++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp
@@ -214,23 +214,13 @@ static Error writeMemProfV2(ProfOStream &OS,
return Error::success();
}
-// Write out MemProf Version3 as follows:
-// uint64_t Version
-// uint64_t CallStackPayloadOffset = Offset for the call stack payload
-// uint64_t RecordPayloadOffset = Offset for the record payload
-// uint64_t RecordTableOffset = RecordTableGenerator.Emit
-// uint64_t Num schema entries
-// uint64_t Schema entry 0
-// uint64_t Schema entry 1
-// ....
-// uint64_t Schema entry N - 1
-// Frames serialized one after another
-// Call stacks encoded as a radix tree
-// OnDiskChainedHashTable MemProfRecordData
-static Error writeMemProfV3(ProfOStream &OS,
- memprof::IndexedMemProfData &MemProfData,
- bool MemProfFullSchema) {
- OS.write(memprof::Version3);
+static Error writeMemProfRadixTreeBased(
+ ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+ memprof::IndexedVersion Version, bool MemProfFullSchema) {
+ assert((Version == memprof::Version3 || Version == memprof::Version4) &&
+ "Unsupported version for radix tree format");
+
+ OS.write(Version); // Write the specific version (V3 or V4)
uint64_t HeaderUpdatePos = OS.tell();
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
OS.write(0ULL); // Reserve space for the memprof record payload offset.
@@ -258,13 +248,12 @@ static Error writeMemProfV3(ProfOStream &OS,
NumElements);
uint64_t RecordPayloadOffset = OS.tell();
- uint64_t RecordTableOffset =
- writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
- &MemProfCallStackIndexes);
+ uint64_t RecordTableOffset = writeMemProfRecords(
+ OS, MemProfData.Records, &Schema, Version, // Pass Version
+ &MemProfCallStackIndexes);
- // IndexedMemProfReader::deserializeV3 computes the number of elements in the
- // call stack array from the difference between CallStackPayloadOffset and
- // RecordPayloadOffset. Verify that the computation works.
+ // Verify that the computation for the number of elements in the call stack
+ // array works.
assert(CallStackPayloadOffset +
NumElements * sizeof(memprof::LinearFrameId) ==
RecordPayloadOffset);
@@ -279,15 +268,34 @@ static Error writeMemProfV3(ProfOStream &OS,
return Error::success();
}
+// Write out MemProf Version3
+static Error writeMemProfV3(ProfOStream &OS,
+ memprof::IndexedMemProfData &MemProfData,
+ bool MemProfFullSchema) {
+ return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3,
+ MemProfFullSchema);
+}
+
+// Write out MemProf Version4
+static Error writeMemProfV4(ProfOStream &OS,
+ memprof::IndexedMemProfData &MemProfData,
+ bool MemProfFullSchema) {
+ return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version4,
+ MemProfFullSchema);
+}
+
// Write out the MemProf data in a requested version.
-Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
- memprof::IndexedVersion MemProfVersionRequested,
- bool MemProfFullSchema) {
+Error writeMemProf(ProfOStream &OS,
+ memprof::IndexedMemProfData &MemProfData,
+ memprof::IndexedVersion MemProfVersionRequested,
+ bool MemProfFullSchema) {
switch (MemProfVersionRequested) {
case memprof::Version2:
return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
case memprof::Version3:
return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
+ case memprof::Version4:
+ return writeMemProfV4(OS, MemProfData, MemProfFullSchema);
}
return make_error<InstrProfError>(
@@ -350,8 +358,8 @@ Error IndexedMemProfReader::deserializeV2(const unsigned char *Start,
return Error::success();
}
-Error IndexedMemProfReader::deserializeV3(const unsigned char *Start,
- const unsigned char *Ptr) {
+Error IndexedMemProfReader::deserializeRadixTreeBased(
+ const unsigned char *Start, const unsigned char *Ptr) {
// The offset in the stream right before invoking
// CallStackTableGenerator.Emit.
const uint64_t CallStackPayloadOffset =
@@ -382,7 +390,7 @@ Error IndexedMemProfReader::deserializeV3(const unsigned char *Start,
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
/*Buckets=*/Start + RecordTableOffset,
/*Payload=*/Start + RecordPayloadOffset,
- /*Base=*/Start, memprof::RecordLookupTrait(memprof::Version3, Schema)));
+ /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
return Error::success();
}
@@ -395,8 +403,10 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
const uint64_t FirstWord =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
- if (FirstWord == memprof::Version2 || FirstWord == memprof::Version3) {
- // Everything is good. We can proceed to deserialize the rest.
+ // Check if the version is supported
+ if (FirstWord >= memprof::MinimumSupportedVersion &&
+ FirstWord <= memprof::MaximumSupportedVersion) {
+ // Everything is good. We can proceed to deserialize the rest.
Version = static_cast<memprof::IndexedVersion>(FirstWord);
} else {
return make_error<InstrProfError>(
@@ -413,12 +423,13 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
return E;
break;
case memprof::Version3:
- if (Error E = deserializeV3(Start, Ptr))
+ case memprof::Version4:
+ // V3 and V4 share the same high-level structure (radix tree, linear IDs).
+ if (Error E = deserializeRadixTreeBased(Start, Ptr))
return E;
break;
}
return Error::success();
}
-
} // namespace llvm
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 295f2a633e6c7..e6c83430cd8e9 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1456,16 +1456,6 @@ getMemProfRecordV2(const memprof::IndexedMemProfRecord &IndexedRecord,
return Record;
}
-static Expected<memprof::MemProfRecord>
-getMemProfRecordV3(const memprof::IndexedMemProfRecord &IndexedRecord,
- const unsigned char *FrameBase,
- const unsigned char *CallStackBase) {
- memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
- memprof::LinearCallStackIdConverter CSIdConv(CallStackBase, FrameIdConv);
- memprof::MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv);
- return Record;
-}
-
Expected<memprof::MemProfRecord>
IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
// TODO: Add memprof specific errors.
@@ -1485,13 +1475,20 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
assert(MemProfCallStackTable && "MemProfCallStackTable must be available");
return getMemProfRecordV2(IndexedRecord, *MemProfFrameTable,
*MemProfCallStackTable);
+ // Combine V3 and V4 cases as the record conversion logic is the same.
case memprof::Version3:
+ case memprof::Version4:
assert(!MemProfFrameTable && "MemProfFrameTable must not be available");
assert(!MemProfCallStackTable &&
"MemProfCallStackTable must not be available");
assert(FrameBase && "FrameBase must be available");
assert(CallStackBase && "CallStackBase must be available");
- return getMemProfRecordV3(IndexedRecord, FrameBase, CallStackBase);
+ {
+ memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
+ memprof::LinearCallStackIdConverter CSIdConv(CallStackBase, FrameIdConv);
+ memprof::MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv);
+ return Record;
+ }
}
return make_error<InstrProfError>(
@@ -1505,7 +1502,7 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
IndexedMemProfReader::getMemProfCallerCalleePairs() const {
assert(MemProfRecordTable);
- assert(Version == memprof::Version3);
+ assert(Version == memprof::Version3 || Version == memprof::Version4);
memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
memprof::CallerCalleePairExtractor Extractor(CallStackBase, FrameIdConv,
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 1e7d78005cd22..054b522d0ee93 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -48,7 +48,9 @@ size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema,
switch (Version) {
case Version2:
return serializedSizeV2(*this, Schema);
+ // Combine V3 and V4 as the size calculation is the same
case Version3:
+ case Version4:
return serializedSizeV3(*this, Schema);
}
llvm_unreachable("unsupported MemProf version");
@@ -78,10 +80,26 @@ static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
// The number of callsites we have information for.
Result += sizeof(uint64_t);
// The linear call stack ID.
+ // Note: V3 only stored the LinearCallStackId per call site.
Result += Record.CallSites.size() * sizeof(LinearCallStackId);
return Result;
}
+static size_t serializedSizeV4(const IndexedMemProfRecord &Record,
+ const MemProfSchema &Schema) {
+ // The number of alloc sites to serialize.
+ size_t Result = sizeof(uint64_t);
+ for (const IndexedAllocationInfo &N : Record.AllocSites)
+ Result += N.serializedSize(Schema, Version4);
+
+ // The number of callsites we have information for.
+ Result += sizeof(uint64_t);
+ for (const auto &CS : Record.CallSites)
+ Result += sizeof(LinearCallStackId) + sizeof(uint64_t) +
+ CS.CalleeGuids.size() * sizeof(GlobalValue::GUID);
+ return Result;
+}
+
size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
IndexedVersion Version) const {
switch (Version) {
@@ -89,6 +107,8 @@ size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
return serializedSizeV2(*this, Schema);
case Version3:
return serializedSizeV3(*this, Schema);
+ case Version4:
+ return serializedSizeV4(*this, Schema);
}
llvm_unreachable("unsupported MemProf version");
}
@@ -134,6 +154,32 @@ static void serializeV3(
}
}
+static void serializeV4(
+ const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
+ raw_ostream &OS,
+ llvm::DenseMap<CallStackId, LinearCallStackId> &MemProfCallStackIndexes) {
+ using namespace support;
+
+ endian::Writer LE(OS, llvm::endianness::little);
+
+ LE.write<uint64_t>(Record.AllocSites.size());
+ for (const IndexedAllocationInfo &N : Record.AllocSites) {
+ assert(MemProfCallStackIndexes.contains(N.CSId));
+ LE.write<LinearCallStackId>(MemProfCallStackIndexes[N.CSId]);
+ N.Info.serialize(Schema, OS);
+ }
+
+ // Related contexts.
+ LE.write<uint64_t>(Record.CallSites.size());
+ for (const auto &CS : Record.CallSites) {
+ assert(MemProfCallStackIndexes.contains(CS.CSId));
+ LE.write<LinearCallStackId>(MemProfCallStackIndexes[CS.CSId]);
+ LE.write<uint64_t>(CS.CalleeGuids.size());
+ for (const auto &Guid : CS.CalleeGuids)
+ LE.write<GlobalValue::GUID>(Guid);
+ }
+}
+
void IndexedMemProfRecord::serialize(
const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version,
llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
@@ -145,6 +191,9 @@ void IndexedMemProfRecord::serialize(
case Version3:
serializeV3(*this, Schema, OS, *MemProfCallStackIndexes);
return;
+ case Version4:
+ serializeV4(*this, Schema, OS, *MemProfCallStackIndexes);
+ return;
}
llvm_unreachable("unsupported MemProf version");
}
@@ -217,6 +266,46 @@ static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
return Record;
}
+static IndexedMemProfRecord deserializeV4(const MemProfSchema &Schema,
+ const unsigned char *Ptr) {
+ using namespace support;
+
+ IndexedMemProfRecord Record;
+
+ // Read the meminfo nodes.
+ const uint64_t NumNodes =
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+ Record.AllocSites.reserve(NumNodes);
+ for (uint64_t I = 0; I < NumNodes; I++) {
+ IndexedAllocationInfo Node;
+ Node.CSId =
+ endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
+ Node.Info.deserialize(Schema, Ptr);
+ Ptr += PortableMemInfoBlock::serializedSize(Schema);
+ Record.AllocSites.push_back(Node);
+ }
+
+ // Read the callsite information.
+ const uint64_t NumCtxs =
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+ Record.CallSites.reserve(NumCtxs);
+ for (uint64_t J = 0; J < NumCtxs; J++) {
+ static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
+ LinearCallStackId CSId =
+ endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
+ const uint64_t NumGuids =
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+ SmallVector<GlobalValue::GUID, 1> Guids;
+ Guids.reserve(NumGuids);
+ for (uint64_t K = 0; K < NumGuids; ++K)
+ Guids.push_back(
+ endian::readNext<GlobalValue::GUID, llvm::endianness::little>(Ptr));
+ Record.CallSites.emplace_back(CSId, std::move(Guids));
+ }
+
+ return Record;
+}
+
IndexedMemProfRecord
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
const unsigned char *Ptr,
@@ -226,6 +315,8 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
return deserializeV2(Schema, Ptr);
case Version3:
return deserializeV3(Schema, Ptr);
+ case Version4:
+ return deserializeV4(Schema, Ptr);
}
llvm_unreachable("unsupported MemProf version");
}
diff --git a/llvm/test/tools/llvm-profdata/memprof-yaml.test b/llvm/test/tools/llvm-profdata/memprof-yaml.test
index a72ef5925a844..1a9875d08444a 100644
--- a/llvm/test/tools/llvm-profdata/memprof-yaml.test
+++ b/llvm/test/tools/llvm-profdata/memprof-yaml.test
@@ -1,10 +1,8 @@
; RUN: split-file %s %t
-; RUN: llvm-profdata merge %t/memprof-in.yaml -o %t/memprof-out.indexed
+; COM: The text format only supports the latest version.
+; RUN: llvm-profdata merge --memprof-version=4 %t/memprof-in.yaml -o %t/memprof-out.indexed
; RUN: llvm-profdata show --memory %t/memprof-out.indexed > %t/memprof-out.yaml
-; RUN: cmp %t/memprof-in.yaml %t/memprof-out.yaml
-
-; This test is expected to fail until the profile format is updated to handle CalleeGuids.
-; XFAIL: *
+; RUN: diff --ignore-space-change %t/memprof-in.yaml %t/memprof-out.yaml
; Verify that the YAML output is identical to the YAML input.
;--- memprof-in.yaml
@@ -32,9 +30,9 @@ HeapProfileRecords:
- Frames:
- { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
- { Function: 0x6666666666666666, LineOffset: 66, Column: 60, IsInlineFrame: false }
- CalleeGuids: [0x100, 0x200]
+ CalleeGuids: [ 0x100, 0x200 ]
- Frames:
- { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true }
- { Function: 0x8888888888888888, LineOffset: 88, Column: 80, IsInlineFrame: false }
- CalleeGuids: [0x300]
+ CalleeGuids: [ 0x300 ]
...
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 9a5d3f91d6256..885e06df6c390 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -336,7 +336,8 @@ static cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
cl::desc("Specify the version of the memprof format to use"),
cl::init(memprof::Version3),
cl::values(clEnumValN(memprof::Version2, "2", "version 2"),
- clEnumValN(memprof::Version3, "3", "version 3")));
+ clEnumValN(memprof::Version3, "3", "version 3"),
+ clEnumValN(memprof::Version4, "4", "version 4")));
static cl::opt<bool> MemProfFullSchema(
"memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand),
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 3e430aa4eae58..ea36727df1bee 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -292,6 +292,51 @@ TEST(MemProf, RecordSerializationRoundTripVerion2) {
EXPECT_EQ(Record, GotRecord);
}
+TEST(MemProf, RecordSerializationRoundTripVersion4) {
+ const auto Schema = getFullSchema();
+
+ MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000,
+ /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3,
+ /*dealloc_cpu=*/4, /*Histogram=*/0, /*HistogramSize=*/0);
+
+ llvm::SmallVector<CallStackId> CallStackIds = {0x123, 0x456};
+
+ llvm::SmallVector<IndexedCallSiteInfo> CallSites;
+ CallSites.push_back(
+ IndexedCallSiteInfo(0x333, {0xaaa, 0xbbb})); // CSId with GUIDs
+ CallSites.push_back(IndexedCallSiteInfo(0x444)); // CSId without GUIDs
+
+ IndexedMemProfRecord Record;
+ for (const auto &CSId : CallStackIds) {
+ // Use the same info block for both allocation sites.
+ Record.AllocSites.emplace_back(CSId, Info);
+ }
+ Record.CallSites = std::move(CallSites);
+
+ std::string Buffer;
+ llvm::raw_string_ostream OS(Buffer);
+ // Need a dummy map for V4 serialization
+ llvm::DenseMap<CallStackId, LinearCallStackId> DummyMap = {
+ {0x123, 1}, {0x456, 2}, {0x333, 3}, {0x444, 4}};
+ Record.serialize(Schema, OS, Version4, &DummyMap);
+
+ const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize(
+ Schema, reinterpret_cast<const unsigned char *>(Buffer.data()), Version4);
+
+ // Create the expected record using the linear IDs from the dummy map.
+ IndexedMemProfRecord ExpectedRecord;
+ for (const auto &CSId : CallStackIds) {
+ ExpectedRecord.AllocSites.emplace_back(DummyMap[CSId], Info);
+ }
+ for (const auto &CSInfo :
+ Record.CallSites) { // Use original Record's CallSites to get GUIDs
+ ExpectedRecord.CallSites.emplace_back(DummyMap[CSInfo.CSId],
+ CSInfo.CalleeGuids);
+ }
+
+ EXPECT_EQ(ExpectedRecord, GotRecord);
+}
+
TEST(MemProf, RecordSerializationRoundTripVersion2HotColdSchema) {
const auto Schema = getHotColdSchema();
@@ -791,7 +836,7 @@ TEST(MemProf, YAMLWriterFrame) {
std::string Out = serializeInYAML(F);
EXPECT_EQ(Out, R"YAML(---
-{ Function: 0x0123456789abcdef, LineOffset: 22, Column: 33, IsInlineFrame: true }
+{ Function: 0x123456789abcdef, LineOffset: 22, Column: 33, IsInlineFrame: true }
...
)YAML");
}
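End to end, a consumer that obtains a MemProfRecord through IndexedMemProfReader::getMemProfRecord can read the new field directly from each call site. A minimal sketch assuming the CallSiteInfo fields used in this series (Frames and CalleeGuids); printCalleeGuids is illustrative only:

  #include <cinttypes>

  #include "llvm/ProfileData/MemProf.h"
  #include "llvm/Support/Format.h"
  #include "llvm/Support/raw_ostream.h"

  // Walk the call sites of a record from a V4 profile and print each call
  // site's frame count together with the callee GUIDs recorded for it.
  void printCalleeGuids(const llvm::memprof::MemProfRecord &Record) {
    for (const llvm::memprof::CallSiteInfo &CS : Record.CallSites) {
      llvm::outs() << "call site with " << CS.Frames.size() << " frames:";
      for (uint64_t Guid : CS.CalleeGuids)
        llvm::outs() << llvm::format(" 0x%" PRIx64, Guid);
      llvm::outs() << "\n";
    }
  }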
>From 6d756d6f4f0b6160ab87e32b7429df8b760385d4 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Fri, 25 Apr 2025 13:39:00 -0700
Subject: [PATCH 2/4] Fix formatting.
---
llvm/lib/ProfileData/IndexedMemProfData.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp
index 6026dee077fa9..c9386bef7f259 100644
--- a/llvm/lib/ProfileData/IndexedMemProfData.cpp
+++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp
@@ -285,10 +285,9 @@ static Error writeMemProfV4(ProfOStream &OS,
}
// Write out the MemProf data in a requested version.
-Error writeMemProf(ProfOStream &OS,
- memprof::IndexedMemProfData &MemProfData,
- memprof::IndexedVersion MemProfVersionRequested,
- bool MemProfFullSchema) {
+Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+ memprof::IndexedVersion MemProfVersionRequested,
+ bool MemProfFullSchema) {
switch (MemProfVersionRequested) {
case memprof::Version2:
return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
>From 3e4bc1d59340a6fdcff074354970c7ec23811bed Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Sat, 26 Apr 2025 15:35:49 -0700
Subject: [PATCH 3/4] Hoist serialized size computation outside the loop.
---
llvm/lib/ProfileData/IndexedMemProfData.cpp | 3 +--
llvm/lib/ProfileData/MemProf.cpp | 3 ++-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp
index c9386bef7f259..3d20f7a7a5778 100644
--- a/llvm/lib/ProfileData/IndexedMemProfData.cpp
+++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp
@@ -249,8 +249,7 @@ static Error writeMemProfRadixTreeBased(
uint64_t RecordPayloadOffset = OS.tell();
uint64_t RecordTableOffset = writeMemProfRecords(
- OS, MemProfData.Records, &Schema, Version, // Pass Version
- &MemProfCallStackIndexes);
+ OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes);
// Verify that the computation for the number of elements in the call stack
// array works.
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 054b522d0ee93..e497bbff67d2e 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -276,12 +276,13 @@ static IndexedMemProfRecord deserializeV4(const MemProfSchema &Schema,
const uint64_t NumNodes =
endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
Record.AllocSites.reserve(NumNodes);
+ const size_t SerializedSize = PortableMemInfoBlock::serializedSize(Schema);
for (uint64_t I = 0; I < NumNodes; I++) {
IndexedAllocationInfo Node;
Node.CSId =
endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
Node.Info.deserialize(Schema, Ptr);
- Ptr += PortableMemInfoBlock::serializedSize(Schema);
+ Ptr += SerializedSize;
Record.AllocSites.push_back(Node);
}
>From 479426141203637d08031a3397e9de1aef0182af Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Thu, 1 May 2025 19:19:40 -0700
Subject: [PATCH 4/4] Use -b instead of --ignore-space-change to appease the
Windows CI.
---
llvm/test/tools/llvm-profdata/memprof-yaml.test | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/tools/llvm-profdata/memprof-yaml.test b/llvm/test/tools/llvm-profdata/memprof-yaml.test
index 1a9875d08444a..9766cc50f37d7 100644
--- a/llvm/test/tools/llvm-profdata/memprof-yaml.test
+++ b/llvm/test/tools/llvm-profdata/memprof-yaml.test
@@ -2,7 +2,7 @@
; COM: The text format only supports the latest version.
; RUN: llvm-profdata merge --memprof-version=4 %t/memprof-in.yaml -o %t/memprof-out.indexed
; RUN: llvm-profdata show --memory %t/memprof-out.indexed > %t/memprof-out.yaml
-; RUN: diff --ignore-space-change %t/memprof-in.yaml %t/memprof-out.yaml
+; RUN: diff -b %t/memprof-in.yaml %t/memprof-out.yaml
; Verify that the YAML output is identical to the YAML input.
;--- memprof-in.yaml