[llvm] [memprof] Add YAML-based deserialization for MemProf profile (PR #117829)
Kazu Hirata via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 26 21:48:49 PST 2024
https://github.com/kazutakahirata updated https://github.com/llvm/llvm-project/pull/117829
From 1ee8dff20f26e5a39eb930d878675994a001f87b Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Fri, 19 Apr 2024 07:01:41 -0700
Subject: [PATCH 1/2] [memprof] Add YAML-based deserialization for MemProf
profile
This patch adds YAML-based deserialization for the MemProf profile.

It's been painful to write tests for MemProf passes because we do not
have a text format for the MemProf profile. We would write a test
case in C++, compile and run it to produce a binary MemProf profile,
and then finally run a test written in LLVM IR against that binary
profile.

This patch paves the way toward a YAML-based MemProf profile.
Specifically, it adds a new class, YAMLMemProfReader, derived from
MemProfReader. For now, it only adds a function that parses a
StringRef pointing to YAML data. Subsequent patches will wire it up
to llvm-profdata and read from a file.

The field names are based on the various printYAML functions in
MemProf.h. I'm not aiming for compatibility with the format used by
printYAML, but I don't see a point in changing the field names either.
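For reference, a minimal usage sketch follows. It is not part of the
patch; loadFromYAML is a hypothetical helper that simply combines
parse() with the takeMemProfData() accessor exercised in the unit test
below, and it assumes YAMLData holds a document shaped like the one in
that test:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ProfileData/MemProfReader.h"

  // Hypothetical helper: deserialize a YAML document into indexed
  // MemProf data.
  llvm::memprof::IndexedMemProfData loadFromYAML(llvm::StringRef YAMLData) {
    llvm::memprof::YAMLMemProfReader Reader;
    Reader.parse(YAMLData);
    // parse() populates Frames, CallStacks, and Records, keyed by
    // content hashes.
    return Reader.takeMemProfData();
  }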
---
llvm/include/llvm/ProfileData/MemProf.h | 22 ++++
llvm/include/llvm/ProfileData/MemProfReader.h | 6 +
llvm/lib/ProfileData/MemProfReader.cpp | 106 ++++++++++++++++++
llvm/unittests/ProfileData/MemProfTest.cpp | 77 +++++++++++++
4 files changed, 211 insertions(+)
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index d64addcfe3ed06..3b631fad4029e7 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -19,6 +19,10 @@
#include <optional>
namespace llvm {
+namespace yaml {
+template <typename T> struct CustomMappingTraits;
+} // namespace yaml
+
namespace memprof {
struct MemProfRecord;
@@ -193,6 +197,9 @@ struct PortableMemInfoBlock {
return Result;
}
+ // Give YAML access to the individual MIB fields.
+ friend struct yaml::CustomMappingTraits<memprof::PortableMemInfoBlock>;
+
private:
// The set of available fields, indexed by Meta::Name.
std::bitset<llvm::to_underlying(Meta::Size)> Schema;
@@ -362,6 +369,8 @@ struct IndexedAllocationInfo {
IndexedAllocationInfo(CallStackId CSId, const MemInfoBlock &MB,
const MemProfSchema &Schema = getFullSchema())
: CSId(CSId), Info(MB, Schema) {}
+ IndexedAllocationInfo(CallStackId CSId, const PortableMemInfoBlock &MB)
+ : CSId(CSId), Info(MB) {}
// Returns the size in bytes when this allocation info struct is serialized.
size_t serializedSize(const MemProfSchema &Schema,
@@ -498,6 +507,19 @@ struct MemProfRecord {
}
};
+// Helper struct for AllMemProfData. In YAML, we treat the GUID and the fields
+// within MemProfRecord at the same level as if the GUID were part of
+// MemProfRecord.
+struct GUIDMemProfRecordPair {
+ GlobalValue::GUID GUID;
+ MemProfRecord Record;
+};
+
+// The top-level data structure, only used with YAML for now.
+struct AllMemProfData {
+ std::vector<GUIDMemProfRecordPair> HeapProfileRecords;
+};
+
// Reads a memprof schema from a buffer. All entries in the buffer are
// interpreted as uint64_t. The first entry in the buffer denotes the number of
// ids in the schema. Subsequent entries are integers which map to memprof::Meta
diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index bf2f91548fac07..0529f794606465 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -209,6 +209,12 @@ class RawMemProfReader final : public MemProfReader {
// A mapping of the hash to symbol name, only used if KeepSymbolName is true.
llvm::DenseMap<uint64_t, std::string> GuidToSymbolName;
};
+
+class YAMLMemProfReader final : public MemProfReader {
+public:
+ YAMLMemProfReader() = default;
+ void parse(StringRef YAMLData);
+};
} // namespace memprof
} // namespace llvm
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index 921ff3429fed74..e0b8207020dced 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -40,6 +40,71 @@
#include "llvm/Support/Path.h"
#define DEBUG_TYPE "memprof"
+
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits<memprof::Frame> {
+ static void mapping(IO &Io, memprof::Frame &F) {
+ Io.mapRequired("Function", F.Function);
+ Io.mapRequired("LineOffset", F.LineOffset);
+ Io.mapRequired("Column", F.Column);
+ Io.mapRequired("Inline", F.IsInlineFrame);
+ }
+};
+
+template <> struct CustomMappingTraits<memprof::PortableMemInfoBlock> {
+ static void inputOne(IO &Io, StringRef KeyStr,
+ memprof::PortableMemInfoBlock &MIB) {
+ // PortableMemInfoBlock keeps track of the set of fields that actually have
+ // values. We update the set here as we receive a key-value pair from the
+ // YAML document.
+#define MIBEntryDef(NameTag, Name, Type) \
+ if (KeyStr == #Name) { \
+ Io.mapRequired(KeyStr.str().c_str(), MIB.Name); \
+ MIB.Schema.set(llvm::to_underlying(memprof::Meta::Name)); \
+ return; \
+ }
+#include "llvm/ProfileData/MIBEntryDef.inc"
+#undef MIBEntryDef
+    Io.setError("Key is not a valid MIB field");
+ }
+
+  static void output(IO &Io, memprof::PortableMemInfoBlock &MIB) {
+ llvm_unreachable("To be implemented");
+ }
+};
+
+template <> struct MappingTraits<memprof::AllocationInfo> {
+ static void mapping(IO &Io, memprof::AllocationInfo &AI) {
+ Io.mapRequired("Callstack", AI.CallStack);
+ Io.mapRequired("MemInfoBlock", AI.Info);
+ }
+};
+
+// In YAML, we use GUIDMemProfRecordPair instead of MemProfRecord so that we can
+// treat the GUID and the fields within MemProfRecord at the same level as if
+// the GUID were part of MemProfRecord.
+template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> {
+ static void mapping(IO &Io, memprof::GUIDMemProfRecordPair &Pair) {
+ Io.mapRequired("GUID", Pair.GUID);
+ Io.mapRequired("AllocSites", Pair.Record.AllocSites);
+ Io.mapRequired("CallSites", Pair.Record.CallSites);
+ }
+};
+
+template <> struct MappingTraits<memprof::AllMemProfData> {
+ static void mapping(IO &Io, memprof::AllMemProfData &Data) {
+ Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
+ }
+};
+} // namespace yaml
+} // namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::Frame)
+LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<memprof::Frame>)
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
+
namespace llvm {
namespace memprof {
namespace {
@@ -756,5 +821,46 @@ Error RawMemProfReader::readNextRecord(
};
return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
}
+
+void YAMLMemProfReader::parse(StringRef YAMLData) {
+ memprof::AllMemProfData Doc;
+ yaml::Input Yin(YAMLData);
+
+ Yin >> Doc;
+ if (Yin.error())
+ return;
+
+ // Add a call stack to MemProfData.CallStacks and return its CallStackId.
+ auto AddCallStack = [&](ArrayRef<Frame> CallStack) -> CallStackId {
+ SmallVector<FrameId> IndexedCallStack;
+ IndexedCallStack.reserve(CallStack.size());
+ for (const Frame &F : CallStack) {
+ FrameId Id = F.hash();
+ MemProfData.Frames.try_emplace(Id, F);
+ IndexedCallStack.push_back(Id);
+ }
+ CallStackId CSId = hashCallStack(IndexedCallStack);
+ MemProfData.CallStacks.try_emplace(CSId, std::move(IndexedCallStack));
+ return CSId;
+ };
+
+ for (const auto &[GUID, Record] : Doc.HeapProfileRecords) {
+ IndexedMemProfRecord IndexedRecord;
+
+ // Convert AllocationInfo to IndexedAllocationInfo.
+ for (const AllocationInfo &AI : Record.AllocSites) {
+ CallStackId CSId = AddCallStack(AI.CallStack);
+ IndexedRecord.AllocSites.emplace_back(CSId, AI.Info);
+ }
+
+ // Populate CallSiteIds.
+ for (const auto &CallSite : Record.CallSites) {
+ CallStackId CSId = AddCallStack(CallSite);
+ IndexedRecord.CallSiteIds.push_back(CSId);
+ }
+
+ MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord));
+ }
+}
} // namespace memprof
} // namespace llvm
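A side note on the CustomMappingTraits above: the MIBEntryDef
expansion in inputOne generates one guarded mapping per MIB field, so
a YAML document may supply any subset of fields, and the Schema bitset
records exactly which ones appeared. For illustration only (this is
not part of the patch), the AllocCount entry expands to roughly:

  if (KeyStr == "AllocCount") {
    // Map the value and record the field as present in the schema.
    Io.mapRequired(KeyStr.str().c_str(), MIB.AllocCount);
    MIB.Schema.set(llvm::to_underlying(memprof::Meta::AllocCount));
    return;
  }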
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index b3b6249342d630..53a4c8a09472ae 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -742,4 +742,81 @@ TEST(MemProf, RadixTreeBuilderSuccessiveJumps) {
EXPECT_THAT(Mappings, testing::Contains(testing::Pair(
llvm::memprof::hashCallStack(CS4), 10U)));
}
+
+// Verify that we can parse YAML and retrieve IndexedMemProfData as expected.
+TEST(MemProf, YAMLParser) {
+ StringRef YAMLData = R"YAML(
+---
+HeapProfileRecords:
+- GUID: 0xdeadbeef12345678
+ AllocSites:
+ - Callstack:
+ - {Function: 0x100, LineOffset: 11, Column: 10, Inline: true}
+ - {Function: 0x200, LineOffset: 22, Column: 20, Inline: false}
+ MemInfoBlock:
+ AllocCount: 777
+ TotalSize: 888
+ - Callstack:
+ - {Function: 0x300, LineOffset: 33, Column: 30, Inline: false}
+ - {Function: 0x400, LineOffset: 44, Column: 40, Inline: true}
+ MemInfoBlock:
+ AllocCount: 666
+ TotalSize: 555
+ CallSites:
+ - - {Function: 0x500, LineOffset: 55, Column: 50, Inline: true}
+ - {Function: 0x600, LineOffset: 66, Column: 60, Inline: false}
+ - - {Function: 0x700, LineOffset: 77, Column: 70, Inline: true}
+ - {Function: 0x800, LineOffset: 88, Column: 80, Inline: false}
+)YAML";
+
+ llvm::memprof::YAMLMemProfReader YAMLReader;
+ YAMLReader.parse(YAMLData);
+ llvm::memprof::IndexedMemProfData MemProfData = YAMLReader.takeMemProfData();
+
+ Frame F1(0x100, 11, 10, true);
+ Frame F2(0x200, 22, 20, false);
+ Frame F3(0x300, 33, 30, false);
+ Frame F4(0x400, 44, 40, true);
+ Frame F5(0x500, 55, 50, true);
+ Frame F6(0x600, 66, 60, false);
+ Frame F7(0x700, 77, 70, true);
+ Frame F8(0x800, 88, 80, false);
+
+ llvm::SmallVector<FrameId> CS1 = {F1.hash(), F2.hash()};
+ llvm::SmallVector<FrameId> CS2 = {F3.hash(), F4.hash()};
+ llvm::SmallVector<FrameId> CS3 = {F5.hash(), F6.hash()};
+ llvm::SmallVector<FrameId> CS4 = {F7.hash(), F8.hash()};
+
+ // Verify the entire contents of MemProfData.Frames.
+ EXPECT_THAT(
+ MemProfData.Frames,
+ ::testing::UnorderedElementsAre(
+ ::testing::Pair(F1.hash(), F1), ::testing::Pair(F2.hash(), F2),
+ ::testing::Pair(F3.hash(), F3), ::testing::Pair(F4.hash(), F4),
+ ::testing::Pair(F5.hash(), F5), ::testing::Pair(F6.hash(), F6),
+ ::testing::Pair(F7.hash(), F7), ::testing::Pair(F8.hash(), F8)));
+
+  // Verify the entire contents of MemProfData.CallStacks.
+ EXPECT_THAT(MemProfData.CallStacks,
+ ::testing::UnorderedElementsAre(
+ ::testing::Pair(llvm::memprof::hashCallStack(CS1), CS1),
+ ::testing::Pair(llvm::memprof::hashCallStack(CS2), CS2),
+ ::testing::Pair(llvm::memprof::hashCallStack(CS3), CS3),
+ ::testing::Pair(llvm::memprof::hashCallStack(CS4), CS4)));
+
+ // Verify the entire contents of MemProfData.Records.
+ ASSERT_THAT(MemProfData.Records, SizeIs(1));
+ const auto &[GUID, Record] = *MemProfData.Records.begin();
+ EXPECT_EQ(GUID, 0xdeadbeef12345678ULL);
+ ASSERT_THAT(Record.AllocSites, SizeIs(2));
+ EXPECT_EQ(Record.AllocSites[0].CSId, llvm::memprof::hashCallStack(CS1));
+ EXPECT_EQ(Record.AllocSites[0].Info.getAllocCount(), 777U);
+ EXPECT_EQ(Record.AllocSites[0].Info.getTotalSize(), 888U);
+ EXPECT_EQ(Record.AllocSites[1].CSId, llvm::memprof::hashCallStack(CS2));
+ EXPECT_EQ(Record.AllocSites[1].Info.getAllocCount(), 666U);
+ EXPECT_EQ(Record.AllocSites[1].Info.getTotalSize(), 555U);
+ EXPECT_THAT(Record.CallSiteIds,
+ ::testing::ElementsAre(llvm::memprof::hashCallStack(CS3),
+ llvm::memprof::hashCallStack(CS4)));
+}
} // namespace
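One property of the parse() implementation above worth noting: frames
and call stacks are keyed by content hashes, so identical entries
appearing multiple times in a YAML document deduplicate naturally via
try_emplace. A small illustrative sketch, not part of the patch
(hashCallStack and Frame are the helpers already used in the test
above):

  #include <cassert>
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ProfileData/MemProf.h"

  void demoDeterministicIds() {
    llvm::memprof::Frame F(0x100, 11, 10, true);
    llvm::SmallVector<llvm::memprof::FrameId> Stack = {F.hash()};
    // Identical stacks hash to the same CallStackId, so the try_emplace
    // calls in parse() store each distinct stack exactly once.
    assert(llvm::memprof::hashCallStack(Stack) ==
           llvm::memprof::hashCallStack(Stack));
  }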
From 46aedcef7db8acc2abfeff3adf5613dd121b603b Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Tue, 26 Nov 2024 19:22:28 -0800
Subject: [PATCH 2/2] Address comments.
---
llvm/lib/ProfileData/MemProfReader.cpp | 14 ++++++++++
llvm/unittests/ProfileData/MemProfTest.cpp | 32 +++++++++++-----------
2 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index e0b8207020dced..16729c8652549b 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -49,6 +49,20 @@ template <> struct MappingTraits<memprof::Frame> {
Io.mapRequired("LineOffset", F.LineOffset);
Io.mapRequired("Column", F.Column);
Io.mapRequired("Inline", F.IsInlineFrame);
+
+    // Assert that the definition of Frame matches what we expect. The
+    // structured bindings below detect changes to the number of fields,
+    // and the static_asserts check the type of each field.
+ const auto &[Function, SymbolName, LineOffset, Column, IsInlineFrame] = F;
+ static_assert(
+ std::is_same_v<remove_cvref_t<decltype(Function)>, GlobalValue::GUID>);
+ static_assert(std::is_same_v<remove_cvref_t<decltype(SymbolName)>,
+ std::unique_ptr<std::string>>);
+ static_assert(
+ std::is_same_v<remove_cvref_t<decltype(LineOffset)>, uint32_t>);
+ static_assert(std::is_same_v<remove_cvref_t<decltype(Column)>, uint32_t>);
+ static_assert(
+ std::is_same_v<remove_cvref_t<decltype(IsInlineFrame)>, bool>);
}
};
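The structured-bindings guard added above is a reusable pattern: the
destructuring fails to compile if a field is added to or removed from
Frame, and the static_asserts pin down each field's type. A minimal
standalone sketch of the same idea, using a made-up Point struct and
C++20's std::remove_cvref_t in place of LLVM's remove_cvref_t:

  #include <cstdint>
  #include <type_traits>

  struct Point {
    int32_t X;
    int32_t Y;
  };

  void checkPointLayout(const Point &P) {
    // Compilation fails here if a field is added or removed...
    const auto &[X, Y] = P;
    // ...and on the static_asserts if a field's type changes.
    static_assert(std::is_same_v<std::remove_cvref_t<decltype(X)>, int32_t>);
    static_assert(std::is_same_v<std::remove_cvref_t<decltype(Y)>, int32_t>);
  }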
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 53a4c8a09472ae..5ab860e025b8ce 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -34,6 +34,7 @@ using ::llvm::memprof::CallStackId;
using ::llvm::memprof::CallStackMap;
using ::llvm::memprof::Frame;
using ::llvm::memprof::FrameId;
+using ::llvm::memprof::hashCallStack;
using ::llvm::memprof::IndexedAllocationInfo;
using ::llvm::memprof::IndexedMemProfRecord;
using ::llvm::memprof::MemInfoBlock;
@@ -46,8 +47,11 @@ using ::llvm::memprof::RawMemProfReader;
using ::llvm::memprof::SegmentEntry;
using ::llvm::object::SectionedAddress;
using ::llvm::symbolize::SymbolizableModule;
+using ::testing::ElementsAre;
+using ::testing::Pair;
using ::testing::Return;
using ::testing::SizeIs;
+using ::testing::UnorderedElementsAre;
class MockSymbolizer : public SymbolizableModule {
public:
@@ -788,35 +792,31 @@ TEST(MemProf, YAMLParser) {
llvm::SmallVector<FrameId> CS4 = {F7.hash(), F8.hash()};
// Verify the entire contents of MemProfData.Frames.
- EXPECT_THAT(
- MemProfData.Frames,
- ::testing::UnorderedElementsAre(
- ::testing::Pair(F1.hash(), F1), ::testing::Pair(F2.hash(), F2),
- ::testing::Pair(F3.hash(), F3), ::testing::Pair(F4.hash(), F4),
- ::testing::Pair(F5.hash(), F5), ::testing::Pair(F6.hash(), F6),
- ::testing::Pair(F7.hash(), F7), ::testing::Pair(F8.hash(), F8)));
+ EXPECT_THAT(MemProfData.Frames,
+ UnorderedElementsAre(Pair(F1.hash(), F1), Pair(F2.hash(), F2),
+ Pair(F3.hash(), F3), Pair(F4.hash(), F4),
+ Pair(F5.hash(), F5), Pair(F6.hash(), F6),
+ Pair(F7.hash(), F7), Pair(F8.hash(), F8)));
// Verify the entire contents of MemProfData.CallStacks.
EXPECT_THAT(MemProfData.CallStacks,
- ::testing::UnorderedElementsAre(
- ::testing::Pair(llvm::memprof::hashCallStack(CS1), CS1),
- ::testing::Pair(llvm::memprof::hashCallStack(CS2), CS2),
- ::testing::Pair(llvm::memprof::hashCallStack(CS3), CS3),
- ::testing::Pair(llvm::memprof::hashCallStack(CS4), CS4)));
+ UnorderedElementsAre(Pair(hashCallStack(CS1), CS1),
+ Pair(hashCallStack(CS2), CS2),
+ Pair(hashCallStack(CS3), CS3),
+ Pair(hashCallStack(CS4), CS4)));
// Verify the entire contents of MemProfData.Records.
ASSERT_THAT(MemProfData.Records, SizeIs(1));
const auto &[GUID, Record] = *MemProfData.Records.begin();
EXPECT_EQ(GUID, 0xdeadbeef12345678ULL);
ASSERT_THAT(Record.AllocSites, SizeIs(2));
- EXPECT_EQ(Record.AllocSites[0].CSId, llvm::memprof::hashCallStack(CS1));
+ EXPECT_EQ(Record.AllocSites[0].CSId, hashCallStack(CS1));
EXPECT_EQ(Record.AllocSites[0].Info.getAllocCount(), 777U);
EXPECT_EQ(Record.AllocSites[0].Info.getTotalSize(), 888U);
- EXPECT_EQ(Record.AllocSites[1].CSId, llvm::memprof::hashCallStack(CS2));
+ EXPECT_EQ(Record.AllocSites[1].CSId, hashCallStack(CS2));
EXPECT_EQ(Record.AllocSites[1].Info.getAllocCount(), 666U);
EXPECT_EQ(Record.AllocSites[1].Info.getTotalSize(), 555U);
EXPECT_THAT(Record.CallSiteIds,
- ::testing::ElementsAre(llvm::memprof::hashCallStack(CS3),
- llvm::memprof::hashCallStack(CS4)));
+ ElementsAre(hashCallStack(CS3), hashCallStack(CS4)));
}
} // namespace