[llvm] [MemProf] Extend CallSite information to include potential callees. (PR #130441)
Snehasish Kumar via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 8 14:27:58 PST 2025
https://github.com/snehasish created https://github.com/llvm/llvm-project/pull/130441
* Added YAML traits for `CallSiteInfo`
* Updated the `MemProfReader` to pass `Frames` instead of the entire `CallSiteInfo`
* Updated test cases to use `testing::Field`
* Add YAML sequence traits for CallSiteInfo in MemProfYAML
* Also extend IndexedMemProfRecord
For now we only read and write the additional information to and from the YAML format. I will use this to drive the changes in the MemProfUse pass before we make changes to the binary format and add serialization / deserialization support.
>From 81c5f65992ba038cccdec913e40c9d7741eba08f Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Sat, 8 Mar 2025 02:32:59 +0000
Subject: [PATCH] [MemProf] Extend CallSite information to include potential
callees.
* Added YAML traits for `CallSiteInfo`
* Updated the `MemProfReader` to pass `Frames` instead of the entire `CallSiteInfo`
* Updated test cases to use `testing::Field`
* Add YAML sequence traits for CallSiteInfo in MemProfYAML
---
llvm/include/llvm/ProfileData/MemProf.h | 55 ++++++++++++++++--
llvm/include/llvm/ProfileData/MemProfYAML.h | 23 ++++++++
llvm/lib/ProfileData/MemProf.cpp | 34 +++++------
llvm/lib/ProfileData/MemProfReader.cpp | 9 +--
llvm/unittests/ProfileData/InstrProfTest.cpp | 2 +-
llvm/unittests/ProfileData/MemProfTest.cpp | 59 +++++++++++++-------
6 files changed, 137 insertions(+), 45 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 71e3faa506741..5e9440388dc1e 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -342,6 +342,28 @@ using CallStackId = uint64_t;
// A type representing the index into the call stack array.
using LinearCallStackId = uint32_t;
+// Holds call site information with indexed frame contents.
+struct IndexedCallSiteInfo {
+ // The call stack ID for this call site
+ CallStackId CSId = 0;
+ // The GUIDs of the callees at this call site
+ std::vector<GlobalValue::GUID> CalleeGuids;
+
+ IndexedCallSiteInfo() = default;
+ IndexedCallSiteInfo(CallStackId CSId) : CSId(CSId) {}
+ IndexedCallSiteInfo(CallStackId CSId,
+ std::vector<GlobalValue::GUID> CalleeGuids)
+ : CSId(CSId), CalleeGuids(std::move(CalleeGuids)) {}
+
+ bool operator==(const IndexedCallSiteInfo &Other) const {
+ return CSId == Other.CSId && CalleeGuids == Other.CalleeGuids;
+ }
+
+ bool operator!=(const IndexedCallSiteInfo &Other) const {
+ return !operator==(Other);
+ }
+};
+
// Holds allocation information in a space efficient format where frames are
// represented using unique identifiers.
struct IndexedAllocationInfo {
@@ -410,7 +432,7 @@ struct IndexedMemProfRecord {
// list of inline locations in bottom-up order i.e. from leaf to root. The
// inline location list may include additional entries, users should pick
// the last entry in the list with the same function GUID.
- llvm::SmallVector<CallStackId> CallSiteIds;
+ llvm::SmallVector<IndexedCallSiteInfo> CallSites;
void clear() { *this = IndexedMemProfRecord(); }
@@ -427,7 +449,7 @@ struct IndexedMemProfRecord {
if (Other.AllocSites != AllocSites)
return false;
- if (Other.CallSiteIds != CallSiteIds)
+ if (Other.CallSites != CallSites)
return false;
return true;
}
@@ -455,6 +477,29 @@ struct IndexedMemProfRecord {
static GlobalValue::GUID getGUID(const StringRef FunctionName);
};
+// Holds call site information with frame contents inline.
+struct CallSiteInfo {
+ // The frames in the call stack
+ std::vector<Frame> Frames;
+
+ // The GUIDs of the callees at this call site
+ std::vector<GlobalValue::GUID> CalleeGuids;
+
+ CallSiteInfo() = default;
+ CallSiteInfo(std::vector<Frame> Frames) : Frames(std::move(Frames)) {}
+ CallSiteInfo(std::vector<Frame> Frames,
+ std::vector<GlobalValue::GUID> CalleeGuids)
+ : Frames(std::move(Frames)), CalleeGuids(std::move(CalleeGuids)) {}
+
+ bool operator==(const CallSiteInfo &Other) const {
+ return Frames == Other.Frames && CalleeGuids == Other.CalleeGuids;
+ }
+
+ bool operator!=(const CallSiteInfo &Other) const {
+ return !operator==(Other);
+ }
+};
+
// Holds the memprof profile information for a function. The internal
// representation stores frame contents inline. This representation should
// be used for small amount of temporary, in memory instances.
@@ -462,7 +507,7 @@ struct MemProfRecord {
// Same as IndexedMemProfRecord::AllocSites with frame contents inline.
llvm::SmallVector<AllocationInfo> AllocSites;
// Same as IndexedMemProfRecord::CallSites with frame contents inline.
- llvm::SmallVector<std::vector<Frame>> CallSites;
+ llvm::SmallVector<CallSiteInfo> CallSites;
MemProfRecord() = default;
@@ -476,8 +521,8 @@ struct MemProfRecord {
if (!CallSites.empty()) {
OS << " CallSites:\n";
- for (const std::vector<Frame> &Frames : CallSites) {
- for (const Frame &F : Frames) {
+ for (const CallSiteInfo &CS : CallSites) {
+ for (const Frame &F : CS.Frames) {
OS << " -\n";
F.printYAML(OS);
}
diff --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h
index fa1b7dd473843..270e22945bab5 100644
--- a/llvm/include/llvm/ProfileData/MemProfYAML.h
+++ b/llvm/include/llvm/ProfileData/MemProfYAML.h
@@ -155,6 +155,28 @@ template <> struct MappingTraits<memprof::AllocationInfo> {
// In YAML, we use GUIDMemProfRecordPair instead of MemProfRecord so that we can
// treat the GUID and the fields within MemProfRecord at the same level as if
// the GUID were part of MemProfRecord.
+template <> struct MappingTraits<memprof::CallSiteInfo> {
+ static void mapping(IO &Io, memprof::CallSiteInfo &CS) {
+ Io.mapRequired("Frames", CS.Frames);
+ Io.mapOptional("CalleeGuids", CS.CalleeGuids);
+ }
+};
+
+// Add sequence traits for SmallVector<CallSiteInfo>
+template <unsigned N>
+struct SequenceTraits<SmallVector<memprof::CallSiteInfo, N>> {
+ static size_t size(IO &IO, SmallVector<memprof::CallSiteInfo, N> &Seq) {
+ return Seq.size();
+ }
+
+ static memprof::CallSiteInfo &
+ element(IO &IO, SmallVector<memprof::CallSiteInfo, N> &Seq, size_t Index) {
+ if (Index >= Seq.size())
+ Seq.resize(Index + 1);
+ return Seq[Index];
+ }
+};
+
template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> {
static void mapping(IO &Io, memprof::GUIDMemProfRecordPair &Pair) {
Io.mapRequired("GUID", Pair.GUID);
@@ -174,6 +196,7 @@ template <> struct MappingTraits<memprof::AllMemProfData> {
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::Frame)
LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<memprof::Frame>)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
#endif // LLVM_PROFILEDATA_MEMPROFYAML_H_
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 15ab44934bbf0..f3c5042b87837 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -64,7 +64,7 @@ static size_t serializedSizeV2(const IndexedMemProfRecord &Record,
// The number of callsites we have information for.
Result += sizeof(uint64_t);
// The CallStackId
- Result += Record.CallSiteIds.size() * sizeof(CallStackId);
+ Result += Record.CallSites.size() * sizeof(CallStackId);
return Result;
}
@@ -78,7 +78,7 @@ static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
// The number of callsites we have information for.
Result += sizeof(uint64_t);
// The linear call stack ID.
- Result += Record.CallSiteIds.size() * sizeof(LinearCallStackId);
+ Result += Record.CallSites.size() * sizeof(LinearCallStackId);
return Result;
}
@@ -106,9 +106,9 @@ static void serializeV2(const IndexedMemProfRecord &Record,
}
// Related contexts.
- LE.write<uint64_t>(Record.CallSiteIds.size());
- for (const auto &CSId : Record.CallSiteIds)
- LE.write<CallStackId>(CSId);
+ LE.write<uint64_t>(Record.CallSites.size());
+ for (const auto &CS : Record.CallSites)
+ LE.write<CallStackId>(CS.CSId);
}
static void serializeV3(
@@ -127,10 +127,10 @@ static void serializeV3(
}
// Related contexts.
- LE.write<uint64_t>(Record.CallSiteIds.size());
- for (const auto &CSId : Record.CallSiteIds) {
- assert(MemProfCallStackIndexes.contains(CSId));
- LE.write<LinearCallStackId>(MemProfCallStackIndexes[CSId]);
+ LE.write<uint64_t>(Record.CallSites.size());
+ for (const auto &CS : Record.CallSites) {
+ assert(MemProfCallStackIndexes.contains(CS.CSId));
+ LE.write<LinearCallStackId>(MemProfCallStackIndexes[CS.CSId]);
}
}
@@ -170,11 +170,11 @@ static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
// Read the callsite information.
const uint64_t NumCtxs =
endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
- Record.CallSiteIds.reserve(NumCtxs);
+ Record.CallSites.reserve(NumCtxs);
for (uint64_t J = 0; J < NumCtxs; J++) {
CallStackId CSId =
endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
- Record.CallSiteIds.push_back(CSId);
+ Record.CallSites.push_back(IndexedCallSiteInfo(CSId));
}
return Record;
@@ -202,7 +202,7 @@ static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
// Read the callsite information.
const uint64_t NumCtxs =
endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
- Record.CallSiteIds.reserve(NumCtxs);
+ Record.CallSites.reserve(NumCtxs);
for (uint64_t J = 0; J < NumCtxs; J++) {
// We are storing LinearCallStackId in CallSiteIds, which is a vector of
// CallStackId. Assert that CallStackId is no smaller than
@@ -210,7 +210,7 @@ static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
LinearCallStackId CSId =
endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
- Record.CallSiteIds.push_back(CSId);
+ Record.CallSites.push_back(IndexedCallSiteInfo(CSId));
}
return Record;
@@ -241,9 +241,11 @@ MemProfRecord IndexedMemProfRecord::toMemProfRecord(
Record.AllocSites.push_back(std::move(AI));
}
- Record.CallSites.reserve(CallSiteIds.size());
- for (CallStackId CSId : CallSiteIds)
- Record.CallSites.push_back(Callback(CSId));
+ Record.CallSites.reserve(CallSites.size());
+ for (const IndexedCallSiteInfo &CS : CallSites) {
+ std::vector<Frame> Frames = Callback(CS.CSId);
+ Record.CallSites.push_back(CallSiteInfo(std::move(Frames), CS.CalleeGuids));
+ }
return Record;
}
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index 6a4fecd5ae05e..73a8ea436c1c5 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -521,7 +521,8 @@ Error RawMemProfReader::mapRawProfileToRecords() {
// we insert a new entry for callsite data if we need to.
IndexedMemProfRecord &Record = MemProfData.Records[Id];
for (LocationPtr Loc : Locs)
- Record.CallSiteIds.push_back(MemProfData.addCallStack(*Loc));
+ Record.CallSites.push_back(
+ IndexedCallSiteInfo(MemProfData.addCallStack(*Loc)));
}
return Error::success();
@@ -808,10 +809,10 @@ void YAMLMemProfReader::parse(StringRef YAMLData) {
IndexedRecord.AllocSites.emplace_back(CSId, AI.Info);
}
- // Populate CallSiteIds.
+ // Populate CallSites with CalleeGuids.
for (const auto &CallSite : Record.CallSites) {
- CallStackId CSId = AddCallStack(CallSite);
- IndexedRecord.CallSiteIds.push_back(CSId);
+ CallStackId CSId = AddCallStack(CallSite.Frames);
+ IndexedRecord.CallSites.emplace_back(CSId, CallSite.CalleeGuids);
}
MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord));
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index adcd4d2d11e0f..34b1187c0b842 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -397,7 +397,7 @@ makeRecordV2(std::initializer_list<::llvm::memprof::CallStackId> AllocFrames,
for (const auto &CSId : AllocFrames)
MR.AllocSites.emplace_back(CSId, Block, Schema);
for (const auto &CSId : CallSiteFrames)
- MR.CallSiteIds.push_back(CSId);
+ MR.CallSites.push_back(llvm::memprof::IndexedCallSiteInfo(CSId));
return MR;
}
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 1fe9af521d884..3e430aa4eae58 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -210,8 +210,10 @@ TEST(MemProf, FillsValue) {
FrameContains("abc", 5U, 30U, false));
EXPECT_THAT(Bar.CallSites,
- ElementsAre(ElementsAre(FrameContains("foo", 5U, 30U, true),
- FrameContains("bar", 51U, 20U, false))));
+ ElementsAre(testing::Field(
+ &CallSiteInfo::Frames,
+ ElementsAre(FrameContains("foo", 5U, 30U, true),
+ FrameContains("bar", 51U, 20U, false)))));
// Check the memprof record for xyz.
const llvm::GlobalValue::GUID XyzId = IndexedMemProfRecord::getGUID("xyz");
@@ -220,8 +222,10 @@ TEST(MemProf, FillsValue) {
// Expect the entire frame even though in practice we only need the first
// entry here.
EXPECT_THAT(Xyz.CallSites,
- ElementsAre(ElementsAre(FrameContains("xyz", 5U, 30U, true),
- FrameContains("abc", 5U, 30U, false))));
+ ElementsAre(testing::Field(
+ &CallSiteInfo::Frames,
+ ElementsAre(FrameContains("xyz", 5U, 30U, true),
+ FrameContains("abc", 5U, 30U, false)))));
// Check the memprof record for abc.
const llvm::GlobalValue::GUID AbcId = IndexedMemProfRecord::getGUID("abc");
@@ -229,8 +233,10 @@ TEST(MemProf, FillsValue) {
const MemProfRecord &Abc = Records[AbcId];
EXPECT_TRUE(Abc.AllocSites.empty());
EXPECT_THAT(Abc.CallSites,
- ElementsAre(ElementsAre(FrameContains("xyz", 5U, 30U, true),
- FrameContains("abc", 5U, 30U, false))));
+ ElementsAre(testing::Field(
+ &CallSiteInfo::Frames,
+ ElementsAre(FrameContains("xyz", 5U, 30U, true),
+ FrameContains("abc", 5U, 30U, false)))));
}
TEST(MemProf, PortableWrapper) {
@@ -273,7 +279,8 @@ TEST(MemProf, RecordSerializationRoundTripVerion2) {
// Use the same info block for both allocation sites.
Record.AllocSites.emplace_back(CSId, Info);
}
- Record.CallSiteIds.assign(CallSiteIds);
+ for (auto CSId : CallSiteIds)
+ Record.CallSites.push_back(IndexedCallSiteInfo(CSId));
std::string Buffer;
llvm::raw_string_ostream OS(Buffer);
@@ -303,7 +310,8 @@ TEST(MemProf, RecordSerializationRoundTripVersion2HotColdSchema) {
// Use the same info block for both allocation sites.
Record.AllocSites.emplace_back(CSId, Info, Schema);
}
- Record.CallSiteIds.assign(CallSiteIds);
+ for (auto CSId : CallSiteIds)
+ Record.CallSites.push_back(IndexedCallSiteInfo(CSId));
std::bitset<llvm::to_underlying(Meta::Size)> SchemaBitSet;
for (auto Id : Schema)
@@ -498,8 +506,8 @@ TEST(MemProf, IndexedMemProfRecordToMemProfRecord) {
IndexedRecord.AllocSites.push_back(AI);
AI.CSId = CS2Id;
IndexedRecord.AllocSites.push_back(AI);
- IndexedRecord.CallSiteIds.push_back(CS3Id);
- IndexedRecord.CallSiteIds.push_back(CS4Id);
+ IndexedRecord.CallSites.push_back(IndexedCallSiteInfo(CS3Id));
+ IndexedRecord.CallSites.push_back(IndexedCallSiteInfo(CS4Id));
IndexedCallstackIdConveter CSIdConv(MemProfData);
@@ -513,8 +521,9 @@ TEST(MemProf, IndexedMemProfRecordToMemProfRecord) {
ASSERT_THAT(Record.AllocSites, SizeIs(2));
EXPECT_THAT(Record.AllocSites[0].CallStack, ElementsAre(F1, F2));
EXPECT_THAT(Record.AllocSites[1].CallStack, ElementsAre(F1, F3));
- EXPECT_THAT(Record.CallSites,
- ElementsAre(ElementsAre(F2, F3), ElementsAre(F2, F4)));
+ ASSERT_THAT(Record.CallSites, SizeIs(2));
+ EXPECT_THAT(Record.CallSites[0].Frames, ElementsAre(F2, F3));
+ EXPECT_THAT(Record.CallSites[1].Frames, ElementsAre(F2, F4));
}
// Populate those fields returned by getHotColdSchema.
@@ -690,10 +699,14 @@ TEST(MemProf, YAMLParser) {
AllocCount: 666
TotalSize: 555
CallSites:
- - - {Function: 0x500, LineOffset: 55, Column: 50, IsInlineFrame: true}
+ - Frames:
+ - {Function: 0x500, LineOffset: 55, Column: 50, IsInlineFrame: true}
- {Function: 0x600, LineOffset: 66, Column: 60, IsInlineFrame: false}
- - - {Function: 0x700, LineOffset: 77, Column: 70, IsInlineFrame: true}
+ CalleeGuids: [0x1000, 0x2000]
+ - Frames:
+ - {Function: 0x700, LineOffset: 77, Column: 70, IsInlineFrame: true}
- {Function: 0x800, LineOffset: 88, Column: 80, IsInlineFrame: false}
+ CalleeGuids: [0x3000]
)YAML";
YAMLMemProfReader YAMLReader;
@@ -719,11 +732,19 @@ TEST(MemProf, YAMLParser) {
ElementsAre(Frame(0x300, 33, 30, false), Frame(0x400, 44, 40, true)));
EXPECT_EQ(Record.AllocSites[1].Info.getAllocCount(), 666U);
EXPECT_EQ(Record.AllocSites[1].Info.getTotalSize(), 555U);
- EXPECT_THAT(Record.CallSites,
- ElementsAre(ElementsAre(Frame(0x500, 55, 50, true),
- Frame(0x600, 66, 60, false)),
- ElementsAre(Frame(0x700, 77, 70, true),
- Frame(0x800, 88, 80, false))));
+ EXPECT_THAT(
+ Record.CallSites,
+ ElementsAre(
+ AllOf(testing::Field(&CallSiteInfo::Frames,
+ ElementsAre(Frame(0x500, 55, 50, true),
+ Frame(0x600, 66, 60, false))),
+ testing::Field(&CallSiteInfo::CalleeGuids,
+ ElementsAre(0x1000, 0x2000))),
+ AllOf(testing::Field(&CallSiteInfo::Frames,
+ ElementsAre(Frame(0x700, 77, 70, true),
+ Frame(0x800, 88, 80, false))),
+ testing::Field(&CallSiteInfo::CalleeGuids,
+ ElementsAre(0x3000)))));
}
// Verify that the YAML parser accepts a GUID expressed as a function name.
More information about the llvm-commits
mailing list