[llvm] [MemProf] Templatize CallStackRadixTreeBuilder (NFC) (PR #117014)
Teresa Johnson via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 20 09:33:55 PST 2024
https://github.com/teresajohnson created https://github.com/llvm/llvm-project/pull/117014
Prepare for usage in the bitcode reader/writer where we already have a
LinearFrameId:
- templatize input frame id type in CallStackRadixTreeBuilder
- templatize input frame id type in computeFrameHistogram
- make the map from FrameId to LinearFrameId optional
We plan to use the same radix format in the ThinLTO summary records,
where we already have a LinearFrameId.
>From ba6e933214c4c22a20e5a2fb2764ae92fe70a5e5 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Wed, 20 Nov 2024 09:26:32 -0800
Subject: [PATCH] [MemProf] Templatize CallStackRadixTreeBuilder (NFC)
Prepare for usage in the bitcode reader/writer where we already have a
LinearFrameId:
- templatize input frame id type in CallStackRadixTreeBuilder
- templatize input frame id type in computeFrameHistogram
- make the map from FrameId to LinearFrameId optional
We plan to use the same radix format in the ThinLTO summary records,
where we already have a LinearFrameId.
---
llvm/include/llvm/ProfileData/MemProf.h | 25 ++++++------
llvm/lib/ProfileData/InstrProfWriter.cpp | 2 +-
llvm/lib/ProfileData/MemProf.cpp | 44 ++++++++++++++--------
llvm/unittests/ProfileData/MemProfTest.cpp | 16 ++++----
4 files changed, 52 insertions(+), 35 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 9415e554bcc0ab..f97fbd4bd64419 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -1050,8 +1050,9 @@ struct FrameStat {
};
// Compute a histogram of Frames in call stacks.
-llvm::DenseMap<FrameId, FrameStat>
-computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
+template <typename FrameIdTy>
+llvm::DenseMap<FrameIdTy, FrameStat>
+computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
&MemProfCallStackData);
// Construct a radix tree of call stacks.
@@ -1109,7 +1110,7 @@ computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
// On-disk IndexedMemProfRecord will refer to call stacks by their indexes into
// the radix tree array, so we do not explicitly encode mappings like:
// "CallStackId 1 -> 11".
-class CallStackRadixTreeBuilder {
+template <typename FrameIdTy> class CallStackRadixTreeBuilder {
// The radix tree array.
std::vector<LinearFrameId> RadixArray;
@@ -1136,23 +1137,25 @@ class CallStackRadixTreeBuilder {
// RadixArray[Indexes[5 - 1]] is the last frame of the common prefix.
std::vector<LinearCallStackId> Indexes;
- using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameId>>;
+ using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameIdTy>>;
// Encode a call stack into RadixArray. Return the starting index within
// RadixArray.
- LinearCallStackId encodeCallStack(
- const llvm::SmallVector<FrameId> *CallStack,
- const llvm::SmallVector<FrameId> *Prev,
- const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes);
+ LinearCallStackId
+ encodeCallStack(const llvm::SmallVector<FrameIdTy> *CallStack,
+ const llvm::SmallVector<FrameIdTy> *Prev,
+ std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
+ MemProfFrameIndexes);
public:
CallStackRadixTreeBuilder() = default;
// Build a radix tree array.
- void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
+ void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
&&MemProfCallStackData,
- const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
- llvm::DenseMap<FrameId, FrameStat> &FrameHistogram);
+ std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
+ MemProfFrameIndexes,
+ llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram);
ArrayRef<LinearFrameId> getRadixArray() const { return RadixArray; }
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 5580fe04ffe7df..d90629ad57f5b9 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -635,7 +635,7 @@ writeMemProfCallStackArray(
llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
MemProfCallStackIndexes;
- memprof::CallStackRadixTreeBuilder Builder;
+ memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
FrameHistogram);
for (auto I : Builder.getRadixArray())
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 6c4bda2d9264ff..9d5ac748d7975d 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -436,10 +436,12 @@ CallStackId hashCallStack(ArrayRef<FrameId> CS) {
// To quickly determine the location of the common prefix within RadixArray,
// Indexes caches the indexes of the previous call stack's frames within
// RadixArray.
-LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack(
- const llvm::SmallVector<FrameId> *CallStack,
- const llvm::SmallVector<FrameId> *Prev,
- const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes) {
+template <typename FrameIdTy>
+LinearCallStackId CallStackRadixTreeBuilder<FrameIdTy>::encodeCallStack(
+ const llvm::SmallVector<FrameIdTy> *CallStack,
+ const llvm::SmallVector<FrameIdTy> *Prev,
+ std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
+ MemProfFrameIndexes) {
// Compute the length of the common root prefix between Prev and CallStack.
uint32_t CommonLen = 0;
if (Prev) {
@@ -464,10 +466,11 @@ LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack(
// Copy the part of the call stack beyond the common prefix to RadixArray.
assert(CommonLen <= CallStack->size());
- for (FrameId F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) {
+ for (FrameIdTy F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) {
// Remember the index of F in RadixArray.
Indexes.push_back(RadixArray.size());
- RadixArray.push_back(MemProfFrameIndexes.find(F)->second);
+ RadixArray.push_back(
+ MemProfFrameIndexes ? MemProfFrameIndexes->find(F)->second : F);
}
assert(CallStack->size() == Indexes.size());
@@ -479,11 +482,13 @@ LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack(
return RadixArray.size() - 1;
}
-void CallStackRadixTreeBuilder::build(
- llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
+template <typename FrameIdTy>
+void CallStackRadixTreeBuilder<FrameIdTy>::build(
+ llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
&&MemProfCallStackData,
- const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
- llvm::DenseMap<FrameId, FrameStat> &FrameHistogram) {
+ std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
+ MemProfFrameIndexes,
+ llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram) {
// Take the vector portion of MemProfCallStackData. The vector is exactly
// what we need to sort. Also, we no longer need its lookup capability.
llvm::SmallVector<CSIdPair, 0> CallStacks = MemProfCallStackData.takeVector();
@@ -535,7 +540,7 @@ void CallStackRadixTreeBuilder::build(
// root.
return std::lexicographical_compare(
L.second.rbegin(), L.second.rend(), R.second.rbegin(), R.second.rend(),
- [&](FrameId F1, FrameId F2) {
+ [&](FrameIdTy F1, FrameIdTy F2) {
uint64_t H1 = FrameHistogram[F1].Count;
uint64_t H2 = FrameHistogram[F2].Count;
// Popular frames should come later because we encode call stacks from
@@ -585,7 +590,7 @@ void CallStackRadixTreeBuilder::build(
// traverse CallStacks in the reverse order, then Call Stack 3 has the
// complete call stack encoded without any pointers. Call Stack 1 and 2 point
// to appropriate prefixes of Call Stack 3.
- const llvm::SmallVector<FrameId> *Prev = nullptr;
+ const llvm::SmallVector<FrameIdTy> *Prev = nullptr;
for (const auto &[CSId, CallStack] : llvm::reverse(CallStacks)) {
LinearCallStackId Pos =
encodeCallStack(&CallStack, Prev, MemProfFrameIndexes);
@@ -608,10 +613,14 @@ void CallStackRadixTreeBuilder::build(
V = RadixArray.size() - 1 - V;
}
-llvm::DenseMap<FrameId, FrameStat>
-computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
+// Explicitly instantiate class with the utilized FrameIdTy.
+template class CallStackRadixTreeBuilder<FrameId>;
+
+template <typename FrameIdTy>
+llvm::DenseMap<FrameIdTy, FrameStat>
+computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
&MemProfCallStackData) {
- llvm::DenseMap<FrameId, FrameStat> Histogram;
+ llvm::DenseMap<FrameIdTy, FrameStat> Histogram;
for (const auto &KV : MemProfCallStackData) {
const auto &CS = KV.second;
@@ -624,6 +633,11 @@ computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
return Histogram;
}
+// Explicitly instantiate function with the utilized FrameIdTy.
+template llvm::DenseMap<FrameId, FrameStat> computeFrameHistogram<FrameId>(
+ llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
+ &MemProfCallStackData);
+
void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) {
for (const auto &AS : Record.AllocSites) {
assert(AS.CSId == hashCallStack(AS.CallStack));
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index e5d41db06dcc07..e68cc094338736 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -659,8 +659,8 @@ TEST(MemProf, RadixTreeBuilderEmpty) {
llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> MemProfCallStackData;
llvm::DenseMap<llvm::memprof::FrameId, llvm::memprof::FrameStat>
FrameHistogram =
- llvm::memprof::computeFrameHistogram(MemProfCallStackData);
- llvm::memprof::CallStackRadixTreeBuilder Builder;
+ llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
+ llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
FrameHistogram);
ASSERT_THAT(Builder.getRadixArray(), testing::IsEmpty());
@@ -677,8 +677,8 @@ TEST(MemProf, RadixTreeBuilderOne) {
MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS1), CS1});
llvm::DenseMap<llvm::memprof::FrameId, llvm::memprof::FrameStat>
FrameHistogram =
- llvm::memprof::computeFrameHistogram(MemProfCallStackData);
- llvm::memprof::CallStackRadixTreeBuilder Builder;
+ llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
+ llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
FrameHistogram);
EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({
@@ -704,8 +704,8 @@ TEST(MemProf, RadixTreeBuilderTwo) {
MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS2), CS2});
llvm::DenseMap<llvm::memprof::FrameId, llvm::memprof::FrameStat>
FrameHistogram =
- llvm::memprof::computeFrameHistogram(MemProfCallStackData);
- llvm::memprof::CallStackRadixTreeBuilder Builder;
+ llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
+ llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
FrameHistogram);
EXPECT_THAT(Builder.getRadixArray(),
@@ -742,8 +742,8 @@ TEST(MemProf, RadixTreeBuilderSuccessiveJumps) {
MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS4), CS4});
llvm::DenseMap<llvm::memprof::FrameId, llvm::memprof::FrameStat>
FrameHistogram =
- llvm::memprof::computeFrameHistogram(MemProfCallStackData);
- llvm::memprof::CallStackRadixTreeBuilder Builder;
+ llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
+ llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
FrameHistogram);
EXPECT_THAT(Builder.getRadixArray(),
More information about the llvm-commits
mailing list