[llvm] [MemProf] Templatize CallStackRadixTreeBuilder (NFC) (PR #117014)

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 20 09:33:55 PST 2024


https://github.com/teresajohnson created https://github.com/llvm/llvm-project/pull/117014

Prepare for usage in the bitcode reader/writer where we already have a
LinearFrameId:
- templatize input frame id type in CallStackRadixTreeBuilder
- templatize input frame id type in computeFrameHistogram
- make the map from FrameId to LinearFrameId optional

We plan to use the same radix format in the ThinLTO summary records,
where we already have a LinearFrameId.


>From ba6e933214c4c22a20e5a2fb2764ae92fe70a5e5 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Wed, 20 Nov 2024 09:26:32 -0800
Subject: [PATCH] [MemProf] Templatize CallStackRadixTreeBuilder (NFC)

Prepare for usage in the bitcode reader/writer where we already have a
LinearFrameId:
- templatize input frame id type in CallStackRadixTreeBuilder
- templatize input frame id type in computeFrameHistogram
- make the map from FrameId to LinearFrameId optional

We plan to use the same radix format in the ThinLTO summary records,
where we already have a LinearFrameId.
---
 llvm/include/llvm/ProfileData/MemProf.h    | 25 ++++++------
 llvm/lib/ProfileData/InstrProfWriter.cpp   |  2 +-
 llvm/lib/ProfileData/MemProf.cpp           | 44 ++++++++++++++--------
 llvm/unittests/ProfileData/MemProfTest.cpp | 16 ++++----
 4 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 9415e554bcc0ab..f97fbd4bd64419 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -1050,8 +1050,9 @@ struct FrameStat {
 };
 
 // Compute a histogram of Frames in call stacks.
-llvm::DenseMap<FrameId, FrameStat>
-computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
+template <typename FrameIdTy>
+llvm::DenseMap<FrameIdTy, FrameStat>
+computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
                           &MemProfCallStackData);
 
 // Construct a radix tree of call stacks.
@@ -1109,7 +1110,7 @@ computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
 // On-disk IndexedMemProfRecord will refer to call stacks by their indexes into
 // the radix tree array, so we do not explicitly encode mappings like:
 // "CallStackId 1 -> 11".
-class CallStackRadixTreeBuilder {
+template <typename FrameIdTy> class CallStackRadixTreeBuilder {
   // The radix tree array.
   std::vector<LinearFrameId> RadixArray;
 
@@ -1136,23 +1137,25 @@ class CallStackRadixTreeBuilder {
   // RadixArray[Indexes[5 - 1]] is the last frame of the common prefix.
   std::vector<LinearCallStackId> Indexes;
 
-  using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameId>>;
+  using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameIdTy>>;
 
   // Encode a call stack into RadixArray.  Return the starting index within
   // RadixArray.
-  LinearCallStackId encodeCallStack(
-      const llvm::SmallVector<FrameId> *CallStack,
-      const llvm::SmallVector<FrameId> *Prev,
-      const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes);
+  LinearCallStackId
+  encodeCallStack(const llvm::SmallVector<FrameIdTy> *CallStack,
+                  const llvm::SmallVector<FrameIdTy> *Prev,
+                  std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
+                      MemProfFrameIndexes);
 
 public:
   CallStackRadixTreeBuilder() = default;
 
   // Build a radix tree array.
-  void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
+  void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
                  &&MemProfCallStackData,
-             const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
-             llvm::DenseMap<FrameId, FrameStat> &FrameHistogram);
+             std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
+                 MemProfFrameIndexes,
+             llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram);
 
   ArrayRef<LinearFrameId> getRadixArray() const { return RadixArray; }
 
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 5580fe04ffe7df..d90629ad57f5b9 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -635,7 +635,7 @@ writeMemProfCallStackArray(
   llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
       MemProfCallStackIndexes;
 
-  memprof::CallStackRadixTreeBuilder Builder;
+  memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
   Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
                 FrameHistogram);
   for (auto I : Builder.getRadixArray())
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 6c4bda2d9264ff..9d5ac748d7975d 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -436,10 +436,12 @@ CallStackId hashCallStack(ArrayRef<FrameId> CS) {
 // To quickly determine the location of the common prefix within RadixArray,
 // Indexes caches the indexes of the previous call stack's frames within
 // RadixArray.
-LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack(
-    const llvm::SmallVector<FrameId> *CallStack,
-    const llvm::SmallVector<FrameId> *Prev,
-    const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes) {
+template <typename FrameIdTy>
+LinearCallStackId CallStackRadixTreeBuilder<FrameIdTy>::encodeCallStack(
+    const llvm::SmallVector<FrameIdTy> *CallStack,
+    const llvm::SmallVector<FrameIdTy> *Prev,
+    std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
+        MemProfFrameIndexes) {
   // Compute the length of the common root prefix between Prev and CallStack.
   uint32_t CommonLen = 0;
   if (Prev) {
@@ -464,10 +466,11 @@ LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack(
 
   // Copy the part of the call stack beyond the common prefix to RadixArray.
   assert(CommonLen <= CallStack->size());
-  for (FrameId F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) {
+  for (FrameIdTy F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) {
     // Remember the index of F in RadixArray.
     Indexes.push_back(RadixArray.size());
-    RadixArray.push_back(MemProfFrameIndexes.find(F)->second);
+    RadixArray.push_back(
+        MemProfFrameIndexes ? MemProfFrameIndexes->find(F)->second : F);
   }
   assert(CallStack->size() == Indexes.size());
 
@@ -479,11 +482,13 @@ LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack(
   return RadixArray.size() - 1;
 }
 
-void CallStackRadixTreeBuilder::build(
-    llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
+template <typename FrameIdTy>
+void CallStackRadixTreeBuilder<FrameIdTy>::build(
+    llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
         &&MemProfCallStackData,
-    const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
-    llvm::DenseMap<FrameId, FrameStat> &FrameHistogram) {
+    std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
+        MemProfFrameIndexes,
+    llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram) {
   // Take the vector portion of MemProfCallStackData.  The vector is exactly
   // what we need to sort.  Also, we no longer need its lookup capability.
   llvm::SmallVector<CSIdPair, 0> CallStacks = MemProfCallStackData.takeVector();
@@ -535,7 +540,7 @@ void CallStackRadixTreeBuilder::build(
     // root.
     return std::lexicographical_compare(
         L.second.rbegin(), L.second.rend(), R.second.rbegin(), R.second.rend(),
-        [&](FrameId F1, FrameId F2) {
+        [&](FrameIdTy F1, FrameIdTy F2) {
           uint64_t H1 = FrameHistogram[F1].Count;
           uint64_t H2 = FrameHistogram[F2].Count;
           // Popular frames should come later because we encode call stacks from
@@ -585,7 +590,7 @@ void CallStackRadixTreeBuilder::build(
   // traverse CallStacks in the reverse order, then Call Stack 3 has the
   // complete call stack encoded without any pointers.  Call Stack 1 and 2 point
   // to appropriate prefixes of Call Stack 3.
-  const llvm::SmallVector<FrameId> *Prev = nullptr;
+  const llvm::SmallVector<FrameIdTy> *Prev = nullptr;
   for (const auto &[CSId, CallStack] : llvm::reverse(CallStacks)) {
     LinearCallStackId Pos =
         encodeCallStack(&CallStack, Prev, MemProfFrameIndexes);
@@ -608,10 +613,14 @@ void CallStackRadixTreeBuilder::build(
     V = RadixArray.size() - 1 - V;
 }
 
-llvm::DenseMap<FrameId, FrameStat>
-computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
+// Explicitly instantiate class with the utilized FrameIdTy.
+template class CallStackRadixTreeBuilder<FrameId>;
+
+template <typename FrameIdTy>
+llvm::DenseMap<FrameIdTy, FrameStat>
+computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
                           &MemProfCallStackData) {
-  llvm::DenseMap<FrameId, FrameStat> Histogram;
+  llvm::DenseMap<FrameIdTy, FrameStat> Histogram;
 
   for (const auto &KV : MemProfCallStackData) {
     const auto &CS = KV.second;
@@ -624,6 +633,11 @@ computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
   return Histogram;
 }
 
+// Explicitly instantiate function with the utilized FrameIdTy.
+template llvm::DenseMap<FrameId, FrameStat> computeFrameHistogram<FrameId>(
+    llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
+        &MemProfCallStackData);
+
 void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) {
   for (const auto &AS : Record.AllocSites) {
     assert(AS.CSId == hashCallStack(AS.CallStack));
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index e5d41db06dcc07..e68cc094338736 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -659,8 +659,8 @@ TEST(MemProf, RadixTreeBuilderEmpty) {
   llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> MemProfCallStackData;
   llvm::DenseMap<llvm::memprof::FrameId, llvm::memprof::FrameStat>
       FrameHistogram =
-          llvm::memprof::computeFrameHistogram(MemProfCallStackData);
-  llvm::memprof::CallStackRadixTreeBuilder Builder;
+          llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
+  llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
   Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
                 FrameHistogram);
   ASSERT_THAT(Builder.getRadixArray(), testing::IsEmpty());
@@ -677,8 +677,8 @@ TEST(MemProf, RadixTreeBuilderOne) {
   MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS1), CS1});
   llvm::DenseMap<llvm::memprof::FrameId, llvm::memprof::FrameStat>
       FrameHistogram =
-          llvm::memprof::computeFrameHistogram(MemProfCallStackData);
-  llvm::memprof::CallStackRadixTreeBuilder Builder;
+          llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
+  llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
   Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
                 FrameHistogram);
   EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({
@@ -704,8 +704,8 @@ TEST(MemProf, RadixTreeBuilderTwo) {
   MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS2), CS2});
   llvm::DenseMap<llvm::memprof::FrameId, llvm::memprof::FrameStat>
       FrameHistogram =
-          llvm::memprof::computeFrameHistogram(MemProfCallStackData);
-  llvm::memprof::CallStackRadixTreeBuilder Builder;
+          llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
+  llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
   Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
                 FrameHistogram);
   EXPECT_THAT(Builder.getRadixArray(),
@@ -742,8 +742,8 @@ TEST(MemProf, RadixTreeBuilderSuccessiveJumps) {
   MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS4), CS4});
   llvm::DenseMap<llvm::memprof::FrameId, llvm::memprof::FrameStat>
       FrameHistogram =
-          llvm::memprof::computeFrameHistogram(MemProfCallStackData);
-  llvm::memprof::CallStackRadixTreeBuilder Builder;
+          llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
+  llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
   Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
                 FrameHistogram);
   EXPECT_THAT(Builder.getRadixArray(),



More information about the llvm-commits mailing list