[llvm] [memprof] Add call stack IDs to IndexedAllocationInfo (PR #85888)

Kazu Hirata via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 20 17:03:25 PDT 2024


https://github.com/kazutakahirata updated https://github.com/llvm/llvm-project/pull/85888

>From 524631bd181bd71417a3a76d340ebc230b2172e2 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Tue, 19 Mar 2024 15:58:16 -0700
Subject: [PATCH 1/2] [memprof] Add call stack IDs to IndexedAllocationInfo

The indexed MemProf file has a huge amount of redundancy.  In a large
internal application, 82% of call stacks, stored in
IndexedAllocationInfo::CallStack, are duplicates.

We should work toward deduplicating call stacks by referring to them
by unique IDs, with the actual call stacks stored in a separate data
structure, much like we refer to memprof::Frame with memprof::FrameId.

At the same time, we need to facilitate a graceful transition from the
current version of the MemProf format to the next.  We should be able
to read (but not write) the current version of the MemProf file even
after we move on to the next one.

With those goals in mind, I propose adding an integer ID next to
CallStack in IndexedAllocationInfo to refer to a call stack in a
succinct manner.  We'll gradually increase the areas of the compiler
where IDs and call stacks have a one-to-one correspondence and
eventually remove the existing CallStack field.

This patch adds call stack ID, named CSId, to IndexedAllocationInfo
and teaches the raw profile reader to compute unique call stack IDs
and store them in the new field.  It does not introduce any user of
the call stack IDs yet, except in verifyFunctionProfileData.
---
 llvm/include/llvm/ProfileData/MemProf.h       | 19 ++++++++++--
 .../llvm/ProfileData/RawMemProfReader.h       |  2 ++
 llvm/lib/ProfileData/MemProf.cpp              | 29 +++++++++++++++++++
 llvm/lib/ProfileData/RawMemProfReader.cpp     | 15 +++++++++-
 llvm/unittests/ProfileData/InstrProfTest.cpp  |  2 +-
 llvm/unittests/ProfileData/MemProfTest.cpp    |  5 ++--
 6 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 37c19094bc2a63..bc8601733f6d26 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -1,6 +1,7 @@
 #ifndef LLVM_PROFILEDATA_MEMPROF_H_
 #define LLVM_PROFILEDATA_MEMPROF_H_
 
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/GlobalValue.h"
@@ -252,18 +253,26 @@ struct Frame {
   }
 };
 
+// A type representing the index into the table of call stacks.
+using CallStackId = uint64_t;
+
 // Holds allocation information in a space efficient format where frames are
 // represented using unique identifiers.
 struct IndexedAllocationInfo {
   // The dynamic calling context for the allocation in bottom-up (leaf-to-root)
   // order. Frame contents are stored out-of-line.
+  // TODO: Remove once we fully transition to CSId.
   llvm::SmallVector<FrameId> CallStack;
+  // Conceptually the same as above.  We are going to keep both CallStack and
+  // CSId while we are transitioning from CallStack to CSId.
+  CallStackId CSId = 0;
   // The statistics obtained from the runtime for the allocation.
   PortableMemInfoBlock Info;
 
   IndexedAllocationInfo() = default;
-  IndexedAllocationInfo(ArrayRef<FrameId> CS, const MemInfoBlock &MB)
-      : CallStack(CS.begin(), CS.end()), Info(MB) {}
+  IndexedAllocationInfo(ArrayRef<FrameId> CS, CallStackId CSId,
+                        const MemInfoBlock &MB)
+      : CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB) {}
 
   // Returns the size in bytes when this allocation info struct is serialized.
   size_t serializedSize() const {
@@ -622,6 +631,12 @@ class FrameLookupTrait {
     return Frame::deserialize(D);
   }
 };
+
+// Verify that the set of CallStackIds and the set of call stacks have
+// one-to-one correspondence.
+void verifyFunctionProfileData(
+    const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
+        &FunctionProfileData);
 } // namespace memprof
 } // namespace llvm
 
diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h
index 6aa5caec65f791..f84d13c48b4850 100644
--- a/llvm/include/llvm/ProfileData/RawMemProfReader.h
+++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h
@@ -97,6 +97,8 @@ class MemProfReader {
   }
   // A mapping from FrameId (a hash of the contents) to the frame.
   llvm::DenseMap<FrameId, Frame> IdToFrame;
+  // A vector of all unique call stacks, indexed by CallStackId.
+  llvm::SmallVector<llvm::SmallVector<FrameId>> CallStacks;
   // A mapping from function GUID, hash of the canonical function symbol to the
   // memprof profile data for that function, i.e allocation and callsite info.
   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData;
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 0461f0e9f84078..0c9c5d3dad086d 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -117,5 +117,34 @@ Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
   return Result;
 }
 
+// Verify that the set of CallStackIds and the set of call stacks have
+// one-to-one correspondence.  This function is intended to help transition from
+// CallStack to CSId in IndexedAllocationInfo.
+void verifyFunctionProfileData(
+    const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
+        &FunctionProfileData) {
+  DenseMap<CallStackId, SmallVector<FrameId>> CSIdToCS;
+  std::map<llvm::SmallVector<FrameId>, CallStackId> CSToCSId;
+  for (const auto &[GUID, Record] : FunctionProfileData) {
+    (void)GUID;
+    for (const auto &AS : Record.AllocSites) {
+      auto Result = CSToCSId.insert({AS.CallStack, AS.CSId});
+      if (!Result.second) {
+        assert(Result.first->second == AS.CSId);
+      }
+      auto Result2 = CSIdToCS.insert({AS.CSId, AS.CallStack});
+      if (!Result2.second) {
+        const auto &Other = Result2.first->second;
+        assert(Other.size() == AS.CallStack.size());
+        (void)Other;
+        for (size_t I = 0, E = AS.CallStack.size(); I != E; ++I) {
+          assert(Other[I] == AS.CallStack[I]);
+        }
+      }
+    }
+  }
+  assert(CSIdToCS.size() == CSToCSId.size());
+}
+
 } // namespace memprof
 } // namespace llvm
diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index 0e2b8668bab72c..43b498287cd375 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -402,6 +402,9 @@ Error RawMemProfReader::mapRawProfileToRecords() {
   llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
       PerFunctionCallSites;
 
+  // Hold a mapping from callstack to its CallStackID.
+  std::map<llvm::SmallVector<FrameId>, CallStackId> CallStackToCallStackId;
+
   // Convert the raw profile callstack data into memprof records. While doing so
   // keep track of related contexts so that we can fill these in later.
   for (const auto &Entry : CallstackProfileData) {
@@ -445,6 +448,12 @@ Error RawMemProfReader::mapRawProfileToRecords() {
       Callstack.append(Frames.begin(), Frames.end());
     }
 
+    auto InsertResult =
+        CallStackToCallStackId.insert({Callstack, CallStacks.size()});
+    if (InsertResult.second)
+      CallStacks.push_back(Callstack);
+    CallStackId CSId = InsertResult.first->second;
+
     // We attach the memprof record to each function bottom-up including the
     // first non-inline frame.
     for (size_t I = 0; /*Break out using the condition below*/; I++) {
@@ -452,7 +461,7 @@ Error RawMemProfReader::mapRawProfileToRecords() {
       auto Result =
           FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
       IndexedMemProfRecord &Record = Result.first->second;
-      Record.AllocSites.emplace_back(Callstack, Entry.second);
+      Record.AllocSites.emplace_back(Callstack, CSId, Entry.second);
 
       if (!F.IsInlineFrame)
         break;
@@ -470,6 +479,10 @@ Error RawMemProfReader::mapRawProfileToRecords() {
     }
   }
 
+#ifdef EXPENSIVE_CHECKS
+  verifyFunctionProfileData(FunctionProfileData);
+#endif
+
   return Error::success();
 }
 
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index cd4552a039b36b..aac2f53565cbdd 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -366,7 +366,7 @@ IndexedMemProfRecord makeRecord(
     const MemInfoBlock &Block = MemInfoBlock()) {
   llvm::memprof::IndexedMemProfRecord MR;
   for (const auto &Frames : AllocFrames)
-    MR.AllocSites.emplace_back(Frames, Block);
+    MR.AllocSites.emplace_back(Frames, 0, Block);
   for (const auto &Frames : CallSiteFrames)
     MR.CallSites.push_back(Frames);
   return MR;
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index f5e4a4aff2ed17..3aca09ed36e3f6 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -280,7 +280,7 @@ TEST(MemProf, RecordSerializationRoundTrip) {
   IndexedMemProfRecord Record;
   for (const auto &ACS : AllocCallStacks) {
     // Use the same info block for both allocation sites.
-    Record.AllocSites.emplace_back(ACS, Info);
+    Record.AllocSites.emplace_back(ACS, 0, Info);
   }
   Record.CallSites.assign(CallSites);
 
@@ -376,7 +376,8 @@ TEST(MemProf, BaseMemProfReader) {
   Block.AllocCount = 1U, Block.TotalAccessDensity = 4,
   Block.TotalLifetime = 200001;
   std::array<FrameId, 2> CallStack{F1.hash(), F2.hash()};
-  FakeRecord.AllocSites.emplace_back(/*CS=*/CallStack, /*MB=*/Block);
+  FakeRecord.AllocSites.emplace_back(/*CS=*/CallStack, /*CSId=*/0,
+                                     /*MB=*/Block);
   ProfData.insert({F1.hash(), FakeRecord});
 
   MemProfReader Reader(FrameIdMap, ProfData);

>From fd22271cf45739592f40b02c5581d93952fb3f47 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 20 Mar 2024 16:30:05 -0700
Subject: [PATCH 2/2] Use a hash value as CallStackId.

---
 llvm/include/llvm/ProfileData/MemProf.h      |  8 +++--
 llvm/lib/ProfileData/MemProf.cpp             | 34 +++++++++-----------
 llvm/lib/ProfileData/RawMemProfReader.cpp    | 11 +------
 llvm/unittests/ProfileData/InstrProfTest.cpp |  3 +-
 llvm/unittests/ProfileData/MemProfTest.cpp   |  8 +++--
 5 files changed, 29 insertions(+), 35 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index bc8601733f6d26..75ea0e49d45373 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -632,8 +632,12 @@ class FrameLookupTrait {
   }
 };
 
-// Verify that the set of CallStackIds and the set of call stacks have
-// one-to-one correspondence.
+// Compute a CallStackId for a given call stack.
+CallStackId hashCallStack(ArrayRef<FrameId> CS);
+
+// Verify that each CallStackId is computed with hashCallStack.  This function
+// is intended to help transition from CallStack to CSId in
+// IndexedAllocationInfo.
 void verifyFunctionProfileData(
     const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
         &FunctionProfileData);
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 0c9c5d3dad086d..bffa4ed19be8c8 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -3,8 +3,10 @@
 #include "llvm/IR/Function.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Support/BLAKE3.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/EndianStream.h"
+#include "llvm/Support/HashBuilder.h"
 
 namespace llvm {
 namespace memprof {
@@ -117,33 +119,27 @@ Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
   return Result;
 }
 
-// Verify that the set of CallStackIds and the set of call stacks have
-// one-to-one correspondence.  This function is intended to help transition from
-// CallStack to CSId in IndexedAllocationInfo.
+CallStackId hashCallStack(ArrayRef<FrameId> CS) {
+  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
+      HashBuilder;
+  for (FrameId F : CS)
+    HashBuilder.add(F);
+  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
+  CallStackId CSId;
+  std::memcpy(&CSId, Hash.data(), sizeof(Hash));
+  return CSId;
+}
+
 void verifyFunctionProfileData(
     const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
         &FunctionProfileData) {
-  DenseMap<CallStackId, SmallVector<FrameId>> CSIdToCS;
-  std::map<llvm::SmallVector<FrameId>, CallStackId> CSToCSId;
   for (const auto &[GUID, Record] : FunctionProfileData) {
     (void)GUID;
     for (const auto &AS : Record.AllocSites) {
-      auto Result = CSToCSId.insert({AS.CallStack, AS.CSId});
-      if (!Result.second) {
-        assert(Result.first->second == AS.CSId);
-      }
-      auto Result2 = CSIdToCS.insert({AS.CSId, AS.CallStack});
-      if (!Result2.second) {
-        const auto &Other = Result2.first->second;
-        assert(Other.size() == AS.CallStack.size());
-        (void)Other;
-        for (size_t I = 0, E = AS.CallStack.size(); I != E; ++I) {
-          assert(Other[I] == AS.CallStack[I]);
-        }
-      }
+      assert(AS.CSId == hashCallStack(AS.CallStack));
+      (void)AS;
     }
   }
-  assert(CSIdToCS.size() == CSToCSId.size());
 }
 
 } // namespace memprof
diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index 43b498287cd375..b2bd38d3fbf203 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -402,9 +402,6 @@ Error RawMemProfReader::mapRawProfileToRecords() {
   llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
       PerFunctionCallSites;
 
-  // Hold a mapping from callstack to its CallStackID.
-  std::map<llvm::SmallVector<FrameId>, CallStackId> CallStackToCallStackId;
-
   // Convert the raw profile callstack data into memprof records. While doing so
   // keep track of related contexts so that we can fill these in later.
   for (const auto &Entry : CallstackProfileData) {
@@ -448,11 +445,7 @@ Error RawMemProfReader::mapRawProfileToRecords() {
       Callstack.append(Frames.begin(), Frames.end());
     }
 
-    auto InsertResult =
-        CallStackToCallStackId.insert({Callstack, CallStacks.size()});
-    if (InsertResult.second)
-      CallStacks.push_back(Callstack);
-    CallStackId CSId = InsertResult.first->second;
+    CallStackId CSId = hashCallStack(Callstack);
 
     // We attach the memprof record to each function bottom-up including the
     // first non-inline frame.
@@ -479,9 +472,7 @@ Error RawMemProfReader::mapRawProfileToRecords() {
     }
   }
 
-#ifdef EXPENSIVE_CHECKS
   verifyFunctionProfileData(FunctionProfileData);
-#endif
 
   return Error::success();
 }
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index aac2f53565cbdd..c9323420bda79b 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -366,7 +366,8 @@ IndexedMemProfRecord makeRecord(
     const MemInfoBlock &Block = MemInfoBlock()) {
   llvm::memprof::IndexedMemProfRecord MR;
   for (const auto &Frames : AllocFrames)
-    MR.AllocSites.emplace_back(Frames, 0, Block);
+    MR.AllocSites.emplace_back(Frames, llvm::memprof::hashCallStack(Frames),
+                               Block);
   for (const auto &Frames : CallSiteFrames)
     MR.CallSites.push_back(Frames);
   return MR;
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 3aca09ed36e3f6..1cca44e9b03707 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -280,7 +280,8 @@ TEST(MemProf, RecordSerializationRoundTrip) {
   IndexedMemProfRecord Record;
   for (const auto &ACS : AllocCallStacks) {
     // Use the same info block for both allocation sites.
-    Record.AllocSites.emplace_back(ACS, 0, Info);
+    Record.AllocSites.emplace_back(ACS, llvm::memprof::hashCallStack(ACS),
+                                   Info);
   }
   Record.CallSites.assign(CallSites);
 
@@ -376,8 +377,9 @@ TEST(MemProf, BaseMemProfReader) {
   Block.AllocCount = 1U, Block.TotalAccessDensity = 4,
   Block.TotalLifetime = 200001;
   std::array<FrameId, 2> CallStack{F1.hash(), F2.hash()};
-  FakeRecord.AllocSites.emplace_back(/*CS=*/CallStack, /*CSId=*/0,
-                                     /*MB=*/Block);
+  FakeRecord.AllocSites.emplace_back(
+      /*CS=*/CallStack, /*CSId=*/llvm::memprof::hashCallStack(CallStack),
+      /*MB=*/Block);
   ProfData.insert({F1.hash(), FakeRecord});
 
   MemProfReader Reader(FrameIdMap, ProfData);



More information about the llvm-commits mailing list