[llvm] 74799f4 - [memprof] Add call stack IDs to IndexedAllocationInfo (#85888)

via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 23 19:50:18 PDT 2024


Author: Kazu Hirata
Date: 2024-03-23T19:50:15-07:00
New Revision: 74799f424063a2d751e0f9ea698db1f4efd0d8b2

URL: https://github.com/llvm/llvm-project/commit/74799f424063a2d751e0f9ea698db1f4efd0d8b2
DIFF: https://github.com/llvm/llvm-project/commit/74799f424063a2d751e0f9ea698db1f4efd0d8b2.diff

LOG: [memprof] Add call stack IDs to IndexedAllocationInfo (#85888)

The indexed MemProf file has a huge amount of redundancy.  In a large
internal application, 82% of call stacks, stored in
IndexedAllocationInfo::CallStack, are duplicates.

We should work toward deduplicating call stacks by referring to them
with unique IDs, with the actual call stacks stored in a separate data
structure, much like we refer to memprof::Frame with memprof::FrameId.

At the same time, we need to facilitate a graceful transition from the
current version of the MemProf format to the next.  We should be able
to read (but not write) the current version of the MemProf file even
after we move on to the next one.

With those goals in mind, I propose to have an integer ID next to
CallStack in IndexedAllocationInfo to refer to a call stack in a
succinct manner.  We'll gradually increase the areas of the compiler
where IDs and call stacks have a one-to-one correspondence and
eventually remove the existing CallStack field.

This patch adds a call stack ID, named CSId, to IndexedAllocationInfo
and teaches the raw profile reader to compute unique call stack IDs
and store them in the new field.  It does not introduce any user of
the call stack IDs yet, except in verifyFunctionProfileData.

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/MemProf.h
    llvm/lib/ProfileData/MemProf.cpp
    llvm/lib/ProfileData/RawMemProfReader.cpp
    llvm/unittests/ProfileData/InstrProfTest.cpp
    llvm/unittests/ProfileData/MemProfTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 37c19094bc2a63..75ea0e49d45373 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -1,6 +1,7 @@
 #ifndef LLVM_PROFILEDATA_MEMPROF_H_
 #define LLVM_PROFILEDATA_MEMPROF_H_
 
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/GlobalValue.h"
@@ -252,18 +253,26 @@ struct Frame {
   }
 };
 
+// A type representing the index into the table of call stacks.
+using CallStackId = uint64_t;
+
 // Holds allocation information in a space efficient format where frames are
 // represented using unique identifiers.
 struct IndexedAllocationInfo {
   // The dynamic calling context for the allocation in bottom-up (leaf-to-root)
   // order. Frame contents are stored out-of-line.
+  // TODO: Remove once we fully transition to CSId.
   llvm::SmallVector<FrameId> CallStack;
+  // Conceptually the same as above.  We are going to keep both CallStack and
+  // CSId while we are transitioning from CallStack to CSId.
+  CallStackId CSId = 0;
   // The statistics obtained from the runtime for the allocation.
   PortableMemInfoBlock Info;
 
   IndexedAllocationInfo() = default;
-  IndexedAllocationInfo(ArrayRef<FrameId> CS, const MemInfoBlock &MB)
-      : CallStack(CS.begin(), CS.end()), Info(MB) {}
+  IndexedAllocationInfo(ArrayRef<FrameId> CS, CallStackId CSId,
+                        const MemInfoBlock &MB)
+      : CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB) {}
 
   // Returns the size in bytes when this allocation info struct is serialized.
   size_t serializedSize() const {
@@ -622,6 +631,16 @@ class FrameLookupTrait {
     return Frame::deserialize(D);
   }
 };
+
+// Compute a CallStackId for a given call stack.
+CallStackId hashCallStack(ArrayRef<FrameId> CS);
+
+// Verify that each CallStackId is computed with hashCallStack.  This function
+// is intended to help transition from CallStack to CSId in
+// IndexedAllocationInfo.
+void verifyFunctionProfileData(
+    const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
+        &FunctionProfileData);
 } // namespace memprof
 } // namespace llvm
 

diff  --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 0461f0e9f84078..bffa4ed19be8c8 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -3,8 +3,10 @@
 #include "llvm/IR/Function.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Support/BLAKE3.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/EndianStream.h"
+#include "llvm/Support/HashBuilder.h"
 
 namespace llvm {
 namespace memprof {
@@ -117,5 +119,28 @@ Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
   return Result;
 }
 
+CallStackId hashCallStack(ArrayRef<FrameId> CS) {
+  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
+      HashBuilder;
+  for (FrameId F : CS)
+    HashBuilder.add(F);
+  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
+  CallStackId CSId;
+  std::memcpy(&CSId, Hash.data(), sizeof(Hash));
+  return CSId;
+}
+
+void verifyFunctionProfileData(
+    const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
+        &FunctionProfileData) {
+  for (const auto &[GUID, Record] : FunctionProfileData) {
+    (void)GUID;
+    for (const auto &AS : Record.AllocSites) {
+      assert(AS.CSId == hashCallStack(AS.CallStack));
+      (void)AS;
+    }
+  }
+}
+
 } // namespace memprof
 } // namespace llvm

diff  --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index 60c37c417aa049..5dc1ff8978154c 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -446,6 +446,8 @@ Error RawMemProfReader::mapRawProfileToRecords() {
       Callstack.append(Frames.begin(), Frames.end());
     }
 
+    CallStackId CSId = hashCallStack(Callstack);
+
     // We attach the memprof record to each function bottom-up including the
     // first non-inline frame.
     for (size_t I = 0; /*Break out using the condition below*/; I++) {
@@ -453,7 +455,7 @@ Error RawMemProfReader::mapRawProfileToRecords() {
       auto Result =
           FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
       IndexedMemProfRecord &Record = Result.first->second;
-      Record.AllocSites.emplace_back(Callstack, Entry.second);
+      Record.AllocSites.emplace_back(Callstack, CSId, Entry.second);
 
       if (!F.IsInlineFrame)
         break;
@@ -471,6 +473,8 @@ Error RawMemProfReader::mapRawProfileToRecords() {
     }
   }
 
+  verifyFunctionProfileData(FunctionProfileData);
+
   return Error::success();
 }
 

diff  --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index cd4552a039b36b..c9323420bda79b 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -366,7 +366,8 @@ IndexedMemProfRecord makeRecord(
     const MemInfoBlock &Block = MemInfoBlock()) {
   llvm::memprof::IndexedMemProfRecord MR;
   for (const auto &Frames : AllocFrames)
-    MR.AllocSites.emplace_back(Frames, Block);
+    MR.AllocSites.emplace_back(Frames, llvm::memprof::hashCallStack(Frames),
+                               Block);
   for (const auto &Frames : CallSiteFrames)
     MR.CallSites.push_back(Frames);
   return MR;

diff  --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index f5e4a4aff2ed17..1cca44e9b03707 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -280,7 +280,8 @@ TEST(MemProf, RecordSerializationRoundTrip) {
   IndexedMemProfRecord Record;
   for (const auto &ACS : AllocCallStacks) {
     // Use the same info block for both allocation sites.
-    Record.AllocSites.emplace_back(ACS, Info);
+    Record.AllocSites.emplace_back(ACS, llvm::memprof::hashCallStack(ACS),
+                                   Info);
   }
   Record.CallSites.assign(CallSites);
 
@@ -376,7 +377,9 @@ TEST(MemProf, BaseMemProfReader) {
   Block.AllocCount = 1U, Block.TotalAccessDensity = 4,
   Block.TotalLifetime = 200001;
   std::array<FrameId, 2> CallStack{F1.hash(), F2.hash()};
-  FakeRecord.AllocSites.emplace_back(/*CS=*/CallStack, /*MB=*/Block);
+  FakeRecord.AllocSites.emplace_back(
+      /*CS=*/CallStack, /*CSId=*/llvm::memprof::hashCallStack(CallStack),
+      /*MB=*/Block);
   ProfData.insert({F1.hash(), FakeRecord});
 
   MemProfReader Reader(FrameIdMap, ProfData);


        


More information about the llvm-commits mailing list