[llvm] [memprof] Use CSId to construct MemProfRecord (PR #88362)

Kazu Hirata via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 11 00:48:27 PDT 2024


https://github.com/kazutakahirata created https://github.com/llvm/llvm-project/pull/88362

We are in the process of referring to call stacks with CallStackId in
IndexedMemProfRecord and IndexedAllocationInfo instead of holding call
stacks inline (both in memory and the serialized format).  Doing so
deduplicates call stacks and reduces the MemProf profile file size.

Before we can eliminate the two fields holding call stacks inline:

- IndexedAllocationInfo::CallStack
- IndexedMemProfRecord::CallSites

we need to eliminate all the read operations on them.

This patch is a step in that direction.  Specifically, we
eliminate the read operations in the context of MemProfReader and
RawMemProfReader.  A subsequent patch will eliminate the read
operations during the serialization.


>From 1f2144bf50929da907787df526b650d69d7fed61 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 10 Apr 2024 22:12:57 -0700
Subject: [PATCH] [memprof] Use CSId to construct MemProfRecord

We are in the process of referring to call stacks with CallStackId in
IndexedMemProfRecord and IndexedAllocationInfo instead of holding call
stacks inline (both in memory and the serialized format).  Doing so
deduplicates call stacks and reduces the MemProf profile file size.

Before we can eliminate the two fields holding call stacks inline:

- IndexedAllocationInfo::CallStack
- IndexedMemProfRecord::CallSites

we need to eliminate all the read operations on them.

This patch is a step in that direction.  Specifically, we
eliminate the read operations in the context of MemProfReader and
RawMemProfReader.  A subsequent patch will eliminate the read
operations during the serialization.
---
 llvm/include/llvm/ProfileData/MemProfReader.h | 39 +++++++++++++++++--
 llvm/lib/ProfileData/MemProfReader.cpp        | 26 +++++++++++++
 2 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index 89f49a20a6089f..66fa8b73823972 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -57,6 +57,36 @@ class MemProfReader {
     return FunctionProfileData;
   }
 
+  // Convert IndexedMemProfRecord to MemProfRecord, populating call stacks and
+  // frames inline.
+  virtual MemProfRecord convertIndexedMemProfRecordToMemProfRecord(
+      const IndexedMemProfRecord &IndexedRecord,
+      std::function<const Frame(const FrameId)> Callback) {
+    MemProfRecord Record;
+
+    for (const memprof::IndexedAllocationInfo &IndexedAI :
+         IndexedRecord.AllocSites) {
+      memprof::AllocationInfo AI;
+      AI.Info = IndexedAI.Info;
+      auto CSIter = CSIdToCallStack.find(IndexedAI.CSId);
+      assert(CSIter != CSIdToCallStack.end());
+      for (memprof::FrameId Id : CSIter->second)
+        AI.CallStack.push_back(Callback(Id));
+      Record.AllocSites.push_back(AI);
+    }
+
+    for (memprof::CallStackId CSId : IndexedRecord.CallSiteIds) {
+      auto CSIter = CSIdToCallStack.find(CSId);
+      assert(CSIter != CSIdToCallStack.end());
+      llvm::SmallVector<memprof::Frame> Frames;
+      for (memprof::FrameId Id : CSIter->second)
+        Frames.push_back(Callback(Id));
+      Record.CallSites.push_back(Frames);
+    }
+
+    return Record;
+  }
+
   virtual Error
   readNextRecord(GuidMemProfRecordPair &GuidRecord,
                  std::function<const Frame(const FrameId)> Callback = nullptr) {
@@ -71,7 +101,8 @@ class MemProfReader {
           std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1);
 
     const IndexedMemProfRecord &IndexedRecord = Iter->second;
-    GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, Callback)};
+    GuidRecord = {Iter->first, convertIndexedMemProfRecordToMemProfRecord(
+                                   IndexedRecord, Callback)};
     Iter++;
     return Error::success();
   }
@@ -84,9 +115,7 @@ class MemProfReader {
   // Initialize the MemProfReader with the frame mappings and profile contents.
   MemProfReader(
       llvm::DenseMap<FrameId, Frame> FrameIdMap,
-      llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
-      : IdToFrame(std::move(FrameIdMap)),
-        FunctionProfileData(std::move(ProfData)) {}
+      llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData);
 
 protected:
   // A helper method to extract the frame from the IdToFrame map.
@@ -97,6 +126,8 @@ class MemProfReader {
   }
   // A mapping from FrameId (a hash of the contents) to the frame.
   llvm::DenseMap<FrameId, Frame> IdToFrame;
+  // A mapping from CallStackId to the call stack.
+  llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> CSIdToCallStack;
   // A mapping from function GUID, hash of the canonical function symbol to the
   // memprof profile data for that function, i.e allocation and callsite info.
   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData;
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index 4ccec26597c098..a6a39a55d9980c 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -183,6 +183,28 @@ std::string getBuildIdString(const SegmentEntry &Entry) {
 }
 } // namespace
 
+MemProfReader::MemProfReader(
+    llvm::DenseMap<FrameId, Frame> FrameIdMap,
+    llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
+    : IdToFrame(std::move(FrameIdMap)),
+      FunctionProfileData(std::move(ProfData)) {
+  // Populate CSId in each IndexedAllocationInfo and IndexedMemProfRecord
+  // while storing CallStack in CSIdToCallStack.
+  for (auto &[GUID, Record] : FunctionProfileData) {
+    (void)GUID;
+    for (auto &AS : Record.AllocSites) {
+      CallStackId CSId = hashCallStack(AS.CallStack);
+      AS.CSId = CSId;
+      CSIdToCallStack.insert({CSId, AS.CallStack});
+    }
+    for (auto &CS : Record.CallSites) {
+      CallStackId CSId = hashCallStack(CS);
+      Record.CallSiteIds.push_back(CSId);
+      CSIdToCallStack.insert({CSId, CS});
+    }
+  }
+}
+
 Expected<std::unique_ptr<RawMemProfReader>>
 RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                          bool KeepName) {
@@ -447,6 +469,7 @@ Error RawMemProfReader::mapRawProfileToRecords() {
     }
 
     CallStackId CSId = hashCallStack(Callstack);
+    CSIdToCallStack.insert({CSId, Callstack});
 
     // We attach the memprof record to each function bottom-up including the
     // first non-inline frame.
@@ -469,7 +492,10 @@ Error RawMemProfReader::mapRawProfileToRecords() {
     auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
     IndexedMemProfRecord &Record = Result.first->second;
     for (LocationPtr Loc : Locs) {
+      CallStackId CSId = hashCallStack(*Loc);
+      CSIdToCallStack.insert({CSId, *Loc});
       Record.CallSites.push_back(*Loc);
+      Record.CallSiteIds.push_back(CSId);
     }
   }
 



More information about the llvm-commits mailing list