[llvm] [memprof] Use CSId to construct MemProfRecord (PR #88362)

Kazu Hirata via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 15 12:02:53 PDT 2024


https://github.com/kazutakahirata updated https://github.com/llvm/llvm-project/pull/88362

>From 1f2144bf50929da907787df526b650d69d7fed61 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 10 Apr 2024 22:12:57 -0700
Subject: [PATCH 1/2] [memprof] Use CSId to construct MemProfRecord

We are in the process of referring to call stacks with CallStackId in
IndexedMemProfRecord and IndexedAllocationInfo instead of holding call
stacks inline (both in memory and the serialized format).  Doing so
deduplicates call stacks and reduces the MemProf profile file size.

Before we can eliminate the two fields holding call stacks inline:

- IndexedAllocationInfo::CallStack
- IndexedMemProfRecord::CallSites

we need to eliminate all the read operations on them.

This patch is a step toward that direction.  Specifically, we
eliminate the read operations in the context of MemProfReader and
RawMemProfReader.  A subsequent patch will eliminate the read
operations during the serialization.
---
 llvm/include/llvm/ProfileData/MemProfReader.h | 39 +++++++++++++++++--
 llvm/lib/ProfileData/MemProfReader.cpp        | 26 +++++++++++++
 2 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index 89f49a20a6089f..66fa8b73823972 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -57,6 +57,36 @@ class MemProfReader {
     return FunctionProfileData;
   }
 
+  // Convert IndexedMemProfRecord to MemProfRecord, populating call stacks and
+  // frames inline.
+  virtual MemProfRecord convertIndexedMemProfRecordToMemProfRecord(
+      const IndexedMemProfRecord &IndexedRecord,
+      std::function<const Frame(const FrameId)> Callback) {
+    MemProfRecord Record;
+
+    for (const memprof::IndexedAllocationInfo &IndexedAI :
+         IndexedRecord.AllocSites) {
+      memprof::AllocationInfo AI;
+      AI.Info = IndexedAI.Info;
+      auto CSIter = CSIdToCallStack.find(IndexedAI.CSId);
+      assert(CSIter != CSIdToCallStack.end());
+      for (memprof::FrameId Id : CSIter->second)
+        AI.CallStack.push_back(Callback(Id));
+      Record.AllocSites.push_back(AI);
+    }
+
+    for (memprof::CallStackId CSId : IndexedRecord.CallSiteIds) {
+      auto CSIter = CSIdToCallStack.find(CSId);
+      assert(CSIter != CSIdToCallStack.end());
+      llvm::SmallVector<memprof::Frame> Frames;
+      for (memprof::FrameId Id : CSIter->second)
+        Frames.push_back(Callback(Id));
+      Record.CallSites.push_back(Frames);
+    }
+
+    return Record;
+  }
+
   virtual Error
   readNextRecord(GuidMemProfRecordPair &GuidRecord,
                  std::function<const Frame(const FrameId)> Callback = nullptr) {
@@ -71,7 +101,8 @@ class MemProfReader {
           std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1);
 
     const IndexedMemProfRecord &IndexedRecord = Iter->second;
-    GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, Callback)};
+    GuidRecord = {Iter->first, convertIndexedMemProfRecordToMemProfRecord(
+                                   IndexedRecord, Callback)};
     Iter++;
     return Error::success();
   }
@@ -84,9 +115,7 @@ class MemProfReader {
   // Initialize the MemProfReader with the frame mappings and profile contents.
   MemProfReader(
       llvm::DenseMap<FrameId, Frame> FrameIdMap,
-      llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
-      : IdToFrame(std::move(FrameIdMap)),
-        FunctionProfileData(std::move(ProfData)) {}
+      llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData);
 
 protected:
   // A helper method to extract the frame from the IdToFrame map.
@@ -97,6 +126,8 @@ class MemProfReader {
   }
   // A mapping from FrameId (a hash of the contents) to the frame.
   llvm::DenseMap<FrameId, Frame> IdToFrame;
+  // A mapping from CallStackId to the call stack.
+  llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> CSIdToCallStack;
   // A mapping from function GUID, hash of the canonical function symbol to the
   // memprof profile data for that function, i.e allocation and callsite info.
   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData;
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index 4ccec26597c098..a6a39a55d9980c 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -183,6 +183,28 @@ std::string getBuildIdString(const SegmentEntry &Entry) {
 }
 } // namespace
 
+MemProfReader::MemProfReader(
+    llvm::DenseMap<FrameId, Frame> FrameIdMap,
+    llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
+    : IdToFrame(std::move(FrameIdMap)),
+      FunctionProfileData(std::move(ProfData)) {
+  // Populate CSId in each IndexedAllocationInfo and IndexedMemProfRecord
+  // while storing CallStack in CSIdToCallStack.
+  for (auto &[GUID, Record] : FunctionProfileData) {
+    (void)GUID;
+    for (auto &AS : Record.AllocSites) {
+      CallStackId CSId = hashCallStack(AS.CallStack);
+      AS.CSId = CSId;
+      CSIdToCallStack.insert({CSId, AS.CallStack});
+    }
+    for (auto &CS : Record.CallSites) {
+      CallStackId CSId = hashCallStack(CS);
+      Record.CallSiteIds.push_back(CSId);
+      CSIdToCallStack.insert({CSId, CS});
+    }
+  }
+}
+
 Expected<std::unique_ptr<RawMemProfReader>>
 RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                          bool KeepName) {
@@ -447,6 +469,7 @@ Error RawMemProfReader::mapRawProfileToRecords() {
     }
 
     CallStackId CSId = hashCallStack(Callstack);
+    CSIdToCallStack.insert({CSId, Callstack});
 
     // We attach the memprof record to each function bottom-up including the
     // first non-inline frame.
@@ -469,7 +492,10 @@ Error RawMemProfReader::mapRawProfileToRecords() {
     auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
     IndexedMemProfRecord &Record = Result.first->second;
     for (LocationPtr Loc : Locs) {
+      CallStackId CSId = hashCallStack(*Loc);
+      CSIdToCallStack.insert({CSId, *Loc});
       Record.CallSites.push_back(*Loc);
+      Record.CallSiteIds.push_back(CSId);
     }
   }
 

>From 24723d8a09ac99ccbcfedcddb507afb9865ca6ee Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Sat, 13 Apr 2024 13:19:45 -0700
Subject: [PATCH 2/2] Create IndexedMemProfRecord::toMemProfRecord.

Add a unit test.
---
 llvm/include/llvm/ProfileData/MemProf.h       |  6 ++
 llvm/include/llvm/ProfileData/MemProfReader.h | 45 +++-------
 llvm/lib/ProfileData/MemProf.cpp              | 18 ++++
 llvm/unittests/ProfileData/MemProfTest.cpp    | 84 +++++++++++++++++++
 4 files changed, 121 insertions(+), 32 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 0431c182276ec6..acd322a6d0f796 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -16,6 +16,8 @@
 namespace llvm {
 namespace memprof {
 
+struct MemProfRecord;
+
 // The versions of the indexed MemProf format
 enum IndexedVersion : uint64_t {
   // Version 0: This version didn't have a version field.
@@ -392,6 +394,10 @@ struct IndexedMemProfRecord {
                                           const unsigned char *Buffer,
                                           IndexedVersion Version);
 
+  MemProfRecord toMemProfRecord(
+      std::function<const llvm::SmallVector<Frame>(const CallStackId)> Callback)
+      const;
+
   // Returns the GUID for the function name after canonicalization. For
   // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
   // mapped to functions using this GUID.
diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index 66fa8b73823972..1f84fefad03e39 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -57,36 +57,6 @@ class MemProfReader {
     return FunctionProfileData;
   }
 
-  // Convert IndexedMemProfRecord to MemProfRecord, populating call stacks and
-  // frames inline.
-  virtual MemProfRecord convertIndexedMemProfRecordToMemProfRecord(
-      const IndexedMemProfRecord &IndexedRecord,
-      std::function<const Frame(const FrameId)> Callback) {
-    MemProfRecord Record;
-
-    for (const memprof::IndexedAllocationInfo &IndexedAI :
-         IndexedRecord.AllocSites) {
-      memprof::AllocationInfo AI;
-      AI.Info = IndexedAI.Info;
-      auto CSIter = CSIdToCallStack.find(IndexedAI.CSId);
-      assert(CSIter != CSIdToCallStack.end());
-      for (memprof::FrameId Id : CSIter->second)
-        AI.CallStack.push_back(Callback(Id));
-      Record.AllocSites.push_back(AI);
-    }
-
-    for (memprof::CallStackId CSId : IndexedRecord.CallSiteIds) {
-      auto CSIter = CSIdToCallStack.find(CSId);
-      assert(CSIter != CSIdToCallStack.end());
-      llvm::SmallVector<memprof::Frame> Frames;
-      for (memprof::FrameId Id : CSIter->second)
-        Frames.push_back(Callback(Id));
-      Record.CallSites.push_back(Frames);
-    }
-
-    return Record;
-  }
-
   virtual Error
   readNextRecord(GuidMemProfRecordPair &GuidRecord,
                  std::function<const Frame(const FrameId)> Callback = nullptr) {
@@ -100,9 +70,20 @@ class MemProfReader {
       Callback =
           std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1);
 
+    auto CallStackCallback = [&](CallStackId CSId) {
+      llvm::SmallVector<Frame> CallStack;
+      auto Iter = CSIdToCallStack.find(CSId);
+      assert(Iter != CSIdToCallStack.end());
+      for (FrameId Id : Iter->second)
+        CallStack.push_back(Callback(Id));
+      return CallStack;
+    };
+
     const IndexedMemProfRecord &IndexedRecord = Iter->second;
-    GuidRecord = {Iter->first, convertIndexedMemProfRecordToMemProfRecord(
-                                   IndexedRecord, Callback)};
+    GuidRecord = {
+        Iter->first,
+        IndexedRecord.toMemProfRecord(CallStackCallback),
+    };
     Iter++;
     return Error::success();
   }
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 96aeedf2e6913e..444dbb8221b477 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -222,6 +222,24 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
   llvm_unreachable("unsupported MemProf version");
 }
 
+MemProfRecord IndexedMemProfRecord::toMemProfRecord(
+    std::function<const llvm::SmallVector<Frame>(const CallStackId)> Callback)
+    const {
+  MemProfRecord Record;
+
+  for (const memprof::IndexedAllocationInfo &IndexedAI : AllocSites) {
+    memprof::AllocationInfo AI;
+    AI.Info = IndexedAI.Info;
+    AI.CallStack = Callback(IndexedAI.CSId);
+    Record.AllocSites.push_back(AI);
+  }
+
+  for (memprof::CallStackId CSId : CallSiteIds)
+    Record.CallSites.push_back(Callback(CSId));
+
+  return Record;
+}
+
 GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
   // Canonicalize the function name to drop suffixes such as ".llvm.". Note
   // we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 9cf307472d656e..4a6bb7b18f4b05 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -21,9 +21,11 @@ using ::llvm::DILineInfo;
 using ::llvm::DILineInfoSpecifier;
 using ::llvm::DILocal;
 using ::llvm::StringRef;
+using ::llvm::memprof::CallStackId;
 using ::llvm::memprof::CallStackMap;
 using ::llvm::memprof::Frame;
 using ::llvm::memprof::FrameId;
+using ::llvm::memprof::IndexedAllocationInfo;
 using ::llvm::memprof::IndexedMemProfRecord;
 using ::llvm::memprof::MemInfoBlock;
 using ::llvm::memprof::MemProfReader;
@@ -432,4 +434,86 @@ TEST(MemProf, BaseMemProfReader) {
   EXPECT_THAT(Records[0].AllocSites[0].CallStack[1],
               FrameContains("bar", 10U, 2U, false));
 }
+
+TEST(MemProf, IndexedMemProfRecordToMemProfRecord) {
+  // Verify that MemProfRecord can be constructed from IndexedMemProfRecord with
+  // CallStackIds only.
+
+  llvm::DenseMap<FrameId, Frame> FrameIdMap;
+  Frame F1(1, 0, 0, false);
+  Frame F2(2, 0, 0, false);
+  Frame F3(3, 0, 0, false);
+  Frame F4(4, 0, 0, false);
+  FrameIdMap.insert({F1.hash(), F1});
+  FrameIdMap.insert({F2.hash(), F2});
+  FrameIdMap.insert({F3.hash(), F3});
+  FrameIdMap.insert({F4.hash(), F4});
+
+  llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> CallStackIdMap;
+  llvm::SmallVector<FrameId> CS1 = {F1.hash(), F2.hash()};
+  llvm::SmallVector<FrameId> CS2 = {F1.hash(), F3.hash()};
+  llvm::SmallVector<FrameId> CS3 = {F2.hash(), F3.hash()};
+  llvm::SmallVector<FrameId> CS4 = {F2.hash(), F4.hash()};
+  CallStackIdMap.insert({llvm::memprof::hashCallStack(CS1), CS1});
+  CallStackIdMap.insert({llvm::memprof::hashCallStack(CS2), CS2});
+  CallStackIdMap.insert({llvm::memprof::hashCallStack(CS3), CS3});
+  CallStackIdMap.insert({llvm::memprof::hashCallStack(CS4), CS4});
+
+  IndexedMemProfRecord IndexedRecord;
+  IndexedAllocationInfo AI;
+  AI.CSId = llvm::memprof::hashCallStack(CS1);
+  IndexedRecord.AllocSites.push_back(AI);
+  AI.CSId = llvm::memprof::hashCallStack(CS2);
+  IndexedRecord.AllocSites.push_back(AI);
+  IndexedRecord.CallSiteIds.push_back(llvm::memprof::hashCallStack(CS3));
+  IndexedRecord.CallSiteIds.push_back(llvm::memprof::hashCallStack(CS4));
+
+  bool CSIdMissing = false;
+  bool FrameIdMissing = false;
+
+  auto Callback = [&](CallStackId CSId) -> llvm::SmallVector<Frame> {
+    llvm::SmallVector<Frame> CallStack;
+    llvm::SmallVector<FrameId> FrameIds;
+
+    auto Iter = CallStackIdMap.find(CSId);
+    if (Iter == CallStackIdMap.end())
+      CSIdMissing = true;
+    else
+      FrameIds = Iter->second;
+
+    for (FrameId Id : FrameIds) {
+      Frame F(0, 0, 0, false);
+      auto Iter = FrameIdMap.find(Id);
+      if (Iter == FrameIdMap.end())
+        FrameIdMissing = true;
+      else
+        F = Iter->second;
+      CallStack.push_back(F);
+    }
+
+    return CallStack;
+  };
+
+  MemProfRecord Record = IndexedRecord.toMemProfRecord(Callback);
+
+  // Make sure that all lookups are successful.
+  ASSERT_EQ(CSIdMissing, false);
+  ASSERT_EQ(FrameIdMissing, false);
+
+  // Verify the contents of Record.
+  ASSERT_EQ(Record.AllocSites.size(), 2U);
+  ASSERT_EQ(Record.AllocSites[0].CallStack.size(), 2U);
+  ASSERT_EQ(Record.AllocSites[0].CallStack[0].hash(), F1.hash());
+  ASSERT_EQ(Record.AllocSites[0].CallStack[1].hash(), F2.hash());
+  ASSERT_EQ(Record.AllocSites[1].CallStack.size(), 2U);
+  ASSERT_EQ(Record.AllocSites[1].CallStack[0].hash(), F1.hash());
+  ASSERT_EQ(Record.AllocSites[1].CallStack[1].hash(), F3.hash());
+  ASSERT_EQ(Record.CallSites.size(), 2U);
+  ASSERT_EQ(Record.CallSites[0].size(), 2U);
+  ASSERT_EQ(Record.CallSites[0][0].hash(), F2.hash());
+  ASSERT_EQ(Record.CallSites[0][1].hash(), F3.hash());
+  ASSERT_EQ(Record.CallSites[1].size(), 2U);
+  ASSERT_EQ(Record.CallSites[1][0].hash(), F2.hash());
+  ASSERT_EQ(Record.CallSites[1][1].hash(), F4.hash());
+}
 } // namespace



More information about the llvm-commits mailing list