[llvm] 4c8ec8f - [memprof] Reduce schema for Version2 (#89876)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 24 16:25:40 PDT 2024


Author: Kazu Hirata
Date: 2024-04-24T16:25:35-07:00
New Revision: 4c8ec8f8bc3fb4dda4fd36c3b2ad745bd3451970

URL: https://github.com/llvm/llvm-project/commit/4c8ec8f8bc3fb4dda4fd36c3b2ad745bd3451970
DIFF: https://github.com/llvm/llvm-project/commit/4c8ec8f8bc3fb4dda4fd36c3b2ad745bd3451970.diff

LOG: [memprof] Reduce schema for Version2 (#89876)

Curently, the compiler only uses several fields of MemoryInfoBlock.
Serializing all fields into the indexed MemProf file simply wastes
storage.

This patch limits the schema down to four fields for Version2 by
default.  It retains the old behavior of serializing all fields via:

  llvm-profdata merge --memprof-version=2 --memprof-full-schema

This patch reduces the size of the indexed MemProf profile I have by
40% (1.6GB down to 1.0GB).

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/InstrProfWriter.h
    llvm/include/llvm/ProfileData/MemProf.h
    llvm/lib/ProfileData/InstrProfWriter.cpp
    llvm/test/tools/llvm-profdata/memprof-merge-v0.test
    llvm/tools/llvm-profdata/llvm-profdata.cpp
    llvm/unittests/ProfileData/InstrProfTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index b0ae8f364fcafb..08db8fa6e7ef2c 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -85,11 +85,15 @@ class InstrProfWriter {
   // The MemProf version we should write.
   memprof::IndexedVersion MemProfVersionRequested;
 
+  // Whether to serialize the full schema.
+  bool MemProfFullSchema;
+
 public:
   InstrProfWriter(
       bool Sparse = false, uint64_t TemporalProfTraceReservoirSize = 0,
       uint64_t MaxTemporalProfTraceLength = 0, bool WritePrevVersion = false,
-      memprof::IndexedVersion MemProfVersionRequested = memprof::Version0);
+      memprof::IndexedVersion MemProfVersionRequested = memprof::Version0,
+      bool MemProfFullSchema = false);
   ~InstrProfWriter();
 
   StringMap<ProfilingData> &getProfileData() { return FunctionData; }
@@ -203,6 +207,7 @@ class InstrProfWriter {
   void setMemProfVersionRequested(memprof::IndexedVersion Version) {
     MemProfVersionRequested = Version;
   }
+  void setMemProfFullSchema(bool Full) { MemProfFullSchema = Full; }
   // Compute the overlap b/w this object and Other. Program level result is
   // stored in Overlap and function level result is stored in FuncLevelOverlap.
   void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap,

diff  --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 37019bcab5448d..a8e98f8bb13861 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -117,7 +117,7 @@ struct PortableMemInfoBlock {
   void clear() { *this = PortableMemInfoBlock(); }
 
   // Returns the full schema currently in use.
-  static MemProfSchema getSchema() {
+  static MemProfSchema getFullSchema() {
     MemProfSchema List;
 #define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name);
 #include "llvm/ProfileData/MIBEntryDef.inc"
@@ -125,6 +125,13 @@ struct PortableMemInfoBlock {
     return List;
   }
 
+  // Returns the schema consisting of the fields currently consumed by the
+  // compiler.
+  static MemProfSchema getHotColdSchema() {
+    return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime,
+            Meta::TotalLifetimeAccessDensity};
+  }
+
   bool operator==(const PortableMemInfoBlock &Other) const {
 #define MIBEntryDef(NameTag, Name, Type)                                       \
   if (Other.get##Name() != get##Name())                                        \

diff  --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 4a6fc9d64b6900..70ae57f77daef7 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -184,12 +184,13 @@ class InstrProfRecordWriterTrait {
 InstrProfWriter::InstrProfWriter(
     bool Sparse, uint64_t TemporalProfTraceReservoirSize,
     uint64_t MaxTemporalProfTraceLength, bool WritePrevVersion,
-    memprof::IndexedVersion MemProfVersionRequested)
+    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema)
     : Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength),
       TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize),
       InfoObj(new InstrProfRecordWriterTrait()),
       WritePrevVersion(WritePrevVersion),
-      MemProfVersionRequested(MemProfVersionRequested) {}
+      MemProfVersionRequested(MemProfVersionRequested),
+      MemProfFullSchema(MemProfFullSchema) {}
 
 InstrProfWriter::~InstrProfWriter() { delete InfoObj; }
 
@@ -507,7 +508,7 @@ static Error writeMemProfV0(
   OS.write(0ULL); // Reserve space for the memprof frame payload offset.
   OS.write(0ULL); // Reserve space for the memprof frame table offset.
 
-  auto Schema = memprof::PortableMemInfoBlock::getSchema();
+  auto Schema = memprof::PortableMemInfoBlock::getFullSchema();
   writeMemProfSchema(OS, Schema);
 
   uint64_t RecordTableOffset =
@@ -533,7 +534,7 @@ static Error writeMemProfV1(
   OS.write(0ULL); // Reserve space for the memprof frame payload offset.
   OS.write(0ULL); // Reserve space for the memprof frame table offset.
 
-  auto Schema = memprof::PortableMemInfoBlock::getSchema();
+  auto Schema = memprof::PortableMemInfoBlock::getFullSchema();
   writeMemProfSchema(OS, Schema);
 
   uint64_t RecordTableOffset =
@@ -554,7 +555,8 @@ static Error writeMemProfV2(
         &MemProfRecordData,
     llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
     llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
-        &MemProfCallStackData) {
+        &MemProfCallStackData,
+    bool MemProfFullSchema) {
   OS.write(memprof::Version2);
   uint64_t HeaderUpdatePos = OS.tell();
   OS.write(0ULL); // Reserve space for the memprof record table offset.
@@ -563,7 +565,9 @@ static Error writeMemProfV2(
   OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
   OS.write(0ULL); // Reserve space for the memprof call stack table offset.
 
-  auto Schema = memprof::PortableMemInfoBlock::getSchema();
+  auto Schema = memprof::PortableMemInfoBlock::getHotColdSchema();
+  if (MemProfFullSchema)
+    Schema = memprof::PortableMemInfoBlock::getFullSchema();
   writeMemProfSchema(OS, Schema);
 
   uint64_t RecordTableOffset =
@@ -605,7 +609,7 @@ static Error writeMemProf(
     llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
     llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
         &MemProfCallStackData,
-    memprof::IndexedVersion MemProfVersionRequested) {
+    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema) {
 
   switch (MemProfVersionRequested) {
   case memprof::Version0:
@@ -614,7 +618,7 @@ static Error writeMemProf(
     return writeMemProfV1(OS, MemProfRecordData, MemProfFrameData);
   case memprof::Version2:
     return writeMemProfV2(OS, MemProfRecordData, MemProfFrameData,
-                          MemProfCallStackData);
+                          MemProfCallStackData, MemProfFullSchema);
   }
 
   return make_error<InstrProfError>(
@@ -733,7 +737,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
     MemProfSectionStart = OS.tell();
     if (auto E = writeMemProf(OS, MemProfRecordData, MemProfFrameData,
-                              MemProfCallStackData, MemProfVersionRequested))
+                              MemProfCallStackData, MemProfVersionRequested,
+                              MemProfFullSchema))
       return E;
   }
 

diff  --git a/llvm/test/tools/llvm-profdata/memprof-merge-v0.test b/llvm/test/tools/llvm-profdata/memprof-merge-v0.test
index 03ccbdd42efdad..28f65e0781bc63 100644
--- a/llvm/test/tools/llvm-profdata/memprof-merge-v0.test
+++ b/llvm/test/tools/llvm-profdata/memprof-merge-v0.test
@@ -16,6 +16,9 @@ RUN: llvm-profdata show %t.prof.v1 | FileCheck %s
 RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=2 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v2
 RUN: llvm-profdata show %t.prof.v2 | FileCheck %s
 
+RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=2 --memprof-full-schema --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v2
+RUN: llvm-profdata show %t.prof.v2 | FileCheck %s
+
 For now we only check the validity of the instrumented profile since we don't
 have a way to display the contents of the memprof indexed format yet.
 

diff  --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 78daf9f7dc109a..ec046ebfab130b 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -308,6 +308,10 @@ cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
                clEnumValN(memprof::Version1, "1", "version 1"),
                clEnumValN(memprof::Version2, "2", "version 2")));
 
+cl::opt<bool> MemProfFullSchema(
+    "memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand),
+    cl::desc("Use the full schema for serialization"), cl::init(false));
+
 // Options specific to overlap subcommand.
 cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
                                   cl::desc("<base profile file>"),
@@ -600,7 +604,7 @@ struct WriterContext {
                 SmallSet<instrprof_error, 4> &WriterErrorCodes,
                 uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
       : Writer(IsSparse, ReservoirSize, MaxTraceLength, DoWritePrevVersion,
-               MemProfVersionRequested),
+               MemProfVersionRequested, MemProfFullSchema),
         ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
 };
 

diff  --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 73ba0a23ea5f45..edc427dcbc4540 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -370,12 +370,31 @@ static CallStackIdMapTy getCallStackMapping() {
   return Mapping;
 }
 
+// Populate all of the fields of MIB.
+MemInfoBlock makeFullMIB() {
+  MemInfoBlock MIB;
+#define MIBEntryDef(NameTag, Name, Type) MIB.NameTag;
+#include "llvm/ProfileData/MIBEntryDef.inc"
+#undef MIBEntryDef
+  return MIB;
+}
+
+// Populate those fields returned by getHotColdSchema.
+MemInfoBlock makePartialMIB() {
+  MemInfoBlock MIB;
+  MIB.AllocCount = 1;
+  MIB.TotalSize = 5;
+  MIB.TotalLifetime = 10;
+  MIB.TotalLifetimeAccessDensity = 23;
+  return MIB;
+}
+
 IndexedMemProfRecord makeRecord(
     std::initializer_list<std::initializer_list<::llvm::memprof::FrameId>>
         AllocFrames,
     std::initializer_list<std::initializer_list<::llvm::memprof::FrameId>>
         CallSiteFrames,
-    const MemInfoBlock &Block = MemInfoBlock()) {
+    const MemInfoBlock &Block = makeFullMIB()) {
   llvm::memprof::IndexedMemProfRecord MR;
   for (const auto &Frames : AllocFrames)
     MR.AllocSites.emplace_back(Frames, llvm::memprof::hashCallStack(Frames),
@@ -388,7 +407,7 @@ IndexedMemProfRecord makeRecord(
 IndexedMemProfRecord
 makeRecordV2(std::initializer_list<::llvm::memprof::CallStackId> AllocFrames,
              std::initializer_list<::llvm::memprof::CallStackId> CallSiteFrames,
-             const MemInfoBlock &Block = MemInfoBlock()) {
+             const MemInfoBlock &Block) {
   llvm::memprof::IndexedMemProfRecord MR;
   for (const auto &CSId : AllocFrames)
     // We don't populate IndexedAllocationInfo::CallStack because we use it only
@@ -476,15 +495,56 @@ TEST_F(InstrProfTest, test_memprof_v0) {
   EXPECT_THAT(WantRecord, EqualsRecord(Record));
 }
 
-TEST_F(InstrProfTest, test_memprof_v2) {
+struct CallStackIdConverter {
+  std::optional<memprof::FrameId> LastUnmappedFrameId;
+  std::optional<memprof::CallStackId> LastUnmappedCSId;
+
+  const FrameIdMapTy &IdToFrameMap;
+  const CallStackIdMapTy &CSIdToCallStackMap;
+
+  CallStackIdConverter() = delete;
+  CallStackIdConverter(const FrameIdMapTy &IdToFrameMap,
+                       const CallStackIdMapTy &CSIdToCallStackMap)
+      : IdToFrameMap(IdToFrameMap), CSIdToCallStackMap(CSIdToCallStackMap) {}
+
+  llvm::SmallVector<memprof::Frame>
+  operator()(::llvm::memprof::CallStackId CSId) {
+    auto IdToFrameCallback = [&](const memprof::FrameId Id) {
+      auto Iter = IdToFrameMap.find(Id);
+      if (Iter == IdToFrameMap.end()) {
+        LastUnmappedFrameId = Id;
+        return memprof::Frame(0, 0, 0, false);
+      }
+      return Iter->second;
+    };
+
+    llvm::SmallVector<memprof::Frame> Frames;
+    auto CSIter = CSIdToCallStackMap.find(CSId);
+    if (CSIter == CSIdToCallStackMap.end()) {
+      LastUnmappedCSId = CSId;
+    } else {
+      const ::llvm::SmallVector<::llvm::memprof::FrameId> &CS =
+          CSIter->getSecond();
+      Frames.reserve(CS.size());
+      for (::llvm::memprof::FrameId Id : CS)
+        Frames.push_back(IdToFrameCallback(Id));
+    }
+    return Frames;
+  }
+};
+
+TEST_F(InstrProfTest, test_memprof_v2_full_schema) {
+  const MemInfoBlock MIB = makeFullMIB();
+
   Writer.setMemProfVersionRequested(memprof::Version2);
+  Writer.setMemProfFullSchema(true);
 
   ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
                     Succeeded());
 
   const IndexedMemProfRecord IndexedMR = makeRecordV2(
       /*AllocFrames=*/{0x111, 0x222},
-      /*CallSiteFrames=*/{0x333});
+      /*CallSiteFrames=*/{0x333}, MIB);
   const FrameIdMapTy IdToFrameMap = getFrameMapping();
   const auto CSIdToCallStackMap = getCallStackMapping();
   for (const auto &I : IdToFrameMap) {
@@ -502,38 +562,54 @@ TEST_F(InstrProfTest, test_memprof_v2) {
   ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
   const memprof::MemProfRecord &Record = RecordOr.get();
 
-  std::optional<memprof::FrameId> LastUnmappedFrameId;
-  auto IdToFrameCallback = [&](const memprof::FrameId Id) {
-    auto Iter = IdToFrameMap.find(Id);
-    if (Iter == IdToFrameMap.end()) {
-      LastUnmappedFrameId = Id;
-      return memprof::Frame(0, 0, 0, false);
-    }
-    return Iter->second;
-  };
+  CallStackIdConverter CSIdConv(IdToFrameMap, CSIdToCallStackMap);
 
-  std::optional<::llvm::memprof::CallStackId> LastUnmappedCSId;
-  auto CSIdToCallStackCallback = [&](::llvm::memprof::CallStackId CSId) {
-    llvm::SmallVector<memprof::Frame> Frames;
-    auto CSIter = CSIdToCallStackMap.find(CSId);
-    if (CSIter == CSIdToCallStackMap.end()) {
-      LastUnmappedCSId = CSId;
-    } else {
-      const ::llvm::SmallVector<::llvm::memprof::FrameId> &CS =
-          CSIter->getSecond();
-      Frames.reserve(CS.size());
-      for (::llvm::memprof::FrameId Id : CS)
-        Frames.push_back(IdToFrameCallback(Id));
-    }
-    return Frames;
-  };
+  const ::llvm::memprof::MemProfRecord WantRecord =
+      IndexedMR.toMemProfRecord(CSIdConv);
+  ASSERT_EQ(CSIdConv.LastUnmappedFrameId, std::nullopt)
+      << "could not map frame id: " << *CSIdConv.LastUnmappedFrameId;
+  ASSERT_EQ(CSIdConv.LastUnmappedCSId, std::nullopt)
+      << "could not map call stack id: " << *CSIdConv.LastUnmappedCSId;
+  EXPECT_THAT(WantRecord, EqualsRecord(Record));
+}
+
+TEST_F(InstrProfTest, test_memprof_v2_partial_schema) {
+  const MemInfoBlock MIB = makePartialMIB();
+
+  Writer.setMemProfVersionRequested(memprof::Version2);
+  Writer.setMemProfFullSchema(false);
+
+  ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
+                    Succeeded());
+
+  const IndexedMemProfRecord IndexedMR = makeRecordV2(
+      /*AllocFrames=*/{0x111, 0x222},
+      /*CallSiteFrames=*/{0x333}, MIB);
+  const FrameIdMapTy IdToFrameMap = getFrameMapping();
+  const auto CSIdToCallStackMap = getCallStackMapping();
+  for (const auto &I : IdToFrameMap) {
+    Writer.addMemProfFrame(I.first, I.getSecond(), Err);
+  }
+  for (const auto &I : CSIdToCallStackMap) {
+    Writer.addMemProfCallStack(I.first, I.getSecond(), Err);
+  }
+  Writer.addMemProfRecord(/*Id=*/0x9999, IndexedMR);
+
+  auto Profile = Writer.writeBuffer();
+  readProfile(std::move(Profile));
+
+  auto RecordOr = Reader->getMemProfRecord(0x9999);
+  ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
+  const memprof::MemProfRecord &Record = RecordOr.get();
+
+  CallStackIdConverter CSIdConv(IdToFrameMap, CSIdToCallStackMap);
 
   const ::llvm::memprof::MemProfRecord WantRecord =
-      IndexedMR.toMemProfRecord(CSIdToCallStackCallback);
-  ASSERT_EQ(LastUnmappedFrameId, std::nullopt)
-      << "could not map frame id: " << *LastUnmappedFrameId;
-  ASSERT_EQ(LastUnmappedCSId, std::nullopt)
-      << "could not map call stack id: " << *LastUnmappedCSId;
+      IndexedMR.toMemProfRecord(CSIdConv);
+  ASSERT_EQ(CSIdConv.LastUnmappedFrameId, std::nullopt)
+      << "could not map frame id: " << *CSIdConv.LastUnmappedFrameId;
+  ASSERT_EQ(CSIdConv.LastUnmappedCSId, std::nullopt)
+      << "could not map call stack id: " << *CSIdConv.LastUnmappedCSId;
   EXPECT_THAT(WantRecord, EqualsRecord(Record));
 }
 


        


More information about the llvm-commits mailing list