[llvm] [memprof] Add YAML read/write support to llvm-profdata (PR #118915)

Kazu Hirata via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 5 19:59:53 PST 2024


https://github.com/kazutakahirata created https://github.com/llvm/llvm-project/pull/118915

This patch adds YAML read/write support to llvm-profdata.  The primary
intent is to accommodate MemProf profiles in test cases, thereby
avoiding the binary format.

The read support is via llvm-profdata merge.  This is useful when we
want to verify that the compiler does the right thing on a given .ll
file and a MemProf profile in a test case.  In the test case, we would
convert the MemProf profile in YAML to an indexed profile and invoke
the compiler on the .ll file along with the indexed profile.

The write support is via llvm-profdata show --memory.  This is useful
when we wish to convert an indexed MemProf profile to YAML while
writing tests.  We would compile a test case in C++, run it for an
indexed MemProf profile, and then convert it to the text format.


>From 66d641276a4d5a1f63e6010a5facef0b82799cce Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 4 Dec 2024 10:23:21 -0800
Subject: [PATCH] [memprof] Add YAML read/write support to llvm-profdata

This patch adds YAML read/write support to llvm-profdata.  The primary
intent is to accommodate MemProf profiles in test cases, thereby
avoiding the binary format.

The read support is via llvm-profdata merge.  This is useful when we
want to verify that the compiler does the right thing on a given .ll
file and a MemProf profile in a test case.  In the test case, we would
convert the MemProf profile in YAML to an indexed profile and invoke
the compiler on the .ll file along with the indexed profile.

The write support is via llvm-profdata show --memory.  This is useful
when we wish to convert an indexed MemProf profile to YAML while
writing tests.  We would compile a test case in C++, run it for an
indexed MemProf profile, and then convert it to the text format.
---
 .../llvm/ProfileData/InstrProfReader.h        |  7 ++
 llvm/include/llvm/ProfileData/MemProfReader.h | 15 ++++
 llvm/lib/ProfileData/InstrProfReader.cpp      |  8 ++
 llvm/lib/ProfileData/MemProfReader.cpp        | 30 +++++++
 .../tools/llvm-profdata/memprof-yaml.test     | 33 +++++++
 llvm/tools/llvm-profdata/llvm-profdata.cpp    | 89 ++++++++++++++++---
 6 files changed, 170 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/tools/llvm-profdata/memprof-yaml.test

diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 330cf540c099be..95a8171021b431 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -716,6 +716,9 @@ class IndexedMemProfReader {
 
   DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
   getMemProfCallerCalleePairs() const;
+
+  // Return a vector of all GUIDs that we have corresponding MemProfRecords for.
+  SmallVector<uint64_t, 0> getMemProfRecordKeys() const;
 };
 
 /// Reader for the indexed binary instrprof format.
@@ -823,6 +826,10 @@ class IndexedInstrProfReader : public InstrProfReader {
     return MemProfReader.getMemProfCallerCalleePairs();
   }
 
+  SmallVector<uint64_t, 0> getMemProfRecordKeys() {
+    return MemProfReader.getMemProfRecordKeys();
+  }
+
   /// Fill Counts with the profile data for the given function name.
   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
                           std::vector<uint64_t> &Counts);
diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index 0529f794606465..421985b55056c2 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -213,6 +213,21 @@ class RawMemProfReader final : public MemProfReader {
 class YAMLMemProfReader final : public MemProfReader {
 public:
   YAMLMemProfReader() = default;
+
+  // Return true if the \p DataBuffer starts with "---", the marker that
+  // begins a YAML document.
+  static bool hasFormat(const MemoryBuffer &DataBuffer);
+  // Return true if the file at \p Path can be read and starts with "---",
+  // the marker that begins a YAML document.
+  static bool hasFormat(const StringRef Path);
+
+  // Create a YAMLMemProfReader by parsing the YAML text in the file at
+  // \p Path or in the \p Buffer. Unlike RawMemProfReader::create, these
+  // overloads take no profiled binary.
+  static Expected<std::unique_ptr<YAMLMemProfReader>> create(const Twine &Path);
+  static Expected<std::unique_ptr<YAMLMemProfReader>>
+  create(std::unique_ptr<MemoryBuffer> Buffer);
+
   void parse(StringRef YAMLData);
 };
 } // namespace memprof
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index dad79b2c1761e9..cd10429cd742b5 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1664,6 +1664,14 @@ IndexedMemProfReader::getMemProfCallerCalleePairs() const {
   return Pairs;
 }
 
+SmallVector<uint64_t, 0> IndexedMemProfReader::getMemProfRecordKeys() const {
+  SmallVector<uint64_t, 0> Keys;
+  Keys.reserve(MemProfRecordTable->getNumEntries());
+  for (uint64_t Key : MemProfRecordTable->keys())
+    Keys.push_back(Key);
+  return Keys;
+}
+
 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
                                                 uint64_t FuncHash,
                                                 std::vector<uint64_t> &Counts) {
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index 9dacf298985937..8325fc75aaa852 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -755,6 +755,36 @@ Error RawMemProfReader::readNextRecord(
   return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
 }
 
+Expected<std::unique_ptr<YAMLMemProfReader>>
+YAMLMemProfReader::create(const Twine &Path) {
+  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
+  if (std::error_code EC = BufferOr.getError())
+    return report(errorCodeToError(EC), Path.getSingleStringRef());
+
+  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
+  return create(std::move(Buffer));
+}
+
+Expected<std::unique_ptr<YAMLMemProfReader>>
+YAMLMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
+  std::unique_ptr<YAMLMemProfReader> Reader(new YAMLMemProfReader());
+  Reader->parse(Buffer->getBuffer());
+  return std::move(Reader);
+}
+
+bool YAMLMemProfReader::hasFormat(const StringRef Path) {
+  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
+  if (!BufferOr)
+    return false;
+
+  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
+  return hasFormat(*Buffer);
+}
+
+bool YAMLMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
+  return Buffer.getBuffer().starts_with("---");
+}
+
 void YAMLMemProfReader::parse(StringRef YAMLData) {
   memprof::AllMemProfData Doc;
   yaml::Input Yin(YAMLData);
diff --git a/llvm/test/tools/llvm-profdata/memprof-yaml.test b/llvm/test/tools/llvm-profdata/memprof-yaml.test
new file mode 100644
index 00000000000000..9875faf355582a
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/memprof-yaml.test
@@ -0,0 +1,33 @@
+; RUN: split-file %s %t
+; RUN: llvm-profdata merge %t/memprof-in.yaml -o %t/memprof-out.indexed
+; RUN: llvm-profdata show --memory %t/memprof-out.indexed > %t/memprof-out.yaml
+; RUN: cmp %t/memprof-in.yaml %t/memprof-out.yaml
+
+; Verify that the YAML output is identical to the YAML input.
+;--- memprof-in.yaml
+---
+HeapProfileRecords:
+  - GUID:            16045690981402826360
+    AllocSites:
+      - Callstack:
+          - { Function: 100, LineOffset: 11, Column: 10, IsInlineFrame: true }
+          - { Function: 200, LineOffset: 22, Column: 20, IsInlineFrame: false }
+        MemInfoBlock:
+          AllocCount:      111
+          TotalSize:       222
+          TotalLifetime:   333
+          TotalLifetimeAccessDensity: 444
+      - Callstack:
+          - { Function: 300, LineOffset: 33, Column: 30, IsInlineFrame: false }
+          - { Function: 400, LineOffset: 44, Column: 40, IsInlineFrame: true }
+        MemInfoBlock:
+          AllocCount:      555
+          TotalSize:       666
+          TotalLifetime:   777
+          TotalLifetimeAccessDensity: 888
+    CallSites:
+      - - { Function: 500, LineOffset: 55, Column: 50, IsInlineFrame: true }
+        - { Function: 600, LineOffset: 66, Column: 60, IsInlineFrame: false }
+      - - { Function: 700, LineOffset: 77, Column: 70, IsInlineFrame: true }
+        - { Function: 800, LineOffset: 88, Column: 80, IsInlineFrame: false }
+...
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 1d9d7bcf765496..69973db5328534 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -723,6 +723,35 @@ loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
     return;
   }
 
+  using ::llvm::memprof::YAMLMemProfReader;
+  if (YAMLMemProfReader::hasFormat(Input.Filename)) {
+    auto ReaderOrErr = YAMLMemProfReader::create(Input.Filename);
+    if (!ReaderOrErr) {
+      exitWithError(ReaderOrErr.takeError(), Input.Filename);
+    }
+    std::unique_ptr<YAMLMemProfReader> Reader = std::move(ReaderOrErr.get());
+    // Check if the profile types can be merged, e.g. clang frontend profiles
+    // should not be merged with memprof profiles.
+    if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
+      consumeError(std::move(E));
+      WC->Errors.emplace_back(
+          make_error<StringError>(
+              "Cannot merge MemProf profile with Clang generated profile.",
+              std::error_code()),
+          Filename);
+      return;
+    }
+
+    auto MemProfError = [&](Error E) {
+      auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
+      WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
+                              Filename);
+    };
+
+    WC->Writer.addMemProfData(Reader->takeMemProfData(), MemProfError);
+    return;
+  }
+
   auto FS = vfs::getRealFileSystem();
   // TODO: This only saves the first non-fatal error from InstrProfReader, and
   // then added to WriterContext::Errors. However, this is not extensible, if
@@ -3242,18 +3271,54 @@ static int showSampleProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
 static int showMemProfProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
   if (SFormat == ShowFormat::Json)
     exitWithError("JSON output is not supported for MemProf");
-  auto ReaderOr = llvm::memprof::RawMemProfReader::create(
-      Filename, ProfiledBinary, /*KeepNames=*/true);
-  if (Error E = ReaderOr.takeError())
-    // Since the error can be related to the profile or the binary we do not
-    // pass whence. Instead additional context is provided where necessary in
-    // the error message.
-    exitWithError(std::move(E), /*Whence*/ "");
-
-  std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
-      ReaderOr.get().release());
-
-  Reader->printYAML(OS);
+
+  // Load the file to check the magic bytes.
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferOrError =
+      llvm::MemoryBuffer::getFile(Filename);
+  if (auto EC = BufferOrError.getError())
+    exitWithError("Error opening profile file '" + Filename + "'");
+  auto Buffer = std::move(BufferOrError.get());
+
+  // Show the raw profile in YAML.
+  if (memprof::RawMemProfReader::hasFormat(*Buffer)) {
+    auto ReaderOr = llvm::memprof::RawMemProfReader::create(
+        Filename, ProfiledBinary, /*KeepNames=*/true);
+    if (Error E = ReaderOr.takeError())
+      // Since the error can be related to the profile or the binary we do not
+      // pass whence. Instead additional context is provided where necessary in
+      // the error message.
+      exitWithError(std::move(E), /*Whence*/ "");
+
+    std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
+        ReaderOr.get().release());
+
+    Reader->printYAML(OS);
+    return 0;
+  }
+
+  // Show the indexed MemProf profile in YAML.
+  auto FS = vfs::getRealFileSystem();
+  auto ReaderOrErr = IndexedInstrProfReader::create(Filename, *FS);
+  if (Error E = ReaderOrErr.takeError())
+    exitWithError(std::move(E), Filename);
+
+  auto Reader = std::move(ReaderOrErr.get());
+
+  // Build pairs of GUID and MemProfRecord.
+  memprof::AllMemProfData Data;
+  for (const uint64_t Key : Reader->getMemProfRecordKeys()) {
+    auto Record = Reader->getMemProfRecord(Key);
+    if (Record.takeError())
+      continue;
+    memprof::GUIDMemProfRecordPair Pair;
+    Pair.GUID = Key;
+    Pair.Record = std::move(*Record);
+    Data.HeapProfileRecords.push_back(std::move(Pair));
+  }
+
+  yaml::Output Yout(OS);
+  Yout << Data;
+
   return 0;
 }
 



More information about the llvm-commits mailing list