[llvm] ec51971 - [memprof] Keep and display symbol names in the RawMemProfReader.

Snehasish Kumar via llvm-commits llvm-commits at lists.llvm.org
Wed May 25 14:17:49 PDT 2022


Author: Snehasish Kumar
Date: 2022-05-25T21:17:44Z
New Revision: ec51971eae02a031bbfd992cf59e1f6acdadbeeb

URL: https://github.com/llvm/llvm-project/commit/ec51971eae02a031bbfd992cf59e1f6acdadbeeb
DIFF: https://github.com/llvm/llvm-project/commit/ec51971eae02a031bbfd992cf59e1f6acdadbeeb.diff

LOG: [memprof] Keep and display symbol names in the RawMemProfReader.

Extend the Frame struct to hold the symbol name if requested
when a RawMemProfReader object is constructed. This change updates the
tests and removes the need to pass --debug to obtain the mapping from
GUID to symbol names.

Reviewed By: tejohnson

Differential Revision: https://reviews.llvm.org/D126344

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/MemProf.h
    llvm/include/llvm/ProfileData/RawMemProfReader.h
    llvm/lib/ProfileData/RawMemProfReader.cpp
    llvm/test/tools/llvm-profdata/memprof-basic.test
    llvm/test/tools/llvm-profdata/memprof-inline.test
    llvm/tools/llvm-profdata/llvm-profdata.cpp
    llvm/unittests/ProfileData/MemProfTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 3a80ab26cd46e..0406d4fab711e 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -142,6 +142,9 @@ struct Frame {
   // A uuid (uint64_t) identifying the function. It is obtained by
   // llvm::md5(FunctionName) which returns the lower 64 bits.
   GlobalValue::GUID Function;
+  // The symbol name for the function. Only populated in the Frame by the reader
+  // if requested during initialization. This field should not be serialized.
+  llvm::Optional<std::string> SymbolName;
   // The source line offset of the call from the beginning of parent function.
   uint32_t LineOffset;
   // The source column number of the call to help distinguish multiple calls
@@ -152,6 +155,7 @@ struct Frame {
 
   Frame(const Frame &Other) {
     Function = Other.Function;
+    SymbolName = Other.SymbolName;
     LineOffset = Other.LineOffset;
     Column = Other.Column;
     IsInlineFrame = Other.IsInlineFrame;
@@ -161,12 +165,15 @@ struct Frame {
       : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {}
 
   bool operator==(const Frame &Other) const {
+    // Ignore the SymbolName field to avoid a string compare. Comparing the
+    // function hash serves the same purpose.
     return Other.Function == Function && Other.LineOffset == LineOffset &&
            Other.Column == Column && Other.IsInlineFrame == IsInlineFrame;
   }
 
   Frame &operator=(const Frame &Other) {
     Function = Other.Function;
+    SymbolName = Other.SymbolName;
     LineOffset = Other.LineOffset;
     Column = Other.Column;
     IsInlineFrame = Other.IsInlineFrame;
@@ -214,6 +221,8 @@ struct Frame {
   void printYAML(raw_ostream &OS) const {
     OS << "      -\n"
        << "        Function: " << Function << "\n"
+       << "        SymbolName: "
+       << (SymbolName.hasValue() ? SymbolName.getValue() : "<None>") << "\n"
        << "        LineOffset: " << LineOffset << "\n"
        << "        Column: " << Column << "\n"
        << "        Inline: " << IsInlineFrame << "\n";

diff  --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h
index 4421b77d78a16..a81db8ee3d995 100644
--- a/llvm/include/llvm/ProfileData/RawMemProfReader.h
+++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h
@@ -57,7 +57,8 @@ class RawMemProfReader {
   // \p Path. The binary from which the profile has been collected is specified
   // via a path in \p ProfiledBinary.
   static Expected<std::unique_ptr<RawMemProfReader>>
-  create(const Twine &Path, const StringRef ProfiledBinary);
+  create(const Twine &Path, const StringRef ProfiledBinary,
+         bool KeepName = false);
 
   using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>;
   using Iterator = InstrProfIterator<GuidMemProfRecordPair, RawMemProfReader>;
@@ -76,9 +77,9 @@ class RawMemProfReader {
   RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym,
                    llvm::SmallVectorImpl<SegmentEntry> &Seg,
                    llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
-                   CallStackMap &SM)
+                   CallStackMap &SM, bool KeepName = false)
       : Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()),
-        CallstackProfileData(Prof), StackMap(SM) {
+        CallstackProfileData(Prof), StackMap(SM), KeepSymbolName(KeepName) {
     // We don't call initialize here since there is no raw profile to read. The
     // test should pass in the raw profile as structured data.
 
@@ -103,8 +104,9 @@ class RawMemProfReader {
 
 private:
   RawMemProfReader(std::unique_ptr<MemoryBuffer> DataBuffer,
-                   object::OwningBinary<object::Binary> &&Bin)
-      : DataBuffer(std::move(DataBuffer)), Binary(std::move(Bin)) {}
+                   object::OwningBinary<object::Binary> &&Bin, bool KeepName)
+      : DataBuffer(std::move(DataBuffer)), Binary(std::move(Bin)),
+        KeepSymbolName(KeepName) {}
   Error initialize();
   Error readRawProfile();
   // Symbolize and cache all the virtual addresses we encounter in the
@@ -146,8 +148,12 @@ class RawMemProfReader {
 
   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData;
   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter;
-};
 
+  // Whether to keep the symbol name for each frame after hashing.
+  bool KeepSymbolName = false;
+  // A mapping of the hash to symbol name, only used if KeepSymbolName is true.
+  llvm::DenseMap<uint64_t, std::string> GuidToSymbolName;
+};
 } // namespace memprof
 } // namespace llvm
 

diff  --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index 2a619c42fa6ae..96ed9bbe5a768 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -12,6 +12,7 @@
 
 #include <algorithm>
 #include <cstdint>
+#include <memory>
 #include <type_traits>
 
 #include "llvm/ADT/ArrayRef.h"
@@ -174,7 +175,8 @@ bool isRuntimePath(const StringRef Path) {
 } // namespace
 
 Expected<std::unique_ptr<RawMemProfReader>>
-RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary) {
+RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
+                         bool KeepName) {
   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
   if (std::error_code EC = BufferOr.getError())
     return report(errorCodeToError(EC), Path.getSingleStringRef());
@@ -193,8 +195,9 @@ RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary) {
     return report(BinaryOr.takeError(), ProfiledBinary);
   }
 
-  std::unique_ptr<RawMemProfReader> Reader(
-      new RawMemProfReader(std::move(Buffer), std::move(BinaryOr.get())));
+  // Use new here since constructor is private.
+  std::unique_ptr<RawMemProfReader> Reader(new RawMemProfReader(
+      std::move(Buffer), std::move(BinaryOr.get()), KeepName));
   if (Error E = Reader->initialize()) {
     return std::move(E);
   }
@@ -407,21 +410,21 @@ Error RawMemProfReader::symbolizeAndFilterStackFrames() {
       for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
            I++) {
         const auto &DIFrame = DI.getFrame(I);
-        LLVM_DEBUG(
-            // Print out the name to guid mapping for debugging.
-            llvm::dbgs() << "FunctionName: " << DIFrame.FunctionName
-                         << " GUID: "
-                         << IndexedMemProfRecord::getGUID(DIFrame.FunctionName)
-                         << "\n";);
-
-        const Frame F(IndexedMemProfRecord::getGUID(DIFrame.FunctionName),
-                      DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
+        const uint64_t Guid =
+            IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
+        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                       // Only the last entry is not an inlined location.
                       I != NumFrames - 1);
-
-        const FrameId Id = F.hash();
-        IdToFrame.insert({Id, F});
-        SymbolizedFrame[VAddr].push_back(Id);
+        // Here we retain a mapping from the GUID to symbol name instead of
+        // adding it to the frame object directly to reduce memory overhead.
+        // This is because there can be many unique frames, particularly for
+        // callsite frames.
+        if (KeepSymbolName)
+          GuidToSymbolName.insert({Guid, DIFrame.FunctionName});
+
+        const FrameId Hash = F.hash();
+        IdToFrame.insert({Hash, F});
+        SymbolizedFrame[VAddr].push_back(Hash);
       }
     }
 
@@ -525,8 +528,15 @@ Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
     return make_error<InstrProfError>(instrprof_error::eof);
 
   auto IdToFrameCallback = [this](const FrameId Id) {
-    return this->idToFrame(Id);
+    Frame F = this->idToFrame(Id);
+    if (!this->KeepSymbolName)
+      return F;
+    auto Iter = this->GuidToSymbolName.find(F.Function);
+    assert(Iter != this->GuidToSymbolName.end());
+    F.SymbolName = Iter->getSecond();
+    return F;
   };
+
   const IndexedMemProfRecord &IndexedRecord = Iter->second;
   GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)};
   Iter++;

diff  --git a/llvm/test/tools/llvm-profdata/memprof-basic.test b/llvm/test/tools/llvm-profdata/memprof-basic.test
index e72728af101dd..5ec22e29ada3c 100644
--- a/llvm/test/tools/llvm-profdata/memprof-basic.test
+++ b/llvm/test/tools/llvm-profdata/memprof-basic.test
@@ -52,6 +52,7 @@ CHECK-NEXT:     -
 CHECK-NEXT:       Callstack:
 CHECK-NEXT:       -
 CHECK-NEXT:         Function: {{[0-9]+}}
+CHECK-NEXT:         SymbolName: main
 CHECK-NEXT:         LineOffset: 1
 CHECK-NEXT:         Column: 21
 CHECK-NEXT:         Inline: 0
@@ -79,6 +80,7 @@ CHECK-NEXT:     -
 CHECK-NEXT:       Callstack:
 CHECK-NEXT:       -
 CHECK-NEXT:         Function: {{[0-9]+}}
+CHECK-NEXT:         SymbolName: main
 CHECK-NEXT:         LineOffset: 5
 CHECK-NEXT:         Column: 15
 CHECK-NEXT:         Inline: 0

diff  --git a/llvm/test/tools/llvm-profdata/memprof-inline.test b/llvm/test/tools/llvm-profdata/memprof-inline.test
index a31903e120c72..5f34d5ac0d817 100644
--- a/llvm/test/tools/llvm-profdata/memprof-inline.test
+++ b/llvm/test/tools/llvm-profdata/memprof-inline.test
@@ -35,21 +35,6 @@ bin/clang -fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling \
 env MEMPROF_OPTIONS=log_path=stdout ./memprof-inline.exe > inline.memprofraw
 ```
 
-From a debug run we collect the name to guid mappings to ensure that the
-ordering we observe is as expected.
-
-```
-build-dbg/bin/llvm-profdata show --memory --debug-only=memprof \
-          inline.memprofraw --profiled-binary memprof-inline.exe
-
-FunctionName: qux GUID: 15505678318020221912
-FunctionName: foo GUID: 6699318081062747564
-FunctionName: bar GUID: 16434608426314478903
-FunctionName: main GUID: 15822663052811949562
-
-[..omit output here which is checked below..]
-```
-
 RUN: llvm-profdata show --memory %p/Inputs/inline.memprofraw --profiled-binary %p/Inputs/inline.memprofexe | FileCheck %s
 
 CHECK:  MemprofProfile:
@@ -68,21 +53,25 @@ CHECK-NEXT:    -
 CHECK-NEXT:      Callstack:
 CHECK-NEXT:      -
 CHECK-NEXT:        Function: 15505678318020221912
+CHECK-NEXT:        SymbolName: qux
 CHECK-NEXT:        LineOffset: 1
 CHECK-NEXT:        Column: 15
 CHECK-NEXT:        Inline: 1
 CHECK-NEXT:      -
 CHECK-NEXT:        Function: 6699318081062747564
+CHECK-NEXT:        SymbolName: foo
 CHECK-NEXT:        LineOffset: 0
 CHECK-NEXT:        Column: 18
 CHECK-NEXT:        Inline: 0
 CHECK-NEXT:      -
 CHECK-NEXT:        Function: 16434608426314478903
+CHECK-NEXT:        SymbolName: bar
 CHECK-NEXT:        LineOffset: 0
 CHECK-NEXT:        Column: 19
 CHECK-NEXT:        Inline: 0
 CHECK-NEXT:      -
 CHECK-NEXT:        Function: 15822663052811949562
+CHECK-NEXT:        SymbolName: main
 CHECK-NEXT:        LineOffset: 1
 CHECK-NEXT:        Column: 3
 CHECK-NEXT:        Inline: 0
@@ -113,21 +102,25 @@ CHECK-NEXT:    -
 CHECK-NEXT:      Callstack:
 CHECK-NEXT:      -
 CHECK-NEXT:        Function: 15505678318020221912
+CHECK-NEXT:        SymbolName: qux
 CHECK-NEXT:        LineOffset: 1
 CHECK-NEXT:        Column: 15
 CHECK-NEXT:        Inline: 1
 CHECK-NEXT:      -
 CHECK-NEXT:        Function: 6699318081062747564
+CHECK-NEXT:        SymbolName: foo
 CHECK-NEXT:        LineOffset: 0
 CHECK-NEXT:        Column: 18
 CHECK-NEXT:        Inline: 0
 CHECK-NEXT:      -
 CHECK-NEXT:        Function: 16434608426314478903
+CHECK-NEXT:        SymbolName: bar
 CHECK-NEXT:        LineOffset: 0
 CHECK-NEXT:        Column: 19
 CHECK-NEXT:        Inline: 0
 CHECK-NEXT:      -
 CHECK-NEXT:        Function: 15822663052811949562
+CHECK-NEXT:        SymbolName: main
 CHECK-NEXT:        LineOffset: 1
 CHECK-NEXT:        Column: 3
 CHECK-NEXT:        Inline: 0
@@ -155,12 +148,14 @@ CHECK-NEXT:    CallSites:
 CHECK-NEXT:    -
 CHECK-NEXT:      -
 CHECK-NEXT:        Function: 15505678318020221912
+CHECK-NEXT:        SymbolName: qux
 CHECK-NEXT:        LineOffset: 1
 CHECK-NEXT:        Column: 15
 CHECK-NEXT:        Inline: 1
 CHECK-NEXT:    -
 CHECK-NEXT:      -
 CHECK-NEXT:        Function: 6699318081062747564
+CHECK-NEXT:        SymbolName: foo
 CHECK-NEXT:        LineOffset: 0
 CHECK-NEXT:        Column: 18
 CHECK-NEXT:        Inline: 0
@@ -170,6 +165,7 @@ CHECK-NEXT:    CallSites:
 CHECK-NEXT:    -
 CHECK-NEXT:      -
 CHECK-NEXT:        Function: 15822663052811949562
+CHECK-NEXT:        SymbolName: main
 CHECK-NEXT:        LineOffset: 1
 CHECK-NEXT:        Column: 3
 CHECK-NEXT:        Inline: 0
@@ -179,6 +175,7 @@ CHECK-NEXT:    CallSites:
 CHECK-NEXT:    -
 CHECK-NEXT:      -
 CHECK-NEXT:        Function: 16434608426314478903
+CHECK-NEXT:        SymbolName: bar
 CHECK-NEXT:        LineOffset: 0
 CHECK-NEXT:        Column: 19
 CHECK-NEXT:        Inline: 0

diff  --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index ffb64d71771cc..51e3f5ee797d9 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -2532,8 +2532,8 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
 static int showMemProfProfile(const std::string &Filename,
                               const std::string &ProfiledBinary,
                               raw_fd_ostream &OS) {
-  auto ReaderOr =
-      llvm::memprof::RawMemProfReader::create(Filename, ProfiledBinary);
+  auto ReaderOr = llvm::memprof::RawMemProfReader::create(
+      Filename, ProfiledBinary, /*KeepNames=*/true);
   if (Error E = ReaderOr.takeError())
     // Since the error can be related to the profile or the binary we do not
     // pass whence. Instead additional context is provided where necessary in

diff  --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 48d6ee77072ed..480848100446b 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -104,6 +104,11 @@ MATCHER_P4(FrameContains, FunctionName, LineOffset, Column, Inline, "") {
     *result_listener << "Hash mismatch";
     return false;
   }
+  if (F.SymbolName.hasValue() && F.SymbolName.getValue() != FunctionName) {
+    *result_listener << "SymbolName mismatch\nWant: " << FunctionName
+                     << "\nGot: " << F.SymbolName.getValue();
+    return false;
+  }
   if (F.LineOffset == LineOffset && F.Column == Column &&
       F.IsInlineFrame == Inline) {
     return true;
@@ -154,7 +159,8 @@ TEST(MemProf, FillsValue) {
 
   auto Seg = makeSegments();
 
-  RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM);
+  RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM,
+                          /*KeepName=*/true);
 
   llvm::DenseMap<llvm::GlobalValue::GUID, MemProfRecord> Records;
   for (const auto &Pair : Reader) {


        


More information about the llvm-commits mailing list