[llvm] ec51971 - [memprof] Keep and display symbol names in the RawMemProfReader.
Snehasish Kumar via llvm-commits
llvm-commits at lists.llvm.org
Wed May 25 14:17:49 PDT 2022
Author: Snehasish Kumar
Date: 2022-05-25T21:17:44Z
New Revision: ec51971eae02a031bbfd992cf59e1f6acdadbeeb
URL: https://github.com/llvm/llvm-project/commit/ec51971eae02a031bbfd992cf59e1f6acdadbeeb
DIFF: https://github.com/llvm/llvm-project/commit/ec51971eae02a031bbfd992cf59e1f6acdadbeeb.diff
LOG: [memprof] Keep and display symbol names in the RawMemProfReader.
Extend the Frame struct to hold the symbol name if requested
when a RawMemProfReader object is constructed. This change updates the
tests and removes the need to pass --debug to obtain the mapping from
GUID to symbol names.
Reviewed By: tejohnson
Differential Revision: https://reviews.llvm.org/D126344
Added:
Modified:
llvm/include/llvm/ProfileData/MemProf.h
llvm/include/llvm/ProfileData/RawMemProfReader.h
llvm/lib/ProfileData/RawMemProfReader.cpp
llvm/test/tools/llvm-profdata/memprof-basic.test
llvm/test/tools/llvm-profdata/memprof-inline.test
llvm/tools/llvm-profdata/llvm-profdata.cpp
llvm/unittests/ProfileData/MemProfTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 3a80ab26cd46e..0406d4fab711e 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -142,6 +142,9 @@ struct Frame {
// A uuid (uint64_t) identifying the function. It is obtained by
// llvm::md5(FunctionName) which returns the lower 64 bits.
GlobalValue::GUID Function;
+ // The symbol name for the function. Only populated in the Frame by the reader
+ // if requested during initialization. This field should not be serialized.
+ llvm::Optional<std::string> SymbolName;
// The source line offset of the call from the beginning of parent function.
uint32_t LineOffset;
// The source column number of the call to help distinguish multiple calls
@@ -152,6 +155,7 @@ struct Frame {
Frame(const Frame &Other) {
Function = Other.Function;
+ SymbolName = Other.SymbolName;
LineOffset = Other.LineOffset;
Column = Other.Column;
IsInlineFrame = Other.IsInlineFrame;
@@ -161,12 +165,15 @@ struct Frame {
: Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {}
bool operator==(const Frame &Other) const {
+ // Ignore the SymbolName field to avoid a string compare. Comparing the
+ // function hash serves the same purpose.
return Other.Function == Function && Other.LineOffset == LineOffset &&
Other.Column == Column && Other.IsInlineFrame == IsInlineFrame;
}
Frame &operator=(const Frame &Other) {
Function = Other.Function;
+ SymbolName = Other.SymbolName;
LineOffset = Other.LineOffset;
Column = Other.Column;
IsInlineFrame = Other.IsInlineFrame;
@@ -214,6 +221,8 @@ struct Frame {
void printYAML(raw_ostream &OS) const {
OS << " -\n"
<< " Function: " << Function << "\n"
+ << " SymbolName: "
+ << (SymbolName.hasValue() ? SymbolName.getValue() : "<None>") << "\n"
<< " LineOffset: " << LineOffset << "\n"
<< " Column: " << Column << "\n"
<< " Inline: " << IsInlineFrame << "\n";
diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h
index 4421b77d78a16..a81db8ee3d995 100644
--- a/llvm/include/llvm/ProfileData/RawMemProfReader.h
+++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h
@@ -57,7 +57,8 @@ class RawMemProfReader {
// \p Path. The binary from which the profile has been collected is specified
// via a path in \p ProfiledBinary.
static Expected<std::unique_ptr<RawMemProfReader>>
- create(const Twine &Path, const StringRef ProfiledBinary);
+ create(const Twine &Path, const StringRef ProfiledBinary,
+ bool KeepName = false);
using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>;
using Iterator = InstrProfIterator<GuidMemProfRecordPair, RawMemProfReader>;
@@ -76,9 +77,9 @@ class RawMemProfReader {
RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym,
llvm::SmallVectorImpl<SegmentEntry> &Seg,
llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
- CallStackMap &SM)
+ CallStackMap &SM, bool KeepName = false)
: Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()),
- CallstackProfileData(Prof), StackMap(SM) {
+ CallstackProfileData(Prof), StackMap(SM), KeepSymbolName(KeepName) {
// We don't call initialize here since there is no raw profile to read. The
// test should pass in the raw profile as structured data.
@@ -103,8 +104,9 @@ class RawMemProfReader {
private:
RawMemProfReader(std::unique_ptr<MemoryBuffer> DataBuffer,
- object::OwningBinary<object::Binary> &&Bin)
- : DataBuffer(std::move(DataBuffer)), Binary(std::move(Bin)) {}
+ object::OwningBinary<object::Binary> &&Bin, bool KeepName)
+ : DataBuffer(std::move(DataBuffer)), Binary(std::move(Bin)),
+ KeepSymbolName(KeepName) {}
Error initialize();
Error readRawProfile();
// Symbolize and cache all the virtual addresses we encounter in the
@@ -146,8 +148,12 @@ class RawMemProfReader {
llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData;
llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter;
-};
+ // Whether to keep the symbol name for each frame after hashing.
+ bool KeepSymbolName = false;
+ // A mapping of the hash to symbol name, only used if KeepSymbolName is true.
+ llvm::DenseMap<uint64_t, std::string> GuidToSymbolName;
+};
} // namespace memprof
} // namespace llvm
diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index 2a619c42fa6ae..96ed9bbe5a768 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -12,6 +12,7 @@
#include <algorithm>
#include <cstdint>
+#include <memory>
#include <type_traits>
#include "llvm/ADT/ArrayRef.h"
@@ -174,7 +175,8 @@ bool isRuntimePath(const StringRef Path) {
} // namespace
Expected<std::unique_ptr<RawMemProfReader>>
-RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary) {
+RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
+ bool KeepName) {
auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
if (std::error_code EC = BufferOr.getError())
return report(errorCodeToError(EC), Path.getSingleStringRef());
@@ -193,8 +195,9 @@ RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary) {
return report(BinaryOr.takeError(), ProfiledBinary);
}
- std::unique_ptr<RawMemProfReader> Reader(
- new RawMemProfReader(std::move(Buffer), std::move(BinaryOr.get())));
+ // Use new here since constructor is private.
+ std::unique_ptr<RawMemProfReader> Reader(new RawMemProfReader(
+ std::move(Buffer), std::move(BinaryOr.get()), KeepName));
if (Error E = Reader->initialize()) {
return std::move(E);
}
@@ -407,21 +410,21 @@ Error RawMemProfReader::symbolizeAndFilterStackFrames() {
for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
I++) {
const auto &DIFrame = DI.getFrame(I);
- LLVM_DEBUG(
- // Print out the name to guid mapping for debugging.
- llvm::dbgs() << "FunctionName: " << DIFrame.FunctionName
- << " GUID: "
- << IndexedMemProfRecord::getGUID(DIFrame.FunctionName)
- << "\n";);
-
- const Frame F(IndexedMemProfRecord::getGUID(DIFrame.FunctionName),
- DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
+ const uint64_t Guid =
+ IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
+ const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
// Only the last entry is not an inlined location.
I != NumFrames - 1);
-
- const FrameId Id = F.hash();
- IdToFrame.insert({Id, F});
- SymbolizedFrame[VAddr].push_back(Id);
+ // Here we retain a mapping from the GUID to symbol name instead of
+ // adding it to the frame object directly to reduce memory overhead.
+ // This is because there can be many unique frames, particularly for
+ // callsite frames.
+ if (KeepSymbolName)
+ GuidToSymbolName.insert({Guid, DIFrame.FunctionName});
+
+ const FrameId Hash = F.hash();
+ IdToFrame.insert({Hash, F});
+ SymbolizedFrame[VAddr].push_back(Hash);
}
}
@@ -525,8 +528,15 @@ Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
return make_error<InstrProfError>(instrprof_error::eof);
auto IdToFrameCallback = [this](const FrameId Id) {
- return this->idToFrame(Id);
+ Frame F = this->idToFrame(Id);
+ if (!this->KeepSymbolName)
+ return F;
+ auto Iter = this->GuidToSymbolName.find(F.Function);
+ assert(Iter != this->GuidToSymbolName.end());
+ F.SymbolName = Iter->getSecond();
+ return F;
};
+
const IndexedMemProfRecord &IndexedRecord = Iter->second;
GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)};
Iter++;
diff --git a/llvm/test/tools/llvm-profdata/memprof-basic.test b/llvm/test/tools/llvm-profdata/memprof-basic.test
index e72728af101dd..5ec22e29ada3c 100644
--- a/llvm/test/tools/llvm-profdata/memprof-basic.test
+++ b/llvm/test/tools/llvm-profdata/memprof-basic.test
@@ -52,6 +52,7 @@ CHECK-NEXT: -
CHECK-NEXT: Callstack:
CHECK-NEXT: -
CHECK-NEXT: Function: {{[0-9]+}}
+CHECK-NEXT: SymbolName: main
CHECK-NEXT: LineOffset: 1
CHECK-NEXT: Column: 21
CHECK-NEXT: Inline: 0
@@ -79,6 +80,7 @@ CHECK-NEXT: -
CHECK-NEXT: Callstack:
CHECK-NEXT: -
CHECK-NEXT: Function: {{[0-9]+}}
+CHECK-NEXT: SymbolName: main
CHECK-NEXT: LineOffset: 5
CHECK-NEXT: Column: 15
CHECK-NEXT: Inline: 0
diff --git a/llvm/test/tools/llvm-profdata/memprof-inline.test b/llvm/test/tools/llvm-profdata/memprof-inline.test
index a31903e120c72..5f34d5ac0d817 100644
--- a/llvm/test/tools/llvm-profdata/memprof-inline.test
+++ b/llvm/test/tools/llvm-profdata/memprof-inline.test
@@ -35,21 +35,6 @@ bin/clang -fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling \
env MEMPROF_OPTIONS=log_path=stdout ./memprof-inline.exe > inline.memprofraw
```
-From a debug run we collect the name to guid mappings to ensure that the
-ordering we observe is as expected.
-
-```
-build-dbg/bin/llvm-profdata show --memory --debug-only=memprof \
- inline.memprofraw --profiled-binary memprof-inline.exe
-
-FunctionName: qux GUID: 15505678318020221912
-FunctionName: foo GUID: 6699318081062747564
-FunctionName: bar GUID: 16434608426314478903
-FunctionName: main GUID: 15822663052811949562
-
-[..omit output here which is checked below..]
-```
-
RUN: llvm-profdata show --memory %p/Inputs/inline.memprofraw --profiled-binary %p/Inputs/inline.memprofexe | FileCheck %s
CHECK: MemprofProfile:
@@ -68,21 +53,25 @@ CHECK-NEXT: -
CHECK-NEXT: Callstack:
CHECK-NEXT: -
CHECK-NEXT: Function: 15505678318020221912
+CHECK-NEXT: SymbolName: qux
CHECK-NEXT: LineOffset: 1
CHECK-NEXT: Column: 15
CHECK-NEXT: Inline: 1
CHECK-NEXT: -
CHECK-NEXT: Function: 6699318081062747564
+CHECK-NEXT: SymbolName: foo
CHECK-NEXT: LineOffset: 0
CHECK-NEXT: Column: 18
CHECK-NEXT: Inline: 0
CHECK-NEXT: -
CHECK-NEXT: Function: 16434608426314478903
+CHECK-NEXT: SymbolName: bar
CHECK-NEXT: LineOffset: 0
CHECK-NEXT: Column: 19
CHECK-NEXT: Inline: 0
CHECK-NEXT: -
CHECK-NEXT: Function: 15822663052811949562
+CHECK-NEXT: SymbolName: main
CHECK-NEXT: LineOffset: 1
CHECK-NEXT: Column: 3
CHECK-NEXT: Inline: 0
@@ -113,21 +102,25 @@ CHECK-NEXT: -
CHECK-NEXT: Callstack:
CHECK-NEXT: -
CHECK-NEXT: Function: 15505678318020221912
+CHECK-NEXT: SymbolName: qux
CHECK-NEXT: LineOffset: 1
CHECK-NEXT: Column: 15
CHECK-NEXT: Inline: 1
CHECK-NEXT: -
CHECK-NEXT: Function: 6699318081062747564
+CHECK-NEXT: SymbolName: foo
CHECK-NEXT: LineOffset: 0
CHECK-NEXT: Column: 18
CHECK-NEXT: Inline: 0
CHECK-NEXT: -
CHECK-NEXT: Function: 16434608426314478903
+CHECK-NEXT: SymbolName: bar
CHECK-NEXT: LineOffset: 0
CHECK-NEXT: Column: 19
CHECK-NEXT: Inline: 0
CHECK-NEXT: -
CHECK-NEXT: Function: 15822663052811949562
+CHECK-NEXT: SymbolName: main
CHECK-NEXT: LineOffset: 1
CHECK-NEXT: Column: 3
CHECK-NEXT: Inline: 0
@@ -155,12 +148,14 @@ CHECK-NEXT: CallSites:
CHECK-NEXT: -
CHECK-NEXT: -
CHECK-NEXT: Function: 15505678318020221912
+CHECK-NEXT: SymbolName: qux
CHECK-NEXT: LineOffset: 1
CHECK-NEXT: Column: 15
CHECK-NEXT: Inline: 1
CHECK-NEXT: -
CHECK-NEXT: -
CHECK-NEXT: Function: 6699318081062747564
+CHECK-NEXT: SymbolName: foo
CHECK-NEXT: LineOffset: 0
CHECK-NEXT: Column: 18
CHECK-NEXT: Inline: 0
@@ -170,6 +165,7 @@ CHECK-NEXT: CallSites:
CHECK-NEXT: -
CHECK-NEXT: -
CHECK-NEXT: Function: 15822663052811949562
+CHECK-NEXT: SymbolName: main
CHECK-NEXT: LineOffset: 1
CHECK-NEXT: Column: 3
CHECK-NEXT: Inline: 0
@@ -179,6 +175,7 @@ CHECK-NEXT: CallSites:
CHECK-NEXT: -
CHECK-NEXT: -
CHECK-NEXT: Function: 16434608426314478903
+CHECK-NEXT: SymbolName: bar
CHECK-NEXT: LineOffset: 0
CHECK-NEXT: Column: 19
CHECK-NEXT: Inline: 0
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index ffb64d71771cc..51e3f5ee797d9 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -2532,8 +2532,8 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
static int showMemProfProfile(const std::string &Filename,
const std::string &ProfiledBinary,
raw_fd_ostream &OS) {
- auto ReaderOr =
- llvm::memprof::RawMemProfReader::create(Filename, ProfiledBinary);
+ auto ReaderOr = llvm::memprof::RawMemProfReader::create(
+ Filename, ProfiledBinary, /*KeepNames=*/true);
if (Error E = ReaderOr.takeError())
// Since the error can be related to the profile or the binary we do not
// pass whence. Instead additional context is provided where necessary in
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 48d6ee77072ed..480848100446b 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -104,6 +104,11 @@ MATCHER_P4(FrameContains, FunctionName, LineOffset, Column, Inline, "") {
*result_listener << "Hash mismatch";
return false;
}
+ if (F.SymbolName.hasValue() && F.SymbolName.getValue() != FunctionName) {
+ *result_listener << "SymbolName mismatch\nWant: " << FunctionName
+ << "\nGot: " << F.SymbolName.getValue();
+ return false;
+ }
if (F.LineOffset == LineOffset && F.Column == Column &&
F.IsInlineFrame == Inline) {
return true;
@@ -154,7 +159,8 @@ TEST(MemProf, FillsValue) {
auto Seg = makeSegments();
- RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM);
+ RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM,
+ /*KeepName=*/true);
llvm::DenseMap<llvm::GlobalValue::GUID, MemProfRecord> Records;
for (const auto &Pair : Reader) {
More information about the llvm-commits
mailing list