[llvm] [Demo only] Illustrate the llvm-profgen changes to process PEBS memory load events into <IP, DataAddr, Count> Tuples. (PR #142007)
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Thu May 29 11:21:22 PDT 2025
https://github.com/mingmingl-llvm created https://github.com/llvm/llvm-project/pull/142007
None
>From dab9c01c6e8181c49ff654d0b7eb61e3768afdf0 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Sun, 18 May 2025 18:24:08 -0700
Subject: [PATCH 1/2] Support data access profile in llvm-profgen
---
llvm/tools/llvm-profgen/CMakeLists.txt | 1 +
.../llvm-profgen/DataAccessPerfReader.cpp | 129 ++++++++++++++++++
.../tools/llvm-profgen/DataAccessPerfReader.h | 57 ++++++++
llvm/tools/llvm-profgen/PerfReader.cpp | 2 +
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 10 ++
llvm/tools/llvm-profgen/llvm-profgen.cpp | 84 ++++++++----
6 files changed, 255 insertions(+), 28 deletions(-)
create mode 100644 llvm/tools/llvm-profgen/DataAccessPerfReader.cpp
create mode 100644 llvm/tools/llvm-profgen/DataAccessPerfReader.h
diff --git a/llvm/tools/llvm-profgen/CMakeLists.txt b/llvm/tools/llvm-profgen/CMakeLists.txt
index 354c63f409ffe..d11579145a517 100644
--- a/llvm/tools/llvm-profgen/CMakeLists.txt
+++ b/llvm/tools/llvm-profgen/CMakeLists.txt
@@ -18,6 +18,7 @@ set(LLVM_LINK_COMPONENTS
add_llvm_tool(llvm-profgen
llvm-profgen.cpp
PerfReader.cpp
+ DataAccessPerfReader.cpp
CSPreInliner.cpp
ProfiledBinary.cpp
ProfileGenerator.cpp
diff --git a/llvm/tools/llvm-profgen/DataAccessPerfReader.cpp b/llvm/tools/llvm-profgen/DataAccessPerfReader.cpp
new file mode 100644
index 0000000000000..9c0858ef49521
--- /dev/null
+++ b/llvm/tools/llvm-profgen/DataAccessPerfReader.cpp
@@ -0,0 +1,129 @@
+#include "DataAccessPerfReader.h"
+#include "ErrorHandling.h"
+#include "PerfReader.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+#include <regex>
+
+static llvm::Regex IPSampleRegex(": 0x[a-fA-F0-9]+ period:");
+static llvm::Regex DataAddressRegex("addr: 0x[a-fA-F0-9]+");
+
+namespace llvm {
+
+void DataAccessPerfReader::parsePerfTraces() {
+ parsePerfTrace(PerfTraceFilename);
+}
+
+static void testPerfSampleRecordRegex() {
+ std::regex logRegex(
+ R"(^.*?PERF_RECORD_SAMPLE\(.*?\):\s*(\d+)\/(\d+):\s*(0x[0-9a-fA-F]+)\s+period:\s*\d+\s+addr:\s*(0x[0-9a-fA-F]+)$)");
+
+ std::smatch testMatch;
+ const std::string testLine =
+ "2193330181938979 0xa88 [0x48]: PERF_RECORD_SAMPLE(IP, 0x4002): "
+ "1807344/1807344: 0x260b45 period: 100 addr: 0x200630";
+ if (std::regex_search(testLine, testMatch, logRegex)) {
+ if (testMatch.size() != 5) {
+ exitWithError("Regex did not match expected number of groups.");
+ }
+ for (size_t i = 0; i < testMatch.size(); ++i) {
+ errs() << "Group " << i << ": " << testMatch[i] << "\n";
+ }
+ // errs() << "Test line matched successfully.\n";
+ } else {
+ exitWithError("Test line did not match regex.");
+ }
+}
+
+// Ignore mmap events.
+void DataAccessPerfReader::parsePerfTrace(StringRef PerfTrace) {
+ std::regex logRegex(
+ R"(^.*?PERF_RECORD_SAMPLE\(.*?\):\s*(\d+)\/(\d+):\s*(0x[0-9a-fA-F]+)\s+period:\s*\d+\s+addr:\s*(0x[0-9a-fA-F]+)$)");
+ uint64_t UnmatchedLine = 0, MatchedLine = 0;
+
+ auto BufferOrErr = MemoryBuffer::getFile(PerfTrace);
+ std::error_code EC = BufferOrErr.getError();
+ if (EC)
+ exitWithError("Failed to open perf trace file: " + PerfTrace);
+
+ line_iterator LineIt(*BufferOrErr.get(), true);
+ for (; !LineIt.is_at_eof(); ++LineIt) {
+ StringRef Line = *LineIt;
+
+ // Parse MMAP event from perf trace.
+ // Construct a binary from the binary file path.
+ PerfScriptReader::MMapEvent MMap;
+ if (Line.contains("PERF_RECORD_MMAP2")) {
+ if (PerfScriptReader::extractMMapEventForBinary(Binary, Line, MMap)) {
+ errs() << "MMap event found: "
+ << "PID: " << MMap.PID
+ << ", Address: " << format("0x%llx", MMap.Address)
+ << ", Size: " << MMap.Size << ", Offset: " << MMap.Offset
+ << ", Binary Path: " << MMap.BinaryPath << "\n";
+ if (MMap.Offset == 0) {
+ updateBinaryAddress(MMap);
+ }
+ }
+ continue;
+ }
+
+ if (!Line.contains("PERF_RECORD_SAMPLE")) {
+ // Skip lines that do not contain "PERF_RECORD_SAMPLE".
+ continue;
+ }
+ // errs() << "Processing line: " << Line << "\n";
+
+ // if (IPSampleRegex.match(Line, &Matches)) {
+ // errs() << "IP Captured: " << Matches.size() << "\n";
+ // }
+ // if (DataAddressRegex.match(Line, &Matches)) {
+ // errs() << "Data Address Captured: " << Matches.size() << "\n";
+ // }
+
+ std::smatch matches;
+ const std::string LineStr = Line.str();
+
+ if (std::regex_search(LineStr.begin(), LineStr.end(), matches, logRegex)) {
+ if (matches.size() != 5)
+ continue;
+
+ uint64_t DataAddress = std::stoull(matches[4].str(), nullptr, 16);
+ uint64_t IP = std::stoull(matches[3].str(), nullptr, 16);
+ int32_t PID = std::stoi(matches[1].str());
+ // if (DataAddress == 0x200630) {
+ // errs() << "Find data address at 0x200630, IP: " << format("0x%llx",
+ // IP)
+ // << " pid is " << PID << "\n";
+ // }
+
+ // errs() << matches.size() << " matches found in line: " << LineStr <<
+ // "\n"; for (const auto &Match : matches) {
+ // errs() << "Match: " << Match.str() << "\n";
+ // }
+ // Check if the PID matches the filter.
+
+ if (PIDFilter && *PIDFilter != PID) {
+ continue;
+ }
+
+ // Extract the address and count.
+
+ uint64_t CanonicalDataAddress =
+ Binary->canonicalizeVirtualAddress(DataAddress);
+ // errs() << "Data address is " << format("0x" PRIx64 ":", DataAddress)
+ // << " Canonical data address is "
+ // << format("0x" PRIx64 ":", CanonicalDataAddress) << "\n";
+ AddressToCount[CanonicalDataAddress] += 1;
+ MatchedLine++;
+ } else {
+ // errs() << "\tNo match found for line: " << Line << "\n";
+ UnmatchedLine++;
+ }
+ }
+
+ errs() << "Total unmatched lines: " << UnmatchedLine << "\t"
+ << "Matched lines: " << MatchedLine << "\n";
+}
+
+} // namespace llvm
diff --git a/llvm/tools/llvm-profgen/DataAccessPerfReader.h b/llvm/tools/llvm-profgen/DataAccessPerfReader.h
new file mode 100644
index 0000000000000..5e2dc4da5bc6f
--- /dev/null
+++ b/llvm/tools/llvm-profgen/DataAccessPerfReader.h
@@ -0,0 +1,57 @@
+//===-- DataAccessPerfReader.h - perfscript reader for data access profiles -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_PROFGEN_DATAACCESSPERFREADER_H
+#define LLVM_TOOLS_LLVM_PROFGEN_DATAACCESSPERFREADER_H
+
+#include "PerfReader.h"
+#include "ProfiledBinary.h"
+#include "llvm/ADT/MapVector.h"
+
+namespace llvm {
+
+class DataAccessPerfReader : public PerfScriptReader {
+public:
+ DataAccessPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
+ std::optional<int32_t> PID)
+ : PerfScriptReader(Binary, PerfTrace, PID), PerfTraceFilename(PerfTrace) {
+ }
+
+ // Entry of the reader to parse multiple perf traces
+ void parsePerfTraces() override;
+
+ auto getAddressToCount() const {
+ return AddressToCount.getArrayRef();
+ }
+
+ void print() const {
+ auto addrCountArray = AddressToCount.getArrayRef();
+ std::vector<std::pair<uint64_t, uint64_t>> SortedEntries(
+ addrCountArray.begin(), addrCountArray.end());
+ llvm::sort(SortedEntries, [](const auto &A, const auto &B) {
+ return A.second > B.second;
+ });
+ for (const auto &Entry : SortedEntries) {
+ if (Entry.second == 0)
+ continue; // Skip entries with zero count
+ dbgs() << "Address: " << format("0x%llx", Entry.first)
+ << ", Count: " << Entry.second << "\n";
+ }
+ }
+
+private:
+ void parsePerfTrace(StringRef PerfTrace);
+
+ MapVector<uint64_t, uint64_t> AddressToCount;
+
+ StringRef PerfTraceFilename;
+};
+
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_PROFGEN_DATAACCESSPERFREADER_H
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index ad113eda27914..ecacd42576dee 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -478,6 +478,8 @@ void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) {
// Only update for the first executable segment and assume all other
// segments are loaded at consecutive memory addresses, which is the case on
// X64.
+ errs() << "Setting " << Binary->getPath() << " base address to "
+ << format("0x%" PRIx64, Event.Address) << "\n";
Binary->setBaseAddress(Event.Address);
Binary->setIsLoadedByMMap(true);
} else {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 6847ba1b21b1f..3507389922500 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -60,6 +60,10 @@ static cl::opt<bool>
KernelBinary("kernel",
cl::desc("Generate the profile for Linux kernel binary."));
+static cl::opt<bool> RecordDataSegment("record-data-segment", cl::init(false),
+ cl::desc("Record data segment size "
+ "in the profile."));
+
extern cl::opt<bool> ShowDetailedWarning;
extern cl::opt<bool> InferMissingFrames;
@@ -337,6 +341,12 @@ void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
~(PageSize - 1U));
TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U));
}
+ // else if ((Phdr.p_flags & ELF::PF_R) && !TextSegmentOffsets.empty()) {
+ // if (RecordDataSegment) {
+ // ReadOnlyDataSegmentOffsets.push_back(Phdr.p_offset &
+ // ~(PageSize - 1U));
+ // }
+ // }
}
}
diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp
index 3b974e25103ad..acdadf7ba0c4b 100644
--- a/llvm/tools/llvm-profgen/llvm-profgen.cpp
+++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "DataAccessPerfReader.h"
#include "ErrorHandling.h"
#include "PerfReader.h"
#include "ProfileGenerator.h"
@@ -21,6 +22,13 @@
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/VirtualFileSystem.h"
+namespace {
+enum ProfileKinds {
+ SamplePGO, // Selects the pre-existing sample profile path in main().
+ DataAccessProfile, // Selects the DataAccessPerfReader path in main().
+};
+} // namespace
+
static cl::OptionCategory ProfGenCategory("ProfGen Options");
static cl::opt<std::string> PerfScriptFilename(
@@ -67,6 +75,11 @@ static cl::opt<std::string> DebugBinPath(
"from it instead of the executable binary."),
cl::cat(ProfGenCategory));
+// NOTE(review): the description says the default is the sample profile, but
+// cl::init selects DataAccessProfile, and no cl::values are registered for
+// this option -- confirm the intended default before landing.
+static cl::opt<ProfileKinds> ProfileKind(
+ "profile-kind", cl::value_desc("profile-kind"),
+ cl::desc("Profile kind to be generated, default is sample profile."),
+ cl::init(DataAccessProfile), cl::cat(ProfGenCategory));
+
extern cl::opt<bool> ShowDisassemblyOnly;
extern cl::opt<bool> ShowSourceLocations;
extern cl::opt<bool> SkipSymbolization;
@@ -156,37 +169,52 @@ int main(int argc, const char *argv[]) {
if (ShowDisassemblyOnly)
return EXIT_SUCCESS;
- if (SampleProfFilename.getNumOccurrences()) {
- LLVMContext Context;
- auto FS = vfs::getRealFileSystem();
- auto ReaderOrErr =
- SampleProfileReader::create(SampleProfFilename, Context, *FS);
- std::unique_ptr<sampleprof::SampleProfileReader> Reader =
- std::move(ReaderOrErr.get());
- Reader->read();
- std::unique_ptr<ProfileGeneratorBase> Generator =
- ProfileGeneratorBase::create(Binary.get(), Reader->getProfiles(),
- Reader->profileIsCS());
- Generator->generateProfile();
- Generator->write();
+ if (ProfileKind == SamplePGO) {
+ if (SampleProfFilename.getNumOccurrences()) {
+ LLVMContext Context;
+ auto FS = vfs::getRealFileSystem();
+ auto ReaderOrErr =
+ SampleProfileReader::create(SampleProfFilename, Context, *FS);
+ std::unique_ptr<sampleprof::SampleProfileReader> Reader =
+ std::move(ReaderOrErr.get());
+ Reader->read();
+ std::unique_ptr<ProfileGeneratorBase> Generator =
+ ProfileGeneratorBase::create(Binary.get(), Reader->getProfiles(),
+ Reader->profileIsCS());
+ Generator->generateProfile();
+ Generator->write();
+ } else {
+ std::optional<uint32_t> PIDFilter;
+ if (ProcessId.getNumOccurrences())
+ PIDFilter = ProcessId;
+ PerfInputFile PerfFile = getPerfInputFile();
+ std::unique_ptr<PerfReaderBase> Reader =
+ PerfReaderBase::create(Binary.get(), PerfFile, PIDFilter);
+ // Parse perf events and samples
+ Reader->parsePerfTraces();
+
+ if (SkipSymbolization)
+ return EXIT_SUCCESS;
+
+ std::unique_ptr<ProfileGeneratorBase> Generator =
+ ProfileGeneratorBase::create(Binary.get(),
+ &Reader->getSampleCounters(),
+ Reader->profileIsCS());
+ Generator->generateProfile();
+ Generator->write();
+ }
} else {
- std::optional<uint32_t> PIDFilter;
- if (ProcessId.getNumOccurrences())
- PIDFilter = ProcessId;
- PerfInputFile PerfFile = getPerfInputFile();
- std::unique_ptr<PerfReaderBase> Reader =
- PerfReaderBase::create(Binary.get(), PerfFile, PIDFilter);
- // Parse perf events and samples
+ assert(Binary.get() &&
+ "Binary should be initialized for data access profile");
+ errs() << "binary text segment offset is "
+ << format("0x%" PRIx64 ":", Binary->getTextSegmentOffset()) << "\n";
+ // data access profile.
+ SmallVector<StringRef, 4> PerfTraces{PerfScriptFilename};
+ auto Reader = std::make_unique<DataAccessPerfReader>(
+ Binary.get(), PerfScriptFilename, std::nullopt);
Reader->parsePerfTraces();
- if (SkipSymbolization)
- return EXIT_SUCCESS;
-
- std::unique_ptr<ProfileGeneratorBase> Generator =
- ProfileGeneratorBase::create(Binary.get(), &Reader->getSampleCounters(),
- Reader->profileIsCS());
- Generator->generateProfile();
- Generator->write();
+ Reader->print();
}
return EXIT_SUCCESS;
>From 52465e6c52f1afcff6509d2a67967701135e05fa Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 28 May 2025 21:14:32 -0700
Subject: [PATCH 2/2] remove comment
---
.../llvm-profgen/DataAccessPerfReader.cpp | 81 +++++--------------
.../tools/llvm-profgen/DataAccessPerfReader.h | 79 ++++++++++++++----
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 6 --
llvm/tools/llvm-profgen/ProfiledBinary.h | 2 +
llvm/tools/llvm-profgen/llvm-profgen.cpp | 3 +-
5 files changed, 90 insertions(+), 81 deletions(-)
diff --git a/llvm/tools/llvm-profgen/DataAccessPerfReader.cpp b/llvm/tools/llvm-profgen/DataAccessPerfReader.cpp
index 9c0858ef49521..6a02b2d242d2d 100644
--- a/llvm/tools/llvm-profgen/DataAccessPerfReader.cpp
+++ b/llvm/tools/llvm-profgen/DataAccessPerfReader.cpp
@@ -15,32 +15,10 @@ void DataAccessPerfReader::parsePerfTraces() {
parsePerfTrace(PerfTraceFilename);
}
-static void testPerfSampleRecordRegex() {
- std::regex logRegex(
- R"(^.*?PERF_RECORD_SAMPLE\(.*?\):\s*(\d+)\/(\d+):\s*(0x[0-9a-fA-F]+)\s+period:\s*\d+\s+addr:\s*(0x[0-9a-fA-F]+)$)");
-
- std::smatch testMatch;
- const std::string testLine =
- "2193330181938979 0xa88 [0x48]: PERF_RECORD_SAMPLE(IP, 0x4002): "
- "1807344/1807344: 0x260b45 period: 100 addr: 0x200630";
- if (std::regex_search(testLine, testMatch, logRegex)) {
- if (testMatch.size() != 5) {
- exitWithError("Regex did not match expected number of groups.");
- }
- for (size_t i = 0; i < testMatch.size(); ++i) {
- errs() << "Group " << i << ": " << testMatch[i] << "\n";
- }
- // errs() << "Test line matched successfully.\n";
- } else {
- exitWithError("Test line did not match regex.");
- }
-}
-
// Ignore mmap events.
void DataAccessPerfReader::parsePerfTrace(StringRef PerfTrace) {
std::regex logRegex(
R"(^.*?PERF_RECORD_SAMPLE\(.*?\):\s*(\d+)\/(\d+):\s*(0x[0-9a-fA-F]+)\s+period:\s*\d+\s+addr:\s*(0x[0-9a-fA-F]+)$)");
- uint64_t UnmatchedLine = 0, MatchedLine = 0;
auto BufferOrErr = MemoryBuffer::getFile(PerfTrace);
std::error_code EC = BufferOrErr.getError();
@@ -51,18 +29,23 @@ void DataAccessPerfReader::parsePerfTrace(StringRef PerfTrace) {
for (; !LineIt.is_at_eof(); ++LineIt) {
StringRef Line = *LineIt;
+ // PERF_RECORD_MMAP2 lines are consumed below and never parsed as samples.
// Parse MMAP event from perf trace.
// Construct a binary from the binary file path.
PerfScriptReader::MMapEvent MMap;
if (Line.contains("PERF_RECORD_MMAP2")) {
if (PerfScriptReader::extractMMapEventForBinary(Binary, Line, MMap)) {
- errs() << "MMap event found: "
- << "PID: " << MMap.PID
- << ", Address: " << format("0x%llx", MMap.Address)
- << ", Size: " << MMap.Size << ", Offset: " << MMap.Offset
- << ", Binary Path: " << MMap.BinaryPath << "\n";
+ // TODO: This is a hack to avoid mapping binary address for data section
+ // mappings.
if (MMap.Offset == 0) {
updateBinaryAddress(MMap);
+ errs() << "Binary base address is "
+ << format("0x%" PRIx64, Binary->getBaseAddress())
+ << " and preferred base address is "
+ << format("0x%" PRIx64, Binary->getPreferredBaseAddress())
+ << " and first loadable address is "
+ << format("0x%" PRIx64, Binary->getFirstLoadableAddress())
+ << "\n";
}
}
continue;
@@ -72,14 +55,6 @@ void DataAccessPerfReader::parsePerfTrace(StringRef PerfTrace) {
// Skip lines that do not contain "PERF_RECORD_SAMPLE".
continue;
}
- // errs() << "Processing line: " << Line << "\n";
-
- // if (IPSampleRegex.match(Line, &Matches)) {
- // errs() << "IP Captured: " << Matches.size() << "\n";
- // }
- // if (DataAddressRegex.match(Line, &Matches)) {
- // errs() << "Data Address Captured: " << Matches.size() << "\n";
- // }
std::smatch matches;
const std::string LineStr = Line.str();
@@ -89,41 +64,29 @@ void DataAccessPerfReader::parsePerfTrace(StringRef PerfTrace) {
continue;
uint64_t DataAddress = std::stoull(matches[4].str(), nullptr, 16);
- uint64_t IP = std::stoull(matches[3].str(), nullptr, 16);
+
+ // Skip addresses out of the specified PT_LOAD section for data.
+ if (DataAddress < DataMMap.Address ||
+ DataAddress >= DataMMap.Address + DataMMap.Size)
+ continue;
+
int32_t PID = std::stoi(matches[1].str());
- // if (DataAddress == 0x200630) {
- // errs() << "Find data address at 0x200630, IP: " << format("0x%llx",
- // IP)
- // << " pid is " << PID << "\n";
- // }
-
- // errs() << matches.size() << " matches found in line: " << LineStr <<
- // "\n"; for (const auto &Match : matches) {
- // errs() << "Match: " << Match.str() << "\n";
- // }
// Check if the PID matches the filter.
if (PIDFilter && *PIDFilter != PID) {
continue;
}
+ uint64_t IP = std::stoull(matches[3].str(), nullptr, 16);
// Extract the address and count.
-
uint64_t CanonicalDataAddress =
- Binary->canonicalizeVirtualAddress(DataAddress);
- // errs() << "Data address is " << format("0x" PRIx64 ":", DataAddress)
- // << " Canonical data address is "
- // << format("0x" PRIx64 ":", CanonicalDataAddress) << "\n";
- AddressToCount[CanonicalDataAddress] += 1;
- MatchedLine++;
- } else {
- // errs() << "\tNo match found for line: " << Line << "\n";
- UnmatchedLine++;
+ canonicalizeDataAddress(DataAddress, *Binary, DataMMap, DataSegment);
+
+ uint64_t CanonicalIPAddress = Binary->canonicalizeVirtualAddress(IP);
+
+ AddressMap[CanonicalIPAddress][CanonicalDataAddress] += 1;
}
}
-
- errs() << "Total unmatched lines: " << UnmatchedLine << "\t"
- << "Matched lines: " << MatchedLine << "\n";
}
} // namespace llvm
diff --git a/llvm/tools/llvm-profgen/DataAccessPerfReader.h b/llvm/tools/llvm-profgen/DataAccessPerfReader.h
index 5e2dc4da5bc6f..31961159fbc30 100644
--- a/llvm/tools/llvm-profgen/DataAccessPerfReader.h
+++ b/llvm/tools/llvm-profgen/DataAccessPerfReader.h
@@ -17,39 +17,90 @@ namespace llvm {
class DataAccessPerfReader : public PerfScriptReader {
public:
+ class DataSegment {
+ public:
+ uint64_t FileOffset; // Byte offset of the segment start in the binary.
+ uint64_t VirtualAddress; // Virtual address of the segment start.
+ };
DataAccessPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
std::optional<int32_t> PID)
: PerfScriptReader(Binary, PerfTrace, PID), PerfTraceFilename(PerfTrace) {
+ hackMMapEventAndDataSegment(DataMMap, DataSegment, *Binary);
+ }
+
+ // Demo-only hack: hard-codes one MMAP2 event and its data segment.
+ static void
+ hackMMapEventAndDataSegment(PerfScriptReader::MMapEvent &MMap,
+ DataSegment &DataSegment,
+ const ProfiledBinary &ProfiledBinary) {
+ // The PERF_RECORD_MMAP2 event is
+ // 0 0x4e8 [0xa0]: PERF_RECORD_MMAP2 1849842/1849842:
+ // [0x55d977426000(0x1000) @ 0x1000 fd:01 20869534 0]: r--p /path/to/binary
+ MMap.PID = 1849842; // Example PID
+ MMap.BinaryPath = ProfiledBinary.getPath();
+ MMap.Address = 0x55d977426000;
+ MMap.Size = 0x1000;
+ MMap.Offset = 0x1000; // File Offset in the binary.
+
+ // TODO: Set binary fields to do address canonicalization, and compute
+ // static data address range.
+ DataSegment.FileOffset =
+ 0x1180; // The byte offset of the segment start in the binary.
+ DataSegment.VirtualAddress =
+ 0x3180; // The virtual address of the segment start in the binary.
+ }
+
+ uint64_t canonicalizeDataAddress(uint64_t Address,
+ const ProfiledBinary &ProfiledBinary,
+ const PerfScriptReader::MMapEvent &MMap,
+ const DataSegment &DataSegment) {
+ // virtual-addr = segment.virtual-addr (0x3180) + (runtime-addr -
+ // map.address - (segment.file-offset (0x1180) - map.file-offset (0x1000)))
+ return DataSegment.VirtualAddress +
+ (Address - MMap.Address - (DataSegment.FileOffset - MMap.Offset));
}
// Entry of the reader to parse multiple perf traces
void parsePerfTraces() override;
- auto getAddressToCount() const {
- return AddressToCount.getArrayRef();
- }
+ struct ProfiledInfo {
+ ProfiledInfo(uint64_t InstructionAddr, uint64_t DataAddr, uint64_t Count)
+ : InstructionAddr(InstructionAddr), DataAddr(DataAddr), Count(Count) {}
+ uint64_t InstructionAddr;
+ uint64_t DataAddr;
+ uint64_t Count; // Number of samples seen for this <IP, data address>.
+ };
+ // Demo hack for vtable type profiling: dumps <IP, DataAddr, Count> tuples.
void print() const {
- auto addrCountArray = AddressToCount.getArrayRef();
- std::vector<std::pair<uint64_t, uint64_t>> SortedEntries(
- addrCountArray.begin(), addrCountArray.end());
- llvm::sort(SortedEntries, [](const auto &A, const auto &B) {
- return A.second > B.second;
- });
- for (const auto &Entry : SortedEntries) {
- if (Entry.second == 0)
+
+ std::vector<ProfiledInfo> Entries;
+ Entries.reserve(AddressMap.size());
+ for (const auto &[IpAddr, DataCount] : AddressMap) {
+ for (const auto [DataAddr, Count] : DataCount) {
+ Entries.emplace_back(ProfiledInfo(IpAddr, DataAddr, Count));
+ }
+ }
+ llvm::sort(Entries,
+ [](const auto &A, const auto &B) { return A.Count > B.Count; });
+ for (const auto &Entry : Entries) {
+ if (Entry.Count == 0)
continue; // Skip entries with zero count
- dbgs() << "Address: " << format("0x%llx", Entry.first)
- << ", Count: " << Entry.second << "\n";
+ dbgs() << "Address: " << format("0x%llx", Entry.InstructionAddr)
+ << " Data Address: " << format("0x%llx", Entry.DataAddr)
+ << " Count: " << Entry.Count << "\n";
}
}
private:
void parsePerfTrace(StringRef PerfTrace);
- MapVector<uint64_t, uint64_t> AddressToCount;
+ DenseMap<uint64_t, DenseMap<uint64_t, uint64_t>> AddressMap;
StringRef PerfTraceFilename;
+
+ PerfScriptReader::MMapEvent DataMMap;
+ DataSegment DataSegment;
};
} // namespace llvm
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 3507389922500..d26b7ce6ed657 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -341,12 +341,6 @@ void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
~(PageSize - 1U));
TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U));
}
- // else if ((Phdr.p_flags & ELF::PF_R) && !TextSegmentOffsets.empty()) {
- // if (RecordDataSegment) {
- // ReadOnlyDataSegmentOffsets.push_back(Phdr.p_offset &
- // ~(PageSize - 1U));
- // }
- // }
}
}
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 0588cb48b2af6..449b7ae81e896 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -205,6 +205,8 @@ class ProfiledBinary {
// The file offset of each executable segment.
std::vector<uint64_t> TextSegmentOffsets;
+ std::vector<uint64_t> ReadOnlyDataSegmentOffsets;
+
// Mutiple MC component info
std::unique_ptr<const MCRegisterInfo> MRI;
std::unique_ptr<const MCAsmInfo> AsmInfo;
diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp
index acdadf7ba0c4b..55972ec961790 100644
--- a/llvm/tools/llvm-profgen/llvm-profgen.cpp
+++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp
@@ -206,8 +206,7 @@ int main(int argc, const char *argv[]) {
} else {
assert(Binary.get() &&
"Binary should be initialized for data access profile");
- errs() << "binary text segment offset is "
- << format("0x%" PRIx64 ":", Binary->getTextSegmentOffset()) << "\n";
+
// data access profile.
SmallVector<StringRef, 4> PerfTraces{PerfScriptFilename};
auto Reader = std::make_unique<DataAccessPerfReader>(
More information about the llvm-commits
mailing list