[llvm] [llvm-profgen] Add support for Linux kernel profile (PR #92831)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 20 15:33:34 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-pgo
Author: None (xur-llvm)
<details>
<summary>Changes</summary>
Add support for handling Linux kernel perf files. The functionality is enabled by the -kernel option. Note that currently only the main kernel (in vmlinux) is handled; kernel modules are not handled.
---
Full diff: https://github.com/llvm/llvm-project/pull/92831.diff
4 Files Affected:
- (modified) llvm/tools/llvm-profgen/PerfReader.cpp (+82-46)
- (modified) llvm/tools/llvm-profgen/PerfReader.h (+17-17)
- (modified) llvm/tools/llvm-profgen/ProfiledBinary.cpp (+7)
- (modified) llvm/tools/llvm-profgen/ProfiledBinary.h (+11)
``````````diff
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index e9442027aed3f..36c8ebd6af566 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -321,7 +321,7 @@ bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {
std::unique_ptr<PerfReaderBase>
PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
- std::optional<uint32_t> PIDFilter) {
+ std::optional<int32_t> PIDFilter) {
std::unique_ptr<PerfReaderBase> PerfReader;
if (PerfInput.Format == PerfFormat::UnsymbolizedProfile) {
@@ -355,7 +355,7 @@ PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
PerfInputFile
PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary,
PerfInputFile &File,
- std::optional<uint32_t> PIDFilter) {
+ std::optional<int32_t> PIDFilter) {
StringRef PerfData = File.InputFile;
// Run perf script to retrieve PIDs matching binary we're interested in.
auto PerfExecutable = sys::Process::FindInEnvPath("PATH", "perf");
@@ -363,49 +363,61 @@ PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary,
exitWithError("Perf not found.");
}
std::string PerfPath = *PerfExecutable;
-
SmallString<128> PerfTraceFile;
sys::fs::createUniquePath("perf-script-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%.tmp",
PerfTraceFile, /*MakeAbsolute=*/true);
std::string ErrorFile = std::string(PerfTraceFile) + ".err";
- StringRef ScriptMMapArgs[] = {PerfPath, "script", "--show-mmap-events",
- "-F", "comm,pid", "-i",
- PerfData};
std::optional<StringRef> Redirects[] = {std::nullopt, // Stdin
StringRef(PerfTraceFile), // Stdout
StringRef(ErrorFile)}; // Stderr
- sys::ExecuteAndWait(PerfPath, ScriptMMapArgs, std::nullopt, Redirects);
-
PerfScriptReader::TempFileCleanups.emplace_back(PerfTraceFile);
PerfScriptReader::TempFileCleanups.emplace_back(ErrorFile);
- // Collect the PIDs
- TraceStream TraceIt(PerfTraceFile);
+ // For a kernel perf file, we assume --branch_filter=k is used. So there is
+ // no need for PIDs.
std::string PIDs;
- std::unordered_set<uint32_t> PIDSet;
- while (!TraceIt.isAtEoF()) {
- MMapEvent MMap;
- if (isMMap2Event(TraceIt.getCurrentLine()) &&
- extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) {
- auto It = PIDSet.emplace(MMap.PID);
- if (It.second && (!PIDFilter || MMap.PID == *PIDFilter)) {
- if (!PIDs.empty()) {
- PIDs.append(",");
+ if (!Binary->isKernel()) {
+ StringRef ScriptMMapArgs[] = {PerfPath, "script", "--show-mmap-events",
+ "-F", "comm,pid", "-i",
+ PerfData};
+ sys::ExecuteAndWait(PerfPath, ScriptMMapArgs, std::nullopt, Redirects);
+
+ // Collect the PIDs
+ TraceStream TraceIt(PerfTraceFile);
+ std::unordered_set<int32_t> PIDSet;
+ while (!TraceIt.isAtEoF()) {
+ MMapEvent MMap;
+ if (isMMapEvent(TraceIt.getCurrentLine()) &&
+ extractMMapEventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) {
+ auto It = PIDSet.emplace(MMap.PID);
+ if (It.second && (!PIDFilter || MMap.PID == *PIDFilter)) {
+ if (!PIDs.empty()) {
+ PIDs.append(",");
+ }
+ PIDs.append(utostr(MMap.PID));
}
- PIDs.append(utostr(MMap.PID));
}
+ TraceIt.advance();
}
- TraceIt.advance();
- }
- if (PIDs.empty()) {
- exitWithError("No relevant mmap event is found in perf data.");
+ if (PIDs.empty()) {
+ exitWithError("No relevant mmap event is found in perf data.");
+ }
}
// Run perf script again to retrieve events for PIDs collected above
- StringRef ScriptSampleArgs[] = {PerfPath, "script", "--show-mmap-events",
- "-F", "ip,brstack", "--pid",
- PIDs, "-i", PerfData};
+ SmallVector<StringRef, 8> ScriptSampleArgs;
+ ScriptSampleArgs.push_back(PerfPath);
+ ScriptSampleArgs.push_back("script");
+ ScriptSampleArgs.push_back("--show-mmap-events");
+ ScriptSampleArgs.push_back("-F");
+ ScriptSampleArgs.push_back("ip,brstack");
+ ScriptSampleArgs.push_back("-i");
+ ScriptSampleArgs.push_back(PerfData);
+ if (!PIDs.empty()) {
+ ScriptSampleArgs.push_back("--pid");
+ ScriptSampleArgs.push_back(PIDs);
+ }
sys::ExecuteAndWait(PerfPath, ScriptSampleArgs, std::nullopt, Redirects);
return {std::string(PerfTraceFile), PerfFormat::PerfScript,
@@ -428,7 +440,10 @@ static StringRef filename(StringRef Path, bool UseBackSlash) {
void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) {
// Drop the event which doesn't belong to user-provided binary
StringRef BinaryName = filename(Event.BinaryPath, Binary->isCOFF());
- if (Binary->getName() != BinaryName)
+ bool IsKernel = Binary->isKernel();
+ if (!IsKernel && Binary->getName() != BinaryName)
+ return;
+ if (IsKernel && !Binary->isKernelImageName(BinaryName))
return;
// Drop the event if process does not match pid filter
@@ -441,7 +456,7 @@ void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) {
return;
}
- if (Event.Offset == Binary->getTextSegmentOffset()) {
+ if (IsKernel || Event.Offset == Binary->getTextSegmentOffset()) {
// A binary image could be unloaded and then reloaded at different
// place, so update binary load address.
// Only update for the first executable segment and assume all other
@@ -950,16 +965,23 @@ void PerfScriptReader::parseSample(TraceStream &TraceIt) {
parseSample(TraceIt, Count);
}
-bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary,
- StringRef Line,
- MMapEvent &MMap) {
- // Parse a line like:
+bool PerfScriptReader::extractMMapEventForBinary(ProfiledBinary *Binary,
+ StringRef Line,
+ MMapEvent &MMap) {
+ // Parse a MMap2 line like:
// PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
// 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
- constexpr static const char *const Pattern =
- "PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: "
+ constexpr static const char *const MMap2Pattern =
+ "PERF_RECORD_MMAP2 (-?[0-9]+)/[0-9]+: "
"\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
"(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)";
+ // Parse a MMap line like
+ // PERF_RECORD_MMAP -1/0: [0xffffffff81e00000(0x3e8fa000) @ \
+ // 0xffffffff81e00000]: x [kernel.kallsyms]_text
+ constexpr static const char *const MMapPattern =
+ "PERF_RECORD_MMAP (-?[0-9]+)/[0-9]+: "
+ "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
+ "(0x[a-f0-9]+|0)\\]: [-a-z]+ (.*)";
// Field 0 - whole line
// Field 1 - PID
// Field 2 - base address
@@ -975,14 +997,25 @@ bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary,
BINARY_PATH = 5
};
- Regex RegMmap2(Pattern);
+ bool R = false;
SmallVector<StringRef, 6> Fields;
- bool R = RegMmap2.match(Line, &Fields);
+ if (Line.contains("PERF_RECORD_MMAP2 ")) {
+ Regex RegMmap2(MMap2Pattern);
+ R = RegMmap2.match(Line, &Fields);
+ } else if (Line.contains("PERF_RECORD_MMAP ")) {
+ Regex RegMmap(MMapPattern);
+ R = RegMmap.match(Line, &Fields);
+ } else
+ llvm_unreachable("unexpected MMAP event entry");
+
if (!R) {
std::string WarningMsg = "Cannot parse mmap event: " + Line.str() + " \n";
WithColor::warning() << WarningMsg;
+ return false;
}
- Fields[PID].getAsInteger(10, MMap.PID);
+ long long MMapPID = 0;
+ getAsSignedInteger(Fields[PID], 10, MMapPID);
+ MMap.PID = MMapPID;
Fields[MMAPPED_ADDRESS].getAsInteger(0, MMap.Address);
Fields[MMAPPED_SIZE].getAsInteger(0, MMap.Size);
Fields[PAGE_OFFSET].getAsInteger(0, MMap.Offset);
@@ -993,19 +1026,22 @@ bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary,
}
StringRef BinaryName = filename(MMap.BinaryPath, Binary->isCOFF());
+ if (Binary->isKernel()) {
+ return Binary->isKernelImageName(BinaryName);
+ }
return Binary->getName() == BinaryName;
}
-void PerfScriptReader::parseMMap2Event(TraceStream &TraceIt) {
+void PerfScriptReader::parseMMapEvent(TraceStream &TraceIt) {
MMapEvent MMap;
- if (extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap))
+ if (extractMMapEventForBinary(Binary, TraceIt.getCurrentLine(), MMap))
updateBinaryAddress(MMap);
TraceIt.advance();
}
void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) {
- if (isMMap2Event(TraceIt.getCurrentLine()))
- parseMMap2Event(TraceIt);
+ if (isMMapEvent(TraceIt.getCurrentLine()))
+ parseMMapEvent(TraceIt);
else
parseSample(TraceIt);
}
@@ -1032,7 +1068,7 @@ bool PerfScriptReader::isLBRSample(StringRef Line) {
return false;
}
-bool PerfScriptReader::isMMap2Event(StringRef Line) {
+bool PerfScriptReader::isMMapEvent(StringRef Line) {
// Short cut to avoid string find is possible.
if (Line.empty() || Line.size() < 50)
return false;
@@ -1040,9 +1076,9 @@ bool PerfScriptReader::isMMap2Event(StringRef Line) {
if (std::isdigit(Line[0]))
return false;
- // PERF_RECORD_MMAP2 does not appear at the beginning of the line
- // for ` perf script --show-mmap-events -i ...`
- return Line.contains("PERF_RECORD_MMAP2");
+ // PERF_RECORD_MMAP2 or PERF_RECORD_MMAP does not appear at the beginning of
+ // the line for ` perf script --show-mmap-events -i ...`
+ return Line.contains("PERF_RECORD_MMAP");
}
// The raw hybird sample is like
diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h
index b821cbe13efae..2366479c38941 100644
--- a/llvm/tools/llvm-profgen/PerfReader.h
+++ b/llvm/tools/llvm-profgen/PerfReader.h
@@ -570,7 +570,7 @@ class PerfReaderBase {
virtual ~PerfReaderBase() = default;
static std::unique_ptr<PerfReaderBase>
create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
- std::optional<uint32_t> PIDFilter);
+ std::optional<int32_t> PIDFilter);
// Entry of the reader to parse multiple perf traces
virtual void parsePerfTraces() = 0;
@@ -595,15 +595,15 @@ class PerfReaderBase {
class PerfScriptReader : public PerfReaderBase {
public:
PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace,
- std::optional<uint32_t> PID)
- : PerfReaderBase(B, PerfTrace), PIDFilter(PID){};
+ std::optional<int32_t> PID)
+ : PerfReaderBase(B, PerfTrace), PIDFilter(PID) {};
// Entry of the reader to parse multiple perf traces
void parsePerfTraces() override;
// Generate perf script from perf data
- static PerfInputFile
- convertPerfDataToTrace(ProfiledBinary *Binary, PerfInputFile &File,
- std::optional<uint32_t> PIDFilter);
+ static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary,
+ PerfInputFile &File,
+ std::optional<int32_t> PIDFilter);
// Extract perf script type by peaking at the input
static PerfContent checkPerfScriptType(StringRef FileName);
@@ -615,7 +615,7 @@ class PerfScriptReader : public PerfReaderBase {
protected:
// The parsed MMap event
struct MMapEvent {
- uint64_t PID = 0;
+ int64_t PID = 0;
uint64_t Address = 0;
uint64_t Size = 0;
uint64_t Offset = 0;
@@ -625,15 +625,15 @@ class PerfScriptReader : public PerfReaderBase {
// Check whether a given line is LBR sample
static bool isLBRSample(StringRef Line);
// Check whether a given line is MMAP event
- static bool isMMap2Event(StringRef Line);
- // Parse a single line of a PERF_RECORD_MMAP2 event looking for a
+ static bool isMMapEvent(StringRef Line);
+ // Parse a single line of a PERF_RECORD_MMAP event looking for a
// mapping between the binary name and its memory layout.
- static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line,
- MMapEvent &MMap);
+ static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line,
+ MMapEvent &MMap);
// Update base address based on mmap events
void updateBinaryAddress(const MMapEvent &Event);
// Parse mmap event and update binary address
- void parseMMap2Event(TraceStream &TraceIt);
+ void parseMMapEvent(TraceStream &TraceIt);
// Parse perf events/samples and do aggregation
void parseAndAggregateTrace();
// Parse either an MMAP event or a perf sample
@@ -669,7 +669,7 @@ class PerfScriptReader : public PerfReaderBase {
// Keep track of all invalid return addresses
std::set<uint64_t> InvalidReturnAddresses;
// PID for the process of interest
- std::optional<uint32_t> PIDFilter;
+ std::optional<int32_t> PIDFilter;
};
/*
@@ -681,8 +681,8 @@ class PerfScriptReader : public PerfReaderBase {
class LBRPerfReader : public PerfScriptReader {
public:
LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
- std::optional<uint32_t> PID)
- : PerfScriptReader(Binary, PerfTrace, PID){};
+ std::optional<int32_t> PID)
+ : PerfScriptReader(Binary, PerfTrace, PID) {};
// Parse the LBR only sample.
void parseSample(TraceStream &TraceIt, uint64_t Count) override;
};
@@ -699,8 +699,8 @@ class LBRPerfReader : public PerfScriptReader {
class HybridPerfReader : public PerfScriptReader {
public:
HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
- std::optional<uint32_t> PID)
- : PerfScriptReader(Binary, PerfTrace, PID){};
+ std::optional<int32_t> PID)
+ : PerfScriptReader(Binary, PerfTrace, PID) {};
// Parse the hybrid sample including the call and LBR line
void parseSample(TraceStream &TraceIt, uint64_t Count) override;
void generateUnsymbolizedProfile() override;
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 1baf35820f97f..a7e506d32ac2e 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -56,6 +56,10 @@ static cl::list<std::string> DisassembleFunctions(
cl::desc("List of functions to print disassembly for. Accept demangled "
"names only. Only work with show-disassembly-only"));
+static cl::opt<bool>
+ KernelBinary("kernel",
+ cl::desc("Generate the profile for Linux kernel binary."));
+
extern cl::opt<bool> ShowDetailedWarning;
extern cl::opt<bool> InferMissingFrames;
@@ -221,6 +225,9 @@ void ProfiledBinary::load() {
LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
+ // Mark the binary as a kernel image;
+ IsKernel = KernelBinary;
+
// Find the preferred load address for text sections.
setPreferredTextSegmentAddresses(Obj);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 5d2088ad7691c..bf7c7e365bbad 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -291,6 +291,9 @@ class ProfiledBinary {
// Whether we need to symbolize all instructions to get function context size.
bool TrackFuncContextSize = false;
+ // Whether this is a kernel image;
+ bool IsKernel = false;
+
// Indicate if the base loading address is parsed from the mmap event or uses
// the preferred address
bool IsLoadedByMMap = false;
@@ -428,6 +431,14 @@ class ProfiledBinary {
bool usePseudoProbes() const { return UsePseudoProbes; }
bool useFSDiscriminator() const { return UseFSDiscriminator; }
+ bool isKernel() const { return IsKernel; }
+
+ bool isKernelImageName(StringRef BinaryName) const {
+ return BinaryName == "[kernel.kallsyms]" ||
+ BinaryName == "[kernel.kallsyms]_stext" ||
+ BinaryName == "[kernel.kallsyms]_text";
+ }
+
// Get the index in CodeAddressVec for the address
// As we might get an address which is not the code
// here it would round to the next valid code address by
``````````
</details>
https://github.com/llvm/llvm-project/pull/92831
More information about the llvm-commits
mailing list