[llvm-branch-commits] [llvm-profgen] Add --time-profgen (PR #191930)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Apr 13 20:11:16 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-pgo
Author: Amir Ayupov (aaupov)
<details>
<summary>Changes</summary>
Add `NamedRegionTimer`s, enabled by the new `--time-profgen` option, to the main llvm-profgen phases:
- Parse and aggregate trace (`parseAndAggregateTrace`)
- Unwind samples (`unwindSamples`)
- Generate profile (`ProfileGenerator::generateProfile`)
- Generate CS profile (`CSProfileGenerator::generateProfile`)
Test Plan:
```
$ llvm-profgen --time-profgen ...
===-------------------------------------------------------------------------===
llvm-profgen
===-------------------------------------------------------------------------===
Total Execution Time: 2826.6549 seconds (2873.3410 wall clock)
   ---User Time---     --System Time--     --User+System--     ---Wall Time---    --- Name ---
  1059.4929 ( 38.1%)     8.5146 ( 17.3%)  1068.0075 ( 37.8%)  1090.6604 ( 38.0%)  Generate CS profile
   892.6504 ( 32.1%)    39.8720 ( 80.9%)   932.5224 ( 33.0%)   950.7938 ( 33.1%)  Parse and aggregate trace
   825.2141 ( 29.7%)     0.9110 (  1.8%)   826.1250 ( 29.2%)   831.8868 ( 29.0%)  Unwind samples
  2777.3573 (100.0%)    49.2975 (100.0%)  2826.6549 (100.0%)  2873.3410 (100.0%)  Total
```
---
Full diff: https://github.com/llvm/llvm-project/pull/191930.diff
3 Files Affected:
- (modified) llvm/tools/llvm-profgen/Options.h (+1)
- (modified) llvm/tools/llvm-profgen/PerfReader.cpp (+16-5)
- (modified) llvm/tools/llvm-profgen/ProfileGenerator.cpp (+5)
``````````diff
diff --git a/llvm/tools/llvm-profgen/Options.h b/llvm/tools/llvm-profgen/Options.h
index b2c941fb01945..395a55726274d 100644
--- a/llvm/tools/llvm-profgen/Options.h
+++ b/llvm/tools/llvm-profgen/Options.h
@@ -23,6 +23,7 @@ extern cl::opt<bool> InferMissingFrames;
extern cl::opt<bool> EnableCSPreInliner;
extern cl::opt<bool> UseContextCostForPreInliner;
extern cl::opt<bool> LoadFunctionFromSymbol;
+extern cl::opt<bool> TimeProfGen;
} // end namespace llvm
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index e1024092dbe0d..aa913a336a76b 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -15,6 +15,7 @@
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Process.h"
+#include "llvm/Support/Timer.h"
#include "llvm/Support/ToolOutputFile.h"
#define DEBUG_TYPE "perf-reader"
@@ -67,6 +68,13 @@ static cl::opt<std::string> FilterBuildID(
"with a matching build ID prefix are kept."),
cl::cat(ProfGenCategory));
+cl::opt<bool> TimeProfGen("time-profgen",
+ cl::desc("Time llvm-profgen phases"), cl::init(false),
+ cl::cat(ProfGenCategory));
+
+static const char *TimerGroupName = "profgen";
+static const char *TimerGroupDesc = "llvm-profgen";
+
namespace sampleprof {
void VirtualUnwinder::unwindCall(UnwindState &State) {
@@ -601,6 +609,8 @@ static std::string getContextKeyStr(ContextKey *K,
}
void HybridPerfReader::unwindSamples() {
+ NamedRegionTimer T("unwind", "Unwind samples", TimerGroupName,
+ TimerGroupDesc, TimeProfGen);
VirtualUnwinder Unwinder(&SampleCounters, Binary);
for (const auto &Item : AggregatedSamples) {
const PerfSample *Sample = Item.first.getPtr();
@@ -756,7 +766,7 @@ bool PerfScriptReader::extractCallstack(TraceStream &TraceIt,
// It's in bottom-up order with each frame in one line.
// Extract stack frames from sample
- while (!TraceIt.isAtEoF() && !isLBRSample(TraceIt.getCurrentLine(), true)) {
+ while (!TraceIt.isAtEoF() && !isLBRSample(TraceIt.getCurrentLine(), true, IsPreAggregated)) {
StringRef FrameStr = TraceIt.getCurrentLine().ltrim();
uint64_t FrameAddr = 0;
StringRef FrameBuildID;
@@ -811,7 +821,7 @@ bool PerfScriptReader::extractCallstack(TraceStream &TraceIt,
// Skip other unrelated line, find the next valid LBR line
// Note that even for empty call stack, we should skip the address at the
// bottom, otherwise the following pass may generate a truncated callstack
- while (!TraceIt.isAtEoF() && !isLBRSample(TraceIt.getCurrentLine(), true)) {
+ while (!TraceIt.isAtEoF() && !isLBRSample(TraceIt.getCurrentLine(), true, IsPreAggregated)) {
TraceIt.advance();
}
// Filter out broken stack sample. We may not have complete frame info
@@ -856,14 +866,14 @@ void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
// Parsing call stack and populate into PerfSample.CallStack
if (!extractCallstack(TraceIt, Sample->CallStack)) {
// Skip the next LBR line matched current call stack
- if (!TraceIt.isAtEoF() && isLBRSample(TraceIt.getCurrentLine(), true))
+ if (!TraceIt.isAtEoF() && isLBRSample(TraceIt.getCurrentLine(), true, IsPreAggregated))
TraceIt.advance();
return;
}
warnIfMissingMMap();
- if (!TraceIt.isAtEoF() && isLBRSample(TraceIt.getCurrentLine(), true)) {
+ if (!TraceIt.isAtEoF() && isLBRSample(TraceIt.getCurrentLine(), true, IsPreAggregated)) {
// Parsing LBR stack and populate into PerfSample.LBRStack
if (extractLBRStack(TraceIt, Sample->LBRStack)) {
if (IgnoreStackSamples) {
@@ -1178,7 +1188,8 @@ void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) {
}
void PerfScriptReader::parseAndAggregateTrace() {
- // Trace line iterator
+ NamedRegionTimer T("parseTrace", "Parse and aggregate trace", TimerGroupName,
+ TimerGroupDesc, TimeProfGen);
TraceStream TraceIt(PerfTraceFile);
while (!TraceIt.isAtEoF())
parseEventOrSample(TraceIt);
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index c3f489040007e..3f1229421d2c9 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -13,6 +13,7 @@
#include "ProfiledBinary.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/Support/Timer.h"
#include <algorithm>
#include <float.h>
#include <unordered_set>
@@ -501,6 +502,8 @@ ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) {
}
void ProfileGenerator::generateProfile() {
+ NamedRegionTimer T("generate", "Generate profile", "profgen",
+ "llvm-profgen", TimeProfGen);
collectProfiledFunctions();
if (Binary->usePseudoProbes()) {
@@ -921,6 +924,8 @@ CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context,
}
void CSProfileGenerator::generateProfile() {
+ NamedRegionTimer T("generate", "Generate CS profile", "profgen",
+ "llvm-profgen", TimeProfGen);
FunctionSamples::ProfileIsCS = true;
collectProfiledFunctions();
``````````
</details>
https://github.com/llvm/llvm-project/pull/191930
More information about the llvm-branch-commits mailing list