[llvm-branch-commits] [llvm-profgen] Add --time-profgen (PR #191930)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Apr 13 20:11:16 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-pgo

Author: Amir Ayupov (aaupov)

<details>
<summary>Changes</summary>

Add `NamedRegionTimer`s, enabled by the new `--time-profgen` option (off by default), to the main llvm-profgen phases:
- Parse and aggregate trace (`parseAndAggregateTrace`)
- Unwind samples (`unwindSamples`)
- Generate profile (`ProfileGenerator::generateProfile`)
- Generate CS profile (`CSProfileGenerator::generateProfile`)

Test Plan:
```
$ llvm-profgen --time-profgen ...

===-------------------------------------------------------------------------===
                                  llvm-profgen
===-------------------------------------------------------------------------===
  Total Execution Time: 2826.6549 seconds (2873.3410 wall clock)

   ---User Time---   --System Time--   --User+System--   ---Wall Time---  --- Name ---
  1059.4929 ( 38.1%)   8.5146 ( 17.3%)  1068.0075 ( 37.8%)  1090.6604 ( 38.0%)  Generate CS profile
  892.6504 ( 32.1%)  39.8720 ( 80.9%)  932.5224 ( 33.0%)  950.7938 ( 33.1%)  Parse and aggregate trace
  825.2141 ( 29.7%)   0.9110 (  1.8%)  826.1250 ( 29.2%)  831.8868 ( 29.0%)  Unwind samples
  2777.3573 (100.0%)  49.2975 (100.0%)  2826.6549 (100.0%)  2873.3410 (100.0%)  Total
```


---
Full diff: https://github.com/llvm/llvm-project/pull/191930.diff


3 Files Affected:

- (modified) llvm/tools/llvm-profgen/Options.h (+1) 
- (modified) llvm/tools/llvm-profgen/PerfReader.cpp (+16-5) 
- (modified) llvm/tools/llvm-profgen/ProfileGenerator.cpp (+5) 


``````````diff
diff --git a/llvm/tools/llvm-profgen/Options.h b/llvm/tools/llvm-profgen/Options.h
index b2c941fb01945..395a55726274d 100644
--- a/llvm/tools/llvm-profgen/Options.h
+++ b/llvm/tools/llvm-profgen/Options.h
@@ -23,6 +23,7 @@ extern cl::opt<bool> InferMissingFrames;
 extern cl::opt<bool> EnableCSPreInliner;
 extern cl::opt<bool> UseContextCostForPreInliner;
 extern cl::opt<bool> LoadFunctionFromSymbol;
+extern cl::opt<bool> TimeProfGen;
 
 } // end namespace llvm
 
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index e1024092dbe0d..aa913a336a76b 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Support/LineIterator.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Process.h"
+#include "llvm/Support/Timer.h"
 #include "llvm/Support/ToolOutputFile.h"
 
 #define DEBUG_TYPE "perf-reader"
@@ -67,6 +68,13 @@ static cl::opt<std::string> FilterBuildID(
              "with a matching build ID prefix are kept."),
     cl::cat(ProfGenCategory));
 
+cl::opt<bool> TimeProfGen("time-profgen",
+                         cl::desc("Time llvm-profgen phases"), cl::init(false),
+                         cl::cat(ProfGenCategory));
+
+static const char *TimerGroupName = "profgen";
+static const char *TimerGroupDesc = "llvm-profgen";
+
 namespace sampleprof {
 
 void VirtualUnwinder::unwindCall(UnwindState &State) {
@@ -601,6 +609,8 @@ static std::string getContextKeyStr(ContextKey *K,
 }
 
 void HybridPerfReader::unwindSamples() {
+  NamedRegionTimer T("unwind", "Unwind samples", TimerGroupName,
+                     TimerGroupDesc, TimeProfGen);
   VirtualUnwinder Unwinder(&SampleCounters, Binary);
   for (const auto &Item : AggregatedSamples) {
     const PerfSample *Sample = Item.first.getPtr();
@@ -756,7 +766,7 @@ bool PerfScriptReader::extractCallstack(TraceStream &TraceIt,
   // It's in bottom-up order with each frame in one line.
 
   // Extract stack frames from sample
-  while (!TraceIt.isAtEoF() && !isLBRSample(TraceIt.getCurrentLine(), true)) {
+  while (!TraceIt.isAtEoF() && !isLBRSample(TraceIt.getCurrentLine(), true, IsPreAggregated)) {
     StringRef FrameStr = TraceIt.getCurrentLine().ltrim();
     uint64_t FrameAddr = 0;
     StringRef FrameBuildID;
@@ -811,7 +821,7 @@ bool PerfScriptReader::extractCallstack(TraceStream &TraceIt,
   // Skip other unrelated line, find the next valid LBR line
   // Note that even for empty call stack, we should skip the address at the
   // bottom, otherwise the following pass may generate a truncated callstack
-  while (!TraceIt.isAtEoF() && !isLBRSample(TraceIt.getCurrentLine(), true)) {
+  while (!TraceIt.isAtEoF() && !isLBRSample(TraceIt.getCurrentLine(), true, IsPreAggregated)) {
     TraceIt.advance();
   }
   // Filter out broken stack sample. We may not have complete frame info
@@ -856,14 +866,14 @@ void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
   // Parsing call stack and populate into PerfSample.CallStack
   if (!extractCallstack(TraceIt, Sample->CallStack)) {
     // Skip the next LBR line matched current call stack
-    if (!TraceIt.isAtEoF() && isLBRSample(TraceIt.getCurrentLine(), true))
+    if (!TraceIt.isAtEoF() && isLBRSample(TraceIt.getCurrentLine(), true, IsPreAggregated))
       TraceIt.advance();
     return;
   }
 
   warnIfMissingMMap();
 
-  if (!TraceIt.isAtEoF() && isLBRSample(TraceIt.getCurrentLine(), true)) {
+  if (!TraceIt.isAtEoF() && isLBRSample(TraceIt.getCurrentLine(), true, IsPreAggregated)) {
     // Parsing LBR stack and populate into PerfSample.LBRStack
     if (extractLBRStack(TraceIt, Sample->LBRStack)) {
       if (IgnoreStackSamples) {
@@ -1178,7 +1188,8 @@ void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) {
 }
 
 void PerfScriptReader::parseAndAggregateTrace() {
-  // Trace line iterator
+  NamedRegionTimer T("parseTrace", "Parse and aggregate trace", TimerGroupName,
+                     TimerGroupDesc, TimeProfGen);
   TraceStream TraceIt(PerfTraceFile);
   while (!TraceIt.isAtEoF())
     parseEventOrSample(TraceIt);
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index c3f489040007e..3f1229421d2c9 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -13,6 +13,7 @@
 #include "ProfiledBinary.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
 #include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/Support/Timer.h"
 #include <algorithm>
 #include <float.h>
 #include <unordered_set>
@@ -501,6 +502,8 @@ ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) {
 }
 
 void ProfileGenerator::generateProfile() {
+  NamedRegionTimer T("generate", "Generate profile", "profgen",
+                     "llvm-profgen", TimeProfGen);
   collectProfiledFunctions();
 
   if (Binary->usePseudoProbes()) {
@@ -921,6 +924,8 @@ CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context,
 }
 
 void CSProfileGenerator::generateProfile() {
+  NamedRegionTimer T("generate", "Generate CS profile", "profgen",
+                     "llvm-profgen", TimeProfGen);
   FunctionSamples::ProfileIsCS = true;
 
   collectProfiledFunctions();

``````````

</details>


https://github.com/llvm/llvm-project/pull/191930


More information about the llvm-branch-commits mailing list