[llvm] 1f0bc61 - [llvm-porfgen] Allow perf data as input

Wenlei He via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 29 23:00:36 PDT 2021


Author: Wenlei He
Date: 2021-09-29T22:57:35-07:00
New Revision: 1f0bc617bdd11a4f3b0d820603b0089f81aca79b

URL: https://github.com/llvm/llvm-project/commit/1f0bc617bdd11a4f3b0d820603b0089f81aca79b
DIFF: https://github.com/llvm/llvm-project/commit/1f0bc617bdd11a4f3b0d820603b0089f81aca79b.diff

LOG: [llvm-porfgen] Allow perf data as input

This change enables llvm-profgen to take raw perf data as alternative input format. Sometimes we need to retrieve evenets for processes with matching binary. Using perf data as input allows us to retrieve process Ids from mmap events for matching binary, then filter by process id during perf script generation.

Differential Revision: https://reviews.llvm.org/D110793

Added: 
    

Modified: 
    llvm/tools/llvm-profgen/PerfReader.cpp
    llvm/tools/llvm-profgen/PerfReader.h
    llvm/tools/llvm-profgen/llvm-profgen.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index c1a6b3bbcaaa..019ca5ab4651 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -8,6 +8,7 @@
 #include "PerfReader.h"
 #include "ProfileGenerator.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Process.h"
 
 #define DEBUG_TYPE "perf-reader"
 
@@ -30,6 +31,7 @@ cl::opt<bool>
                        cl::desc("Ignore call stack samples for hybrid samples "
                                 "and produce context-insensitive profile."));
 
+extern cl::opt<std::string> PerfTraceFilename;
 extern cl::opt<bool> ShowDisassemblyOnly;
 extern cl::opt<bool> ShowSourceLocations;
 extern cl::opt<std::string> OutputFilename;
@@ -278,9 +280,15 @@ bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {
 std::unique_ptr<PerfReaderBase> PerfReaderBase::create(ProfiledBinary *Binary,
                                                        StringRef PerfInputFile,
                                                        bool IsPerfData) {
-  // TODO: for perf data input, we need to convert them into perf script first.
-  if (IsPerfData)
-    exitWithError("Perf data input not supported yet.");
+  // For perf data input, we need to convert them into perf script first.
+  if (IsPerfData) {
+    std::string ConvertedPerfScript =
+        convertPerfDataToTrace(Binary, PerfInputFile);
+    // Let commoand opt own the string for converted perf trace file name
+    PerfTraceFilename = ConvertedPerfScript;
+    PerfInputFile = PerfTraceFilename;
+  }
+
   PerfScriptType PerfType = checkPerfScriptType(PerfInputFile);
   std::unique_ptr<PerfReaderBase> PerfReader;
   if (PerfType == PERF_LBR_STACK) {
@@ -294,6 +302,47 @@ std::unique_ptr<PerfReaderBase> PerfReaderBase::create(ProfiledBinary *Binary,
   return PerfReader;
 }
 
+std::string PerfReaderBase::convertPerfDataToTrace(ProfiledBinary *Binary,
+                                                   StringRef PerfData) {
+  // Run perf script to retrieve PIDs matching binary we're interested in.
+  auto PerfExecutable = sys::Process::FindInEnvPath("PATH", "perf");
+  if (!PerfExecutable) {
+    exitWithError("Perf not found.");
+  }
+  std::string PerfPath = *PerfExecutable;
+  std::string PerfTraceFile = PerfData.str() + ".script.tmp";
+  StringRef ScriptMMapArgs[] = {PerfPath, "script",   "--show-mmap-events",
+                                "-F",     "comm,pid", "-i",
+                                PerfData};
+  Optional<StringRef> Redirects[] = {llvm::None,                // Stdin
+                                     StringRef(PerfTraceFile),  // Stdout
+                                     StringRef(PerfTraceFile)}; // Stderr
+  sys::ExecuteAndWait(PerfPath, ScriptMMapArgs, llvm::None, Redirects);
+
+  // Collect the PIDs
+  TraceStream TraceIt(PerfTraceFile);
+  std::string PIDs;
+  while (!TraceIt.isAtEoF()) {
+    MMapEvent MMap;
+    if (isMMap2Event(TraceIt.getCurrentLine()) &&
+        extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) {
+      if (!PIDs.empty()) {
+        PIDs.append(",");
+      }
+      PIDs.append(utostr(MMap.PID));
+    }
+    TraceIt.advance();
+  }
+
+  // Run perf script again to retrieve events for PIDs collected above
+  StringRef ScriptSampleArgs[] = {PerfPath, "script",     "--show-mmap-events",
+                                  "-F",     "ip,brstack", "--pid",
+                                  PIDs,     "-i",         PerfData};
+  sys::ExecuteAndWait(PerfPath, ScriptSampleArgs, llvm::None, Redirects);
+
+  return PerfTraceFile;
+}
+
 void PerfReaderBase::updateBinaryAddress(const MMapEvent &Event) {
   // Drop the event which doesn't belong to user-provided binary
   StringRef BinaryName = llvm::sys::path::filename(Event.BinaryPath);
@@ -514,7 +563,7 @@ bool PerfReaderBase::extractCallstack(TraceStream &TraceIt,
     }
     TraceIt.advance();
     // Currently intermixed frame from 
diff erent binaries is not supported.
-    // Ignore bottom frames not from binary of interest.
+    // Ignore caller frames not from binary of interest.
     if (!Binary->addressIsCode(FrameAddr))
       break;
 
@@ -724,7 +773,9 @@ void PerfReaderBase::parseSample(TraceStream &TraceIt) {
   parseSample(TraceIt, Count);
 }
 
-void PerfReaderBase::parseMMap2Event(TraceStream &TraceIt) {
+bool PerfReaderBase::extractMMap2EventForBinary(ProfiledBinary *Binary,
+                                                StringRef Line,
+                                                MMapEvent &MMap) {
   // Parse a line like:
   //  PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
   //  08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
@@ -749,29 +800,34 @@ void PerfReaderBase::parseMMap2Event(TraceStream &TraceIt) {
 
   Regex RegMmap2(Pattern);
   SmallVector<StringRef, 6> Fields;
-  bool R = RegMmap2.match(TraceIt.getCurrentLine(), &Fields);
+  bool R = RegMmap2.match(Line, &Fields);
   if (!R) {
-    std::string ErrorMsg = "Cannot parse mmap event: Line" +
-                           Twine(TraceIt.getLineNumber()).str() + ": " +
-                           TraceIt.getCurrentLine().str() + " \n";
+    std::string ErrorMsg = "Cannot parse mmap event: " + Line.str() + " \n";
     exitWithError(ErrorMsg);
   }
-  MMapEvent Event;
-  Fields[PID].getAsInteger(10, Event.PID);
-  Fields[MMAPPED_ADDRESS].getAsInteger(0, Event.Address);
-  Fields[MMAPPED_SIZE].getAsInteger(0, Event.Size);
-  Fields[PAGE_OFFSET].getAsInteger(0, Event.Offset);
-  Event.BinaryPath = Fields[BINARY_PATH];
-  updateBinaryAddress(Event);
+  Fields[PID].getAsInteger(10, MMap.PID);
+  Fields[MMAPPED_ADDRESS].getAsInteger(0, MMap.Address);
+  Fields[MMAPPED_SIZE].getAsInteger(0, MMap.Size);
+  Fields[PAGE_OFFSET].getAsInteger(0, MMap.Offset);
+  MMap.BinaryPath = Fields[BINARY_PATH];
   if (ShowMmapEvents) {
-    outs() << "Mmap: Binary " << Event.BinaryPath << " loaded at "
-           << format("0x%" PRIx64 ":", Event.Address) << " \n";
+    outs() << "Mmap: Binary " << MMap.BinaryPath << " loaded at "
+           << format("0x%" PRIx64 ":", MMap.Address) << " \n";
   }
+
+  StringRef BinaryName = llvm::sys::path::filename(MMap.BinaryPath);
+  return Binary->getName() == BinaryName;
+}
+
+void PerfReaderBase::parseMMap2Event(TraceStream &TraceIt) {
+  MMapEvent MMap;
+  if (extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap))
+    updateBinaryAddress(MMap);
   TraceIt.advance();
 }
 
 void PerfReaderBase::parseEventOrSample(TraceStream &TraceIt) {
-  if (TraceIt.getCurrentLine().startswith("PERF_RECORD_MMAP2"))
+  if (isMMap2Event(TraceIt.getCurrentLine()))
     parseMMap2Event(TraceIt);
   else
     parseSample(TraceIt);
@@ -799,6 +855,19 @@ bool PerfReaderBase::isLBRSample(StringRef Line) {
   return false;
 }
 
+bool PerfReaderBase::isMMap2Event(StringRef Line) {
+  // Short cut to avoid string find is possible.
+  if (Line.empty() || Line.size() < 50)
+    return false;
+
+  if (std::isdigit(Line[0]))
+    return false;
+
+  // PERF_RECORD_MMAP2 does not appear at the beginning of the line
+  // for ` perf script  --show-mmap-events  -i ...`
+  return Line.find("PERF_RECORD_MMAP2") != StringRef::npos;
+}
+
 // The raw hybird sample is like
 // e.g.
 // 	          4005dc    # call stack leaf

diff  --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h
index c9f2240d0151..9870bcc5411a 100644
--- a/llvm/tools/llvm-profgen/PerfReader.h
+++ b/llvm/tools/llvm-profgen/PerfReader.h
@@ -530,15 +530,22 @@ class PerfReaderBase {
     uint64_t Offset = 0;
     StringRef BinaryPath;
   };
-
-  // Check whther a given line is LBR sample
+  // Generate perf script from perf data
+  static std::string convertPerfDataToTrace(ProfiledBinary *Binary,
+                                            StringRef PerfData);
+  // Check whether a given line is LBR sample
   static bool isLBRSample(StringRef Line);
+  // Check whether a given line is MMAP event
+  static bool isMMap2Event(StringRef Line);
   // Extract perf script type by peaking at the input
   static PerfScriptType checkPerfScriptType(StringRef FileName);
-  // Update base address based on mmap events
-  void updateBinaryAddress(const MMapEvent &Event);
   // Parse a single line of a PERF_RECORD_MMAP2 event looking for a
   // mapping between the binary name and its memory layout.
+  static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line,
+                                         MMapEvent &MMap);
+  // Update base address based on mmap events
+  void updateBinaryAddress(const MMapEvent &Event);
+  // Parse mmap event and update binary address
   void parseMMap2Event(TraceStream &TraceIt);
   // Parse perf events/samples and do aggregation
   void parseAndAggregateTrace();

diff  --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp
index 48edd4519e4c..b5be250fc12f 100644
--- a/llvm/tools/llvm-profgen/llvm-profgen.cpp
+++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp
@@ -21,7 +21,7 @@
 
 static cl::OptionCategory ProfGenCategory("ProfGen Options");
 
-static cl::opt<std::string> PerfTraceFilename(
+cl::opt<std::string> PerfTraceFilename(
     "perfscript", cl::value_desc("perfscript"), cl::ZeroOrMore,
     llvm::cl::MiscFlags::CommaSeparated,
     cl::desc("Path of perf-script trace created by Linux perf tool with "


        


More information about the llvm-commits mailing list