[llvm] a044d04 - [llvm-profdata] Support JSON as an output-only format

Kazu Hirata via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 9 16:24:58 PDT 2022


Author: Kazu Hirata
Date: 2022-08-09T16:24:53-07:00
New Revision: a044d0491efeb821f1f23a935515692984b21ad5

URL: https://github.com/llvm/llvm-project/commit/a044d0491efeb821f1f23a935515692984b21ad5
DIFF: https://github.com/llvm/llvm-project/commit/a044d0491efeb821f1f23a935515692984b21ad5.diff

LOG: [llvm-profdata] Support JSON as an output-only format

This patch teaches llvm-profdata to output the sample profile in the
JSON format.  The new option is intended to be used for research and
development purposes.  For example, one can write a Python script to
take a JSON file and analyze how similar different inline instances of
a given function are to each other.

I've chosen JSON because Python can parse it reasonably fast, and it
just takes a couple of lines to read the whole data:

  import json
  with open ('profile.json') as f:
    profile = json.load(f)

Differential Revision: https://reviews.llvm.org/D130944

Added: 
    llvm/test/tools/llvm-profdata/sample-profile-json.test

Modified: 
    llvm/include/llvm/ProfileData/SampleProfReader.h
    llvm/lib/ProfileData/SampleProfReader.cpp
    llvm/tools/llvm-profdata/llvm-profdata.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index b96d6c70dae44..ba64e15115355 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -391,6 +391,9 @@ class SampleProfileReader {
   /// Print all the profiles on stream \p OS.
   void dump(raw_ostream &OS = dbgs());
 
+  /// Print all the profiles on stream \p OS in the JSON format.
+  void dumpJson(raw_ostream &OS = dbgs());
+
   /// Return the samples collected for function \p F.
   FunctionSamples *getSamplesFor(const Function &F) {
     // The function name may have been updated by adding suffix. Call

diff  --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 204e34bff8798..8a97697ffcea4 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -30,6 +30,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/JSON.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/LineIterator.h"
 #include "llvm/Support/MD5.h"
@@ -72,6 +73,79 @@ void SampleProfileReader::dump(raw_ostream &OS) {
     dumpFunctionProfile(I.first, OS);
 }
 
+static void dumpFunctionProfileJson(const FunctionSamples &S, // Writes S to JOS as one JSON object.
+                                    json::OStream &JOS, bool TopLevel = false) {
+  auto DumpBody = [&](const BodySampleMap &BodySamples) { // One JSON object per body sample.
+    for (const auto &I : BodySamples) {
+      const LineLocation &Loc = I.first;
+      const SampleRecord &Sample = I.second;
+      JOS.object([&] {
+        JOS.attribute("line", Loc.LineOffset);
+        if (Loc.Discriminator) // "discriminator" is emitted only when nonzero.
+          JOS.attribute("discriminator", Loc.Discriminator);
+        JOS.attribute("samples", Sample.getSamples());
+
+        auto CallTargets = Sample.getSortedCallTargets();
+        if (!CallTargets.empty()) { // "calls" is emitted only when there are call targets.
+          JOS.attributeArray("calls", [&] {
+            for (const auto &J : CallTargets) {
+              JOS.object([&] {
+                JOS.attribute("function", J.first);
+                JOS.attribute("samples", J.second);
+              });
+            }
+          });
+        }
+      });
+    }
+  };
+
+  auto DumpCallsiteSamples = [&](const CallsiteSampleMap &CallsiteSamples) { // One object per (location, callee).
+    for (const auto &I : CallsiteSamples)
+      for (const auto &FS : I.second) {
+        const LineLocation &Loc = I.first;
+        const FunctionSamples &CalleeSamples = FS.second;
+        JOS.object([&] {
+          JOS.attribute("line", Loc.LineOffset);
+          if (Loc.Discriminator) // Same rule as in DumpBody: skipped when zero.
+            JOS.attribute("discriminator", Loc.Discriminator);
+          JOS.attributeArray( // Recurses into the callee; TopLevel keeps its default (false).
+              "samples", [&] { dumpFunctionProfileJson(CalleeSamples, JOS); });
+        });
+      }
+  };
+
+  JOS.object([&] {
+    JOS.attribute("name", S.getName());
+    JOS.attribute("total", S.getTotalSamples());
+    if (TopLevel) // "head" is written only when the caller passes TopLevel = true.
+      JOS.attribute("head", S.getHeadSamples());
+
+    const auto &BodySamples = S.getBodySamples();
+    if (!BodySamples.empty()) // No "body" key at all when there are no body samples.
+      JOS.attributeArray("body", [&] { DumpBody(BodySamples); });
+
+    const auto &CallsiteSamples = S.getCallsiteSamples();
+    if (!CallsiteSamples.empty()) // Likewise, no "callsites" key when the map is empty.
+      JOS.attributeArray("callsites",
+                         [&] { DumpCallsiteSamples(CallsiteSamples); });
+  });
+}
+
+/// Dump all the function profiles to stream \p OS as a single JSON array.
+void SampleProfileReader::dumpJson(raw_ostream &OS) {
+  std::vector<NameFunctionSamples> V;
+  sortFuncProfiles(Profiles, V); // Presumably fills V with Profiles in sorted order.
+  json::OStream JOS(OS, 2); // Second argument is the indent size (pretty-printed output).
+  JOS.arrayBegin();
+  for (const auto &[FC, FS] : V) // Only FS is used; FC is bound but not read.
+    dumpFunctionProfileJson(*FS, JOS, true); // true = TopLevel, so "head" is included.
+  JOS.arrayEnd();
+
+  // Emit a newline character at the end as json::OStream doesn't emit one.
+  OS << "\n";
+}
+
 /// Parse \p Input as function head.
 ///
 /// Parse one line of \p Input, and update function name in \p FName,

diff  --git a/llvm/test/tools/llvm-profdata/sample-profile-json.test b/llvm/test/tools/llvm-profdata/sample-profile-json.test
new file mode 100644
index 0000000000000..c9f65ea091409
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/sample-profile-json.test
@@ -0,0 +1,104 @@
+RUN: llvm-profdata show --sample --json %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=JSON
+JSON:      [
+JSON-NEXT:   {
+JSON-NEXT:     "name": "main",
+JSON-NEXT:     "total": 184019,
+JSON-NEXT:     "head": 0,
+JSON-NEXT:     "body": [
+JSON-NEXT:       {
+JSON-NEXT:         "line": 4,
+JSON-NEXT:         "samples": 534
+JSON-NEXT:       },
+JSON-NEXT:       {
+JSON-NEXT:         "line": 4,
+JSON-NEXT:         "discriminator": 2,
+JSON-NEXT:         "samples": 534
+JSON-NEXT:       },
+JSON-NEXT:       {
+JSON-NEXT:         "line": 5,
+JSON-NEXT:         "samples": 1075
+JSON-NEXT:       },
+JSON-NEXT:       {
+JSON-NEXT:         "line": 5,
+JSON-NEXT:         "discriminator": 1,
+JSON-NEXT:         "samples": 1075
+JSON-NEXT:       },
+JSON-NEXT:       {
+JSON-NEXT:         "line": 6,
+JSON-NEXT:         "samples": 2080
+JSON-NEXT:       },
+JSON-NEXT:       {
+JSON-NEXT:         "line": 7,
+JSON-NEXT:         "samples": 534
+JSON-NEXT:       },
+JSON-NEXT:       {
+JSON-NEXT:         "line": 9,
+JSON-NEXT:         "samples": 2064,
+JSON-NEXT:         "calls": [
+JSON-NEXT:           {
+JSON-NEXT:             "function": "_Z3bari",
+JSON-NEXT:             "samples": 1471
+JSON-NEXT:           },
+JSON-NEXT:           {
+JSON-NEXT:             "function": "_Z3fooi",
+JSON-NEXT:             "samples": 631
+JSON-NEXT:           }
+JSON-NEXT:         ]
+JSON-NEXT:       }
+JSON-NEXT:     ],
+JSON-NEXT:     "callsites": [
+JSON-NEXT:       {
+JSON-NEXT:         "line": 10,
+JSON-NEXT:         "samples": [
+JSON-NEXT:           {
+JSON-NEXT:             "name": "inline1",
+JSON-NEXT:             "total": 1000,
+JSON-NEXT:             "body": [
+JSON-NEXT:               {
+JSON-NEXT:                 "line": 1,
+JSON-NEXT:                 "samples": 1000
+JSON-NEXT:               }
+JSON-NEXT:             ]
+JSON-NEXT:           }
+JSON-NEXT:         ]
+JSON-NEXT:       },
+JSON-NEXT:       {
+JSON-NEXT:         "line": 10,
+JSON-NEXT:         "samples": [
+JSON-NEXT:           {
+JSON-NEXT:             "name": "inline2",
+JSON-NEXT:             "total": 2000,
+JSON-NEXT:             "body": [
+JSON-NEXT:               {
+JSON-NEXT:                 "line": 1,
+JSON-NEXT:                 "samples": 2000
+JSON-NEXT:               }
+JSON-NEXT:             ]
+JSON-NEXT:           }
+JSON-NEXT:         ]
+JSON-NEXT:       }
+JSON-NEXT:     ]
+JSON-NEXT:   },
+JSON-NEXT:   {
+JSON-NEXT:     "name": "_Z3bari",
+JSON-NEXT:     "total": 20301,
+JSON-NEXT:     "head": 1437,
+JSON-NEXT:     "body": [
+JSON-NEXT:       {
+JSON-NEXT:         "line": 1,
+JSON-NEXT:         "samples": 1437
+JSON-NEXT:       }
+JSON-NEXT:     ]
+JSON-NEXT:   },
+JSON-NEXT:   {
+JSON-NEXT:     "name": "_Z3fooi",
+JSON-NEXT:     "total": 7711,
+JSON-NEXT:     "head": 610,
+JSON-NEXT:     "body": [
+JSON-NEXT:       {
+JSON-NEXT:         "line": 1,
+JSON-NEXT:         "samples": 610
+JSON-NEXT:       }
+JSON-NEXT:     ]
+JSON-NEXT:   }
+JSON-NEXT: ]

diff  --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 3af8f800adcb8..e3921281b4afd 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -2488,7 +2488,7 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
                              const std::string &ShowFunction,
                              bool ShowProfileSymbolList,
                              bool ShowSectionInfoOnly, bool ShowHotFuncList,
-                             raw_fd_ostream &OS) {
+                             bool JsonFormat, raw_fd_ostream &OS) {
   using namespace sampleprof;
   LLVMContext Context;
   auto ReaderOrErr =
@@ -2505,11 +2505,20 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
   if (std::error_code EC = Reader->read())
     exitWithErrorCode(EC, Filename);
 
-  if (ShowAllFunctions || ShowFunction.empty())
-    Reader->dump(OS);
-  else
+  if (ShowAllFunctions || ShowFunction.empty()) {
+    if (JsonFormat)
+      Reader->dumpJson(OS);
+    else
+      Reader->dump(OS);
+  } else {
+    if (JsonFormat)
+      exitWithError(
+          "the JSON format is supported only when all functions are to "
+          "be printed");
+
     // TODO: parse context string to support filtering by contexts.
     Reader->dumpFunctionProfile(StringRef(ShowFunction), OS);
+  }
 
   if (ShowProfileSymbolList) {
     std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
@@ -2582,6 +2591,9 @@ static int show_main(int argc, const char *argv[]) {
   cl::opt<bool> TextFormat(
       "text", cl::init(false),
       cl::desc("Show instr profile data in text dump format"));
+  cl::opt<bool> JsonFormat(
+      "json", cl::init(false),
+      cl::desc("Show sample profile data in the JSON format"));
   cl::opt<bool> ShowIndirectCallTargets(
       "ic-targets", cl::init(false),
       cl::desc("Show indirect call site target values for shown functions"));
@@ -2679,10 +2691,10 @@ static int show_main(int argc, const char *argv[]) {
         ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction,
         TextFormat, ShowBinaryIds, ShowCovered, OS);
   if (ProfileKind == sample)
-    return showSampleProfile(Filename, ShowCounts, TopNFunctions,
-                             ShowAllFunctions, ShowDetailedSummary,
-                             ShowFunction, ShowProfileSymbolList,
-                             ShowSectionInfoOnly, ShowHotFuncList, OS);
+    return showSampleProfile(
+        Filename, ShowCounts, TopNFunctions, ShowAllFunctions,
+        ShowDetailedSummary, ShowFunction, ShowProfileSymbolList,
+        ShowSectionInfoOnly, ShowHotFuncList, JsonFormat, OS);
   return showMemProfProfile(Filename, ProfiledBinary, OS);
 }
 


        


More information about the llvm-commits mailing list