[llvm] [ctxprof] dump profiles using yaml (for testing) (PR #123108)

Mircea Trofin via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 15 14:11:18 PST 2025


https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/123108

>From 39ee074d77b9732cbbe53a1b07ca267ecfeb3966 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Wed, 15 Jan 2025 11:07:42 -0800
Subject: [PATCH] [ctxprof] use yaml for serialization (for testing)

---
 llvm/include/llvm/Analysis/CtxProfAnalysis.h  |  2 +-
 .../llvm/ProfileData/PGOCtxProfReader.h       |  2 +
 .../llvm/ProfileData/PGOCtxProfWriter.h       |  8 --
 llvm/lib/Analysis/CtxProfAnalysis.cpp         | 51 ++---------
 llvm/lib/ProfileData/PGOCtxProfReader.cpp     | 87 +++++++++++++++++++
 llvm/lib/ProfileData/PGOCtxProfWriter.cpp     | 10 ++-
 .../Analysis/CtxProfAnalysis/full-cycle.ll    | 62 +++----------
 llvm/test/Analysis/CtxProfAnalysis/inline.ll  | 34 ++++----
 .../Analysis/CtxProfAnalysis/json_equals.py   | 15 ----
 llvm/test/Analysis/CtxProfAnalysis/load.ll    | 33 ++-----
 .../Utils/CallPromotionUtilsTest.cpp          | 70 +++++++--------
 11 files changed, 174 insertions(+), 200 deletions(-)
 delete mode 100644 llvm/test/Analysis/CtxProfAnalysis/json_equals.py

diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index be040d5eca5f31..ea292250c63a92 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -140,7 +140,7 @@ class CtxProfAnalysis : public AnalysisInfoMixin<CtxProfAnalysis> {
 class CtxProfAnalysisPrinterPass
     : public PassInfoMixin<CtxProfAnalysisPrinterPass> {
 public:
-  enum class PrintMode { Everything, JSON };
+  enum class PrintMode { Everything, YAML };
   explicit CtxProfAnalysisPrinterPass(raw_ostream &OS);
 
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
index d358041e3a001f..2796eec1299183 100644
--- a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
+++ b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
@@ -183,5 +183,7 @@ class PGOCtxProfileReader final {
 
   Expected<std::map<GlobalValue::GUID, PGOCtxProfContext>> loadContexts();
 };
+
+void convertCtxProfToYaml(raw_ostream &OS, const PGOCtxProfContext::CallTargetMapTy &);
 } // namespace llvm
 #endif
diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h b/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h
index f6158609c12855..43a190ae0aa05f 100644
--- a/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h
+++ b/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h
@@ -81,14 +81,6 @@ class PGOCtxProfileWriter final {
   static constexpr StringRef ContainerMagic = "CTXP";
 };
 
-/// Representation of the context node suitable for yaml / json serialization /
-/// deserialization.
-struct SerializableCtxRepresentation {
-  ctx_profile::GUID Guid = 0;
-  std::vector<uint64_t> Counters;
-  std::vector<std::vector<SerializableCtxRepresentation>> Callsites;
-};
-
 Error createCtxProfFromYAML(StringRef Profile, raw_ostream &Out);
 } // namespace llvm
 #endif
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 825e517cd09f52..bbf29e0d370e7e 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -19,7 +19,6 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/ProfileData/PGOCtxProfReader.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/JSON.h"
 #include "llvm/Support/MemoryBuffer.h"
 
 #define DEBUG_TYPE "ctx_prof"
@@ -31,49 +30,13 @@ cl::opt<std::string>
 
 static cl::opt<CtxProfAnalysisPrinterPass::PrintMode> PrintLevel(
     "ctx-profile-printer-level",
-    cl::init(CtxProfAnalysisPrinterPass::PrintMode::JSON), cl::Hidden,
+    cl::init(CtxProfAnalysisPrinterPass::PrintMode::YAML), cl::Hidden,
     cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything,
                           "everything", "print everything - most verbose"),
-               clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::JSON, "json",
-                          "just the json representation of the profile")),
+               clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::YAML, "yaml",
+                          "just the yaml representation of the profile")),
     cl::desc("Verbosity level of the contextual profile printer pass."));
 
-namespace llvm {
-namespace json {
-Value toJSON(const PGOCtxProfContext &P) {
-  Object Ret;
-  Ret["Guid"] = P.guid();
-  Ret["Counters"] = Array(P.counters());
-  if (P.callsites().empty())
-    return Ret;
-  auto AllCS =
-      ::llvm::map_range(P.callsites(), [](const auto &P) { return P.first; });
-  auto MaxIt = ::llvm::max_element(AllCS);
-  assert(MaxIt != AllCS.end() && "We should have a max value because the "
-                                 "callsites collection is not empty.");
-  Array CSites;
-  // Iterate to, and including, the maximum index.
-  for (auto I = 0U, Max = *MaxIt; I <= Max; ++I) {
-    CSites.push_back(Array());
-    Array &Targets = *CSites.back().getAsArray();
-    if (P.hasCallsite(I))
-      for (const auto &[_, Ctx] : P.callsite(I))
-        Targets.push_back(toJSON(Ctx));
-  }
-  Ret["Callsites"] = std::move(CSites);
-
-  return Ret;
-}
-
-Value toJSON(const PGOCtxProfContext::CallTargetMapTy &P) {
-  Array Ret;
-  for (const auto &[_, Ctx] : P)
-    Ret.push_back(toJSON(Ctx));
-  return Ret;
-}
-} // namespace json
-} // namespace llvm
-
 const char *AssignGUIDPass::GUIDMetadataName = "guid";
 
 PreservedAnalyses AssignGUIDPass::run(Module &M, ModuleAnalysisManager &MAM) {
@@ -214,15 +177,13 @@ PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M,
          << ". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex << "\n";
   }
 
-  const auto JSONed = ::llvm::json::toJSON(C.profiles());
-
   if (Mode == PrintMode::Everything)
     OS << "\nCurrent Profile:\n";
-  OS << formatv("{0:2}", JSONed);
-  if (Mode == PrintMode::JSON)
+  convertCtxProfToYaml(OS, C.profiles());
+  OS << "\n";
+  if (Mode == PrintMode::YAML)
     return PreservedAnalyses::all();
 
-  OS << "\n";
   OS << "\nFlat Profile:\n";
   auto Flat = C.flatten();
   for (const auto &[Guid, Counters] : Flat) {
diff --git a/llvm/lib/ProfileData/PGOCtxProfReader.cpp b/llvm/lib/ProfileData/PGOCtxProfReader.cpp
index eb89d7c2f6d1d9..ed2686fa0d7a06 100644
--- a/llvm/lib/ProfileData/PGOCtxProfReader.cpp
+++ b/llvm/lib/ProfileData/PGOCtxProfReader.cpp
@@ -17,6 +17,10 @@
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/PGOCtxProfWriter.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <iterator>
+#include <utility>
 
 using namespace llvm;
 
@@ -176,3 +180,86 @@ PGOCtxProfileReader::loadContexts() {
   }
   return std::move(Ret);
 }
+
+namespace {
+// We want to pass `const` PGOCtxProfContext references to the yaml
+// converter, but the regular yaml mapping APIs are designed to handle both
+// serialization and deserialization, which prevents using const for
+// serialization. Using an intermediate data structure is overkill, both
+// space-wise and design complexity-wise. Instead, we use the lower-level APIs.
+void toYaml(yaml::Output &Out, const PGOCtxProfContext &Ctx);
+
+void toYaml(yaml::Output &Out,
+            const PGOCtxProfContext::CallTargetMapTy &CallTargets) {
+  Out.beginSequence();
+  size_t Index = 0;
+  void *SaveData = nullptr;
+  for (const auto &[_, Ctx] : CallTargets) {
+    Out.preflightElement(Index++, SaveData);
+    toYaml(Out, Ctx);
+    Out.postflightElement(nullptr);
+  }
+  Out.endSequence();
+}
+
+void toYaml(yaml::Output &Out,
+            const PGOCtxProfContext::CallsiteMapTy &Callsites) {
+  auto AllCS = ::llvm::make_first_range(Callsites);
+  auto MaxIt = ::llvm::max_element(AllCS);
+  assert(MaxIt != AllCS.end() && "We should have a max value because the "
+                                 "callsites collection is not empty.");
+  void *SaveData = nullptr;
+  Out.beginSequence();
+  for (auto I = 0U; I <= *MaxIt; ++I) {
+    Out.preflightElement(I, SaveData);
+    auto It = Callsites.find(I);
+    if (It == Callsites.end()) {
+      // This produces an empty `[  ]` sequence, which is what we want here.
+      Out.beginFlowSequence();
+      Out.endFlowSequence();
+    } else {
+      toYaml(Out, It->second);
+    }
+    Out.postflightElement(nullptr);
+  }
+  Out.endSequence();
+}
+
+void toYaml(yaml::Output &Out, const PGOCtxProfContext &Ctx) {
+  yaml::EmptyContext Empty;
+  Out.beginMapping();
+  void *SaveInfo = nullptr;
+  bool UseDefault = false;
+  {
+    Out.preflightKey("Guid", /*Required=*/true, /*SameAsDefault=*/false,
+                     UseDefault, SaveInfo);
+    auto Guid = Ctx.guid();
+    yaml::yamlize(Out, Guid, true, Empty);
+    Out.postflightKey(nullptr);
+  }
+  {
+    Out.preflightKey("Counters", true, false, UseDefault, SaveInfo);
+    Out.beginFlowSequence();
+    for (size_t I = 0U, E = Ctx.counters().size(); I < E; ++I) {
+      Out.preflightFlowElement(I, SaveInfo);
+      uint64_t V = Ctx.counters()[I];
+      yaml::yamlize(Out, V, true, Empty);
+      Out.postflightFlowElement(SaveInfo);
+    }
+    Out.endFlowSequence();
+    Out.postflightKey(nullptr);
+  }
+  if (!Ctx.callsites().empty()) {
+    Out.preflightKey("Callsites", true, false, UseDefault, SaveInfo);
+    toYaml(Out, Ctx.callsites());
+    Out.postflightKey(nullptr);
+  }
+  Out.endMapping();
+}
+} // namespace
+
+void llvm::convertCtxProfToYaml(raw_ostream &OS,
+                         const PGOCtxProfContext::CallTargetMapTy &Profiles) {
+  yaml::Output Out(OS);
+  toYaml(Out, Profiles);
+}
\ No newline at end of file
diff --git a/llvm/lib/ProfileData/PGOCtxProfWriter.cpp b/llvm/lib/ProfileData/PGOCtxProfWriter.cpp
index d22aadd6bd7eb0..3d3da848174895 100644
--- a/llvm/lib/ProfileData/PGOCtxProfWriter.cpp
+++ b/llvm/lib/ProfileData/PGOCtxProfWriter.cpp
@@ -14,7 +14,6 @@
 #include "llvm/Bitstream/BitCodeEnums.h"
 #include "llvm/ProfileData/CtxInstrContextNode.h"
 #include "llvm/Support/Error.h"
-#include "llvm/Support/JSON.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/YAMLTraits.h"
 #include "llvm/Support/raw_ostream.h"
@@ -89,6 +88,15 @@ void PGOCtxProfileWriter::write(const ContextNode &RootNode) {
 }
 
 namespace {
+
+/// Representation of the context node suitable for yaml serialization /
+/// deserialization.
+struct SerializableCtxRepresentation {
+  ctx_profile::GUID Guid = 0;
+  std::vector<uint64_t> Counters;
+  std::vector<std::vector<SerializableCtxRepresentation>> Callsites;
+};
+
 ctx_profile::ContextNode *
 createNode(std::vector<std::unique_ptr<char[]>> &Nodes,
            const std::vector<SerializableCtxRepresentation> &DCList);
diff --git a/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
index 5a8a2f4cad84b7..49d34e71c5d087 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
@@ -88,54 +88,20 @@ Function Info:
 10507721908651011566 : entrypoint. MaxCounterID: 1. MaxCallsiteID: 2
 
 Current Profile:
-[
-  {
-    "Callsites": [
-      [
-        {
-          "Callsites": [
-            [
-              {
-                "Counters": [
-                  10,
-                  7
-                ],
-                "Guid": 3087265239403591524
-              }
-            ]
-          ],
-          "Counters": [
-            7
-          ],
-          "Guid": 2072045998141807037
-        }
-      ],
-      [
-        {
-          "Callsites": [
-            [
-              {
-                "Counters": [
-                  1,
-                  2
-                ],
-                "Guid": 3087265239403591524
-              }
-            ]
-          ],
-          "Counters": [
-            2
-          ],
-          "Guid": 4197650231481825559
-        }
-      ]
-    ],
-    "Counters": [
-      1
-    ],
-    "Guid": 10507721908651011566
-  }
-]
+
+- Guid:            10507721908651011566
+  Counters:        [ 1 ]
+  Callsites:
+    - - Guid:            2072045998141807037
+        Counters:        [ 7 ]
+        Callsites:
+          - - Guid:            3087265239403591524
+              Counters:        [ 10, 7 ]
+    - - Guid:            4197650231481825559
+        Counters:        [ 2 ]
+        Callsites:
+          - - Guid:            3087265239403591524
+              Counters:        [ 1, 2 ]
 
 Flat Profile:
 2072045998141807037 : 7 
diff --git a/llvm/test/Analysis/CtxProfAnalysis/inline.ll b/llvm/test/Analysis/CtxProfAnalysis/inline.ll
index 6c1e199c2ba1c0..2b774ebfab5d0a 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/inline.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/inline.ll
@@ -1,11 +1,12 @@
+; REQUIRES: x86_64-linux
 ; RUN: rm -rf %t
 ; RUN: split-file %s %t
 ; RUN: llvm-ctxprof-util fromYAML --input=%t/profile.yaml --output=%t/profile.ctxprofdata
 
 ; RUN: opt -passes='module-inline,print<ctx-prof-analysis>' -ctx-profile-printer-level=everything %t/module.ll -S \
-; RUN:   -use-ctx-profile=%t/profile.ctxprofdata -ctx-profile-printer-level=json \
-; RUN:   -o - 2> %t/profile-final.txt | FileCheck %s
-; RUN: %python %S/json_equals.py %t/profile-final.txt %t/expected.json
+; RUN:   -use-ctx-profile=%t/profile.ctxprofdata -ctx-profile-printer-level=yaml \
+; RUN:   -o - 2> %t/profile-final.yaml | FileCheck %s
+; RUN: diff %t/profile-final.yaml %t/expected.yaml
 
 ; There are 2 calls to @a from @entrypoint. We only inline the one callsite
 ; marked as alwaysinline, the rest are blocked (marked noinline). After the inline,
@@ -109,17 +110,16 @@ define i32 @b() !guid !2 {
                   Callsites:  -
                                 - Guid: 1002
                                   Counters: [500]
-;--- expected.json
-[
-  { "Guid": 1000,
-    "Counters": [10, 2, 8, 100],
-    "Callsites": [
-      [],
-      [ { "Guid": 1001,
-          "Counters": [8, 500],
-          "Callsites": [[{"Guid": 1002, "Counters": [500]}]]}
-      ],
-      [{ "Guid": 1002, "Counters": [100]}]
-    ]
-  }
-]
+;--- expected.yaml
+
+- Guid:            1000
+  Counters:        [ 10, 2, 8, 100 ]
+  Callsites:
+    - [  ]
+    - - Guid:            1001
+        Counters:        [ 8, 500 ]
+        Callsites:
+          - - Guid:            1002
+              Counters:        [ 500 ]
+    - - Guid:            1002
+        Counters:        [ 100 ]
diff --git a/llvm/test/Analysis/CtxProfAnalysis/json_equals.py b/llvm/test/Analysis/CtxProfAnalysis/json_equals.py
deleted file mode 100644
index 8b94dda5528c5b..00000000000000
--- a/llvm/test/Analysis/CtxProfAnalysis/json_equals.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import json
-import sys
-
-
-def to_json(fname: str):
-    with open(fname) as f:
-        return json.load(f)
-
-
-a = to_json(sys.argv[1])
-b = to_json(sys.argv[2])
-
-if a == b:
-    exit(0)
-exit(1)
diff --git a/llvm/test/Analysis/CtxProfAnalysis/load.ll b/llvm/test/Analysis/CtxProfAnalysis/load.ll
index 62c6344ed3fec0..2618903bd62a82 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/load.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/load.ll
@@ -40,31 +40,14 @@ Function Info:
 12074870348631550642 : another_entrypoint_no_callees. MaxCounterID: 1. MaxCallsiteID: 0
 
 Current Profile:
-[
-  {
-    "Callsites": [
-      [
-        {
-          "Counters": [
-            6,
-            7
-          ],
-          "Guid": 728453322856651412
-        }
-      ]
-    ],
-    "Counters": [
-      1
-    ],
-    "Guid": 11872291593386833696
-  },
-  {
-    "Counters": [
-      5
-    ],
-    "Guid": 12074870348631550642
-  }
-]
+
+- Guid:            11872291593386833696
+  Counters:        [ 1 ]
+  Callsites:
+    - - Guid:            728453322856651412
+        Counters:        [ 6, 7 ]
+- Guid:            12074870348631550642
+  Counters:        [ 5 ]
 
 Flat Profile:
 728453322856651412 : 6 7 
diff --git a/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp b/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp
index 4420a6d0654993..57a8f75a3a31aa 100644
--- a/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp
@@ -18,7 +18,6 @@
 #include "llvm/IR/PassInstrumentation.h"
 #include "llvm/ProfileData/PGOCtxProfReader.h"
 #include "llvm/ProfileData/PGOCtxProfWriter.h"
-#include "llvm/Support/JSON.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Testing/Support/SupportHelpers.h"
@@ -572,43 +571,34 @@ define i32 @f4() !guid !3 {
   raw_string_ostream OS(Str);
   CtxProfAnalysisPrinterPass Printer(OS);
   Printer.run(*M, MAM);
-  const char *Expected = R"json(
-  [
-  {
-    "Guid": 1000,
-    "Counters": [1, 11, 22],
-    "Callsites": [
-      [{ "Guid": 1001,
-          "Counters": [10]}, 
-        { "Guid": 1003,
-          "Counters": [12]
-        }], 
-        [{ "Guid": 1002,
-          "Counters": [11],
-          "Callsites": [
-          [{ "Guid": 1004,
-            "Counters": [13] }]]}]]
-  },
-  {
-    "Guid": 1005,
-    "Counters": [2],
-    "Callsites": [
-      [{ "Guid": 1000,
-         "Counters": [1, 102, 204],
-         "Callsites": [
-            [{ "Guid": 1001,
-               "Counters": [101]}, 
-             { "Guid": 1003,
-               "Counters": [103]}],
-            [{ "Guid": 1002,
-               "Counters": [102],
-               "Callsites": [
-            [{ "Guid": 1004,
-               "Counters": [104]}]]}]]}]]}
-])json";
-  auto ExpectedJSON = json::parse(Expected);
-  ASSERT_TRUE(!!ExpectedJSON);
-  auto ProducedJSON = json::parse(Str);
-  ASSERT_TRUE(!!ProducedJSON);
-  EXPECT_EQ(*ProducedJSON, *ExpectedJSON);
+  const char *Expected = R"yaml(
+- Guid:            1000
+  Counters:        [ 1, 11, 22 ]
+  Callsites:
+    - - Guid:            1001
+        Counters:        [ 10 ]
+      - Guid:            1003
+        Counters:        [ 12 ]
+    - - Guid:            1002
+        Counters:        [ 11 ]
+        Callsites:
+          - - Guid:            1004
+              Counters:        [ 13 ]
+- Guid:            1005
+  Counters:        [ 2 ]
+  Callsites:
+    - - Guid:            1000
+        Counters:        [ 1, 102, 204 ]
+        Callsites:
+          - - Guid:            1001
+              Counters:        [ 101 ]
+            - Guid:            1003
+              Counters:        [ 103 ]
+          - - Guid:            1002
+              Counters:        [ 102 ]
+              Callsites:
+                - - Guid:            1004
+                    Counters:        [ 104 ]
+)yaml";
+  EXPECT_EQ(Expected, Str);
 }



More information about the llvm-commits mailing list