[llvm] [ctxprof] dump profiles using yaml (for testing) (PR #123108)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 15 11:13:37 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-pgo
Author: Mircea Trofin (mtrofin)
<details>
<summary>Changes</summary>
This is a follow-up to PR #122545, which enabled converting yaml to contextual profiles.
This change uses the lower level yaml APIs because:
- the mapping APIs that `llvm::yaml` offers don't work with `const` values, because those APIs are designed to support both serialization and deserialization
- building a helper data structure would be an alternative, but it would be either memory-consuming or an overly complex design, given the recursive nature of the contextual profiles.
---
Full diff: https://github.com/llvm/llvm-project/pull/123108.diff
11 Files Affected:
- (modified) llvm/include/llvm/Analysis/CtxProfAnalysis.h (+1-1)
- (modified) llvm/include/llvm/ProfileData/PGOCtxProfReader.h (+2)
- (modified) llvm/include/llvm/ProfileData/PGOCtxProfWriter.h (-8)
- (modified) llvm/lib/Analysis/CtxProfAnalysis.cpp (+6-45)
- (modified) llvm/lib/ProfileData/PGOCtxProfReader.cpp (+88)
- (modified) llvm/lib/ProfileData/PGOCtxProfWriter.cpp (+9-1)
- (modified) llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll (+14-48)
- (modified) llvm/test/Analysis/CtxProfAnalysis/inline.ll (+16-17)
- (removed) llvm/test/Analysis/CtxProfAnalysis/json_equals.py (-15)
- (modified) llvm/test/Analysis/CtxProfAnalysis/load.ll (+8-25)
- (modified) llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp (+30-40)
``````````diff
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index be040d5eca5f31..ea292250c63a92 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -140,7 +140,7 @@ class CtxProfAnalysis : public AnalysisInfoMixin<CtxProfAnalysis> {
class CtxProfAnalysisPrinterPass
: public PassInfoMixin<CtxProfAnalysisPrinterPass> {
public:
- enum class PrintMode { Everything, JSON };
+ enum class PrintMode { Everything, YAML };
explicit CtxProfAnalysisPrinterPass(raw_ostream &OS);
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
index d358041e3a001f..211596017944b6 100644
--- a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
+++ b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
@@ -183,5 +183,7 @@ class PGOCtxProfileReader final {
Expected<std::map<GlobalValue::GUID, PGOCtxProfContext>> loadContexts();
};
+
+void convertToYaml(raw_ostream &OS, const PGOCtxProfContext::CallTargetMapTy &);
} // namespace llvm
#endif
diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h b/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h
index f6158609c12855..43a190ae0aa05f 100644
--- a/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h
+++ b/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h
@@ -81,14 +81,6 @@ class PGOCtxProfileWriter final {
static constexpr StringRef ContainerMagic = "CTXP";
};
-/// Representation of the context node suitable for yaml / json serialization /
-/// deserialization.
-struct SerializableCtxRepresentation {
- ctx_profile::GUID Guid = 0;
- std::vector<uint64_t> Counters;
- std::vector<std::vector<SerializableCtxRepresentation>> Callsites;
-};
-
Error createCtxProfFromYAML(StringRef Profile, raw_ostream &Out);
} // namespace llvm
#endif
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 825e517cd09f52..93e3c7d4bea97e 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -19,7 +19,6 @@
#include "llvm/IR/PassManager.h"
#include "llvm/ProfileData/PGOCtxProfReader.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/JSON.h"
#include "llvm/Support/MemoryBuffer.h"
#define DEBUG_TYPE "ctx_prof"
@@ -31,49 +30,13 @@ cl::opt<std::string>
static cl::opt<CtxProfAnalysisPrinterPass::PrintMode> PrintLevel(
"ctx-profile-printer-level",
- cl::init(CtxProfAnalysisPrinterPass::PrintMode::JSON), cl::Hidden,
+ cl::init(CtxProfAnalysisPrinterPass::PrintMode::YAML), cl::Hidden,
cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything,
"everything", "print everything - most verbose"),
- clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::JSON, "json",
- "just the json representation of the profile")),
+ clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::YAML, "yaml",
+ "just the yaml representation of the profile")),
cl::desc("Verbosity level of the contextual profile printer pass."));
-namespace llvm {
-namespace json {
-Value toJSON(const PGOCtxProfContext &P) {
- Object Ret;
- Ret["Guid"] = P.guid();
- Ret["Counters"] = Array(P.counters());
- if (P.callsites().empty())
- return Ret;
- auto AllCS =
- ::llvm::map_range(P.callsites(), [](const auto &P) { return P.first; });
- auto MaxIt = ::llvm::max_element(AllCS);
- assert(MaxIt != AllCS.end() && "We should have a max value because the "
- "callsites collection is not empty.");
- Array CSites;
- // Iterate to, and including, the maximum index.
- for (auto I = 0U, Max = *MaxIt; I <= Max; ++I) {
- CSites.push_back(Array());
- Array &Targets = *CSites.back().getAsArray();
- if (P.hasCallsite(I))
- for (const auto &[_, Ctx] : P.callsite(I))
- Targets.push_back(toJSON(Ctx));
- }
- Ret["Callsites"] = std::move(CSites);
-
- return Ret;
-}
-
-Value toJSON(const PGOCtxProfContext::CallTargetMapTy &P) {
- Array Ret;
- for (const auto &[_, Ctx] : P)
- Ret.push_back(toJSON(Ctx));
- return Ret;
-}
-} // namespace json
-} // namespace llvm
-
const char *AssignGUIDPass::GUIDMetadataName = "guid";
PreservedAnalyses AssignGUIDPass::run(Module &M, ModuleAnalysisManager &MAM) {
@@ -214,15 +177,13 @@ PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M,
<< ". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex << "\n";
}
- const auto JSONed = ::llvm::json::toJSON(C.profiles());
-
if (Mode == PrintMode::Everything)
OS << "\nCurrent Profile:\n";
- OS << formatv("{0:2}", JSONed);
- if (Mode == PrintMode::JSON)
+ convertToYaml(OS, C.profiles());
+ OS << "\n";
+ if (Mode == PrintMode::YAML)
return PreservedAnalyses::all();
- OS << "\n";
OS << "\nFlat Profile:\n";
auto Flat = C.flatten();
for (const auto &[Guid, Counters] : Flat) {
diff --git a/llvm/lib/ProfileData/PGOCtxProfReader.cpp b/llvm/lib/ProfileData/PGOCtxProfReader.cpp
index eb89d7c2f6d1d9..aa78ce03eff989 100644
--- a/llvm/lib/ProfileData/PGOCtxProfReader.cpp
+++ b/llvm/lib/ProfileData/PGOCtxProfReader.cpp
@@ -17,6 +17,10 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/PGOCtxProfWriter.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <iterator>
+#include <utility>
using namespace llvm;
@@ -176,3 +180,87 @@ PGOCtxProfileReader::loadContexts() {
}
return std::move(Ret);
}
+
+namespace {
+// We want to pass `const` PGOCtxProfContext references to the yaml converter,
+// but the regular yaml mapping APIs are designed to handle both serialization
+// and deserialization, which prevents using const for serialization. Using an
+// intermediate data structure is overkill, both space-wise and design
+// complexity-wise. Instead, we use the lower-level APIs.
+void toYaml(yaml::Output &Out, const PGOCtxProfContext &Ctx);
+
+void toYaml(yaml::Output &Out,
+ const PGOCtxProfContext::CallTargetMapTy &CallTargets) {
+ Out.beginSequence();
+ size_t Index = 0;
+ void *SaveData = nullptr;
+ for (const auto &[_, Ctx] : CallTargets) {
+ Out.preflightElement(Index++, SaveData);
+ toYaml(Out, Ctx);
+ Out.postflightElement(nullptr);
+ }
+ Out.endSequence();
+}
+
+void toYaml(yaml::Output &Out,
+ const PGOCtxProfContext::CallsiteMapTy &Callsites) {
+ auto AllCS =
+ ::llvm::map_range(Callsites, [](const auto &P) { return P.first; });
+ auto MaxIt = ::llvm::max_element(AllCS);
+ assert(MaxIt != AllCS.end() && "We should have a max value because the "
+ "callsites collection is not empty.");
+ void *SaveData = nullptr;
+ Out.beginSequence();
+ for (auto I = 0U; I <= *MaxIt; ++I) {
+ Out.preflightElement(I, SaveData);
+ auto It = Callsites.find(I);
+ if (It == Callsites.end()) {
+ // This will produce a `[ ]` sequence, which is what we want here.
+ Out.beginFlowSequence();
+ Out.endFlowSequence();
+ } else {
+ toYaml(Out, It->second);
+ }
+ Out.postflightElement(nullptr);
+ }
+ Out.endSequence();
+}
+
+void toYaml(yaml::Output &Out, const PGOCtxProfContext &Ctx) {
+ yaml::EmptyContext Empty;
+ Out.beginMapping();
+ void *SaveInfo = nullptr;
+ bool UseDefault = false;
+ {
+ Out.preflightKey("Guid", /*Required=*/true, /*SameAsDefault=*/false,
+ UseDefault, SaveInfo);
+ auto Guid = Ctx.guid();
+ yaml::yamlize(Out, Guid, true, Empty);
+ Out.postflightKey(nullptr);
+ }
+ {
+ Out.preflightKey("Counters", true, false, UseDefault, SaveInfo);
+ Out.beginFlowSequence();
+ for (size_t I = 0U, E = Ctx.counters().size(); I < E; ++I) {
+ Out.preflightFlowElement(I, SaveInfo);
+ uint64_t V = Ctx.counters()[I];
+ yaml::yamlize(Out, V, true, Empty);
+ Out.postflightFlowElement(SaveInfo);
+ }
+ Out.endFlowSequence();
+ Out.postflightKey(nullptr);
+ }
+ if (!Ctx.callsites().empty()) {
+ Out.preflightKey("Callsites", true, false, UseDefault, SaveInfo);
+ toYaml(Out, Ctx.callsites());
+ Out.postflightKey(nullptr);
+ }
+ Out.endMapping();
+}
+} // namespace
+
+void llvm::convertToYaml(raw_ostream &OS,
+ const PGOCtxProfContext::CallTargetMapTy &Profiles) {
+ yaml::Output Out(OS);
+ toYaml(Out, Profiles);
+}
\ No newline at end of file
diff --git a/llvm/lib/ProfileData/PGOCtxProfWriter.cpp b/llvm/lib/ProfileData/PGOCtxProfWriter.cpp
index d22aadd6bd7eb0..3d3da848174895 100644
--- a/llvm/lib/ProfileData/PGOCtxProfWriter.cpp
+++ b/llvm/lib/ProfileData/PGOCtxProfWriter.cpp
@@ -14,7 +14,6 @@
#include "llvm/Bitstream/BitCodeEnums.h"
#include "llvm/ProfileData/CtxInstrContextNode.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/JSON.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
@@ -89,6 +88,15 @@ void PGOCtxProfileWriter::write(const ContextNode &RootNode) {
}
namespace {
+
+/// Representation of the context node suitable for yaml serialization /
+/// deserialization.
+struct SerializableCtxRepresentation {
+ ctx_profile::GUID Guid = 0;
+ std::vector<uint64_t> Counters;
+ std::vector<std::vector<SerializableCtxRepresentation>> Callsites;
+};
+
ctx_profile::ContextNode *
createNode(std::vector<std::unique_ptr<char[]>> &Nodes,
const std::vector<SerializableCtxRepresentation> &DCList);
diff --git a/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
index 5a8a2f4cad84b7..49d34e71c5d087 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
@@ -88,54 +88,20 @@ Function Info:
10507721908651011566 : entrypoint. MaxCounterID: 1. MaxCallsiteID: 2
Current Profile:
-[
- {
- "Callsites": [
- [
- {
- "Callsites": [
- [
- {
- "Counters": [
- 10,
- 7
- ],
- "Guid": 3087265239403591524
- }
- ]
- ],
- "Counters": [
- 7
- ],
- "Guid": 2072045998141807037
- }
- ],
- [
- {
- "Callsites": [
- [
- {
- "Counters": [
- 1,
- 2
- ],
- "Guid": 3087265239403591524
- }
- ]
- ],
- "Counters": [
- 2
- ],
- "Guid": 4197650231481825559
- }
- ]
- ],
- "Counters": [
- 1
- ],
- "Guid": 10507721908651011566
- }
-]
+
+- Guid: 10507721908651011566
+ Counters: [ 1 ]
+ Callsites:
+ - - Guid: 2072045998141807037
+ Counters: [ 7 ]
+ Callsites:
+ - - Guid: 3087265239403591524
+ Counters: [ 10, 7 ]
+ - - Guid: 4197650231481825559
+ Counters: [ 2 ]
+ Callsites:
+ - - Guid: 3087265239403591524
+ Counters: [ 1, 2 ]
Flat Profile:
2072045998141807037 : 7
diff --git a/llvm/test/Analysis/CtxProfAnalysis/inline.ll b/llvm/test/Analysis/CtxProfAnalysis/inline.ll
index 6c1e199c2ba1c0..e676e73ed8ec6f 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/inline.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/inline.ll
@@ -3,9 +3,9 @@
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile.yaml --output=%t/profile.ctxprofdata
; RUN: opt -passes='module-inline,print<ctx-prof-analysis>' -ctx-profile-printer-level=everything %t/module.ll -S \
-; RUN: -use-ctx-profile=%t/profile.ctxprofdata -ctx-profile-printer-level=json \
-; RUN: -o - 2> %t/profile-final.txt | FileCheck %s
-; RUN: %python %S/json_equals.py %t/profile-final.txt %t/expected.json
+; RUN: -use-ctx-profile=%t/profile.ctxprofdata -ctx-profile-printer-level=yaml \
+; RUN: -o - 2> %t/profile-final.yaml | FileCheck %s
+; RUN: diff %t/profile-final.yaml %t/expected.yaml
; There are 2 calls to @a from @entrypoint. We only inline the one callsite
; marked as alwaysinline, the rest are blocked (marked noinline). After the inline,
@@ -109,17 +109,16 @@ define i32 @b() !guid !2 {
Callsites: -
- Guid: 1002
Counters: [500]
-;--- expected.json
-[
- { "Guid": 1000,
- "Counters": [10, 2, 8, 100],
- "Callsites": [
- [],
- [ { "Guid": 1001,
- "Counters": [8, 500],
- "Callsites": [[{"Guid": 1002, "Counters": [500]}]]}
- ],
- [{ "Guid": 1002, "Counters": [100]}]
- ]
- }
-]
+;--- expected.yaml
+
+- Guid: 1000
+ Counters: [ 10, 2, 8, 100 ]
+ Callsites:
+ - [ ]
+ - - Guid: 1001
+ Counters: [ 8, 500 ]
+ Callsites:
+ - - Guid: 1002
+ Counters: [ 500 ]
+ - - Guid: 1002
+ Counters: [ 100 ]
diff --git a/llvm/test/Analysis/CtxProfAnalysis/json_equals.py b/llvm/test/Analysis/CtxProfAnalysis/json_equals.py
deleted file mode 100644
index 8b94dda5528c5b..00000000000000
--- a/llvm/test/Analysis/CtxProfAnalysis/json_equals.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import json
-import sys
-
-
-def to_json(fname: str):
- with open(fname) as f:
- return json.load(f)
-
-
-a = to_json(sys.argv[1])
-b = to_json(sys.argv[2])
-
-if a == b:
- exit(0)
-exit(1)
diff --git a/llvm/test/Analysis/CtxProfAnalysis/load.ll b/llvm/test/Analysis/CtxProfAnalysis/load.ll
index 62c6344ed3fec0..2618903bd62a82 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/load.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/load.ll
@@ -40,31 +40,14 @@ Function Info:
12074870348631550642 : another_entrypoint_no_callees. MaxCounterID: 1. MaxCallsiteID: 0
Current Profile:
-[
- {
- "Callsites": [
- [
- {
- "Counters": [
- 6,
- 7
- ],
- "Guid": 728453322856651412
- }
- ]
- ],
- "Counters": [
- 1
- ],
- "Guid": 11872291593386833696
- },
- {
- "Counters": [
- 5
- ],
- "Guid": 12074870348631550642
- }
-]
+
+- Guid: 11872291593386833696
+ Counters: [ 1 ]
+ Callsites:
+ - - Guid: 728453322856651412
+ Counters: [ 6, 7 ]
+- Guid: 12074870348631550642
+ Counters: [ 5 ]
Flat Profile:
728453322856651412 : 6 7
diff --git a/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp b/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp
index 4420a6d0654993..57a8f75a3a31aa 100644
--- a/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp
@@ -18,7 +18,6 @@
#include "llvm/IR/PassInstrumentation.h"
#include "llvm/ProfileData/PGOCtxProfReader.h"
#include "llvm/ProfileData/PGOCtxProfWriter.h"
-#include "llvm/Support/JSON.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Testing/Support/SupportHelpers.h"
@@ -572,43 +571,34 @@ define i32 @f4() !guid !3 {
raw_string_ostream OS(Str);
CtxProfAnalysisPrinterPass Printer(OS);
Printer.run(*M, MAM);
- const char *Expected = R"json(
- [
- {
- "Guid": 1000,
- "Counters": [1, 11, 22],
- "Callsites": [
- [{ "Guid": 1001,
- "Counters": [10]},
- { "Guid": 1003,
- "Counters": [12]
- }],
- [{ "Guid": 1002,
- "Counters": [11],
- "Callsites": [
- [{ "Guid": 1004,
- "Counters": [13] }]]}]]
- },
- {
- "Guid": 1005,
- "Counters": [2],
- "Callsites": [
- [{ "Guid": 1000,
- "Counters": [1, 102, 204],
- "Callsites": [
- [{ "Guid": 1001,
- "Counters": [101]},
- { "Guid": 1003,
- "Counters": [103]}],
- [{ "Guid": 1002,
- "Counters": [102],
- "Callsites": [
- [{ "Guid": 1004,
- "Counters": [104]}]]}]]}]]}
-])json";
- auto ExpectedJSON = json::parse(Expected);
- ASSERT_TRUE(!!ExpectedJSON);
- auto ProducedJSON = json::parse(Str);
- ASSERT_TRUE(!!ProducedJSON);
- EXPECT_EQ(*ProducedJSON, *ExpectedJSON);
+ const char *Expected = R"yaml(
+- Guid: 1000
+ Counters: [ 1, 11, 22 ]
+ Callsites:
+ - - Guid: 1001
+ Counters: [ 10 ]
+ - Guid: 1003
+ Counters: [ 12 ]
+ - - Guid: 1002
+ Counters: [ 11 ]
+ Callsites:
+ - - Guid: 1004
+ Counters: [ 13 ]
+- Guid: 1005
+ Counters: [ 2 ]
+ Callsites:
+ - - Guid: 1000
+ Counters: [ 1, 102, 204 ]
+ Callsites:
+ - - Guid: 1001
+ Counters: [ 101 ]
+ - Guid: 1003
+ Counters: [ 103 ]
+ - - Guid: 1002
+ Counters: [ 102 ]
+ Callsites:
+ - - Guid: 1004
+ Counters: [ 104 ]
+)yaml";
+ EXPECT_EQ(Expected, Str);
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/123108
More information about the llvm-commits
mailing list