[llvm] a737b87 - [ctx_prof] test tool: generate ctxprof bistream from json (#100379)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 25 12:52:53 PDT 2024


Author: Mircea Trofin
Date: 2024-07-25T15:52:49-04:00
New Revision: a737b8704c031310460d492cef90eee5054cabd7

URL: https://github.com/llvm/llvm-project/commit/a737b8704c031310460d492cef90eee5054cabd7
DIFF: https://github.com/llvm/llvm-project/commit/a737b8704c031310460d492cef90eee5054cabd7.diff

LOG: [ctx_prof] test tool: generate ctxprof bistream from json (#100379)

This is a tool to simplify testing. It generates a valid contextual profile file from a json representation.

The tool is authored to allow for future evolution, e.g. if we want to support profile merging or other tasks, not necessarily scoped to testing.

Added: 
    llvm/test/tools/llvm-ctxprof-util/Inputs/bad.json
    llvm/test/tools/llvm-ctxprof-util/Inputs/empty.json
    llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-bad-subctx.json
    llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-no-counters.json
    llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-no-ctx.json
    llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-no-vector.json
    llvm/test/tools/llvm-ctxprof-util/Inputs/valid.json
    llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util-negative.test
    llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util.test
    llvm/tools/llvm-ctxprof-util/CMakeLists.txt
    llvm/tools/llvm-ctxprof-util/llvm-ctxprof-util.cpp

Modified: 
    llvm/test/CMakeLists.txt
    llvm/test/lit.cfg.py
    llvm/tools/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index 6b7f2b58e603e..8abc153336251 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -76,6 +76,7 @@ set(LLVM_TEST_DEPENDS
           llvm-cfi-verify
           llvm-config
           llvm-cov
+          llvm-ctxprof-util
           llvm-cvtres
           llvm-cxxdump
           llvm-cxxfilt

diff  --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index fe1262893212f..e5e3dc7e1b4bd 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -182,6 +182,7 @@ def get_asan_rtlib():
         "llvm-bitcode-strip",
         "llvm-config",
         "llvm-cov",
+        "llvm-ctxprof-util",
         "llvm-cxxdump",
         "llvm-cvtres",
         "llvm-debuginfod-find",

diff  --git a/llvm/test/tools/llvm-ctxprof-util/Inputs/bad.json b/llvm/test/tools/llvm-ctxprof-util/Inputs/bad.json
new file mode 100644
index 0000000000000..35c169002386e
--- /dev/null
+++ b/llvm/test/tools/llvm-ctxprof-util/Inputs/bad.json
@@ -0,0 +1 @@
+[{]

diff  --git a/llvm/test/tools/llvm-ctxprof-util/Inputs/empty.json b/llvm/test/tools/llvm-ctxprof-util/Inputs/empty.json
new file mode 100644
index 0000000000000..fe51488c7066f
--- /dev/null
+++ b/llvm/test/tools/llvm-ctxprof-util/Inputs/empty.json
@@ -0,0 +1 @@
+[]

diff  --git a/llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-bad-subctx.json b/llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-bad-subctx.json
new file mode 100644
index 0000000000000..b47e0ee1a04ba
--- /dev/null
+++ b/llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-bad-subctx.json
@@ -0,0 +1,8 @@
+[{
+  "Guid": 123,
+  "Counters": [1, 2],
+  "Callsites":
+  [
+    {"Guid": 1}
+  ]
+}]

diff  --git a/llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-no-counters.json b/llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-no-counters.json
new file mode 100644
index 0000000000000..95cdd45a5a0f7
--- /dev/null
+++ b/llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-no-counters.json
@@ -0,0 +1,5 @@
+[
+  {
+    "Guid": 1231
+  }
+]

diff  --git a/llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-no-ctx.json b/llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-no-ctx.json
new file mode 100644
index 0000000000000..93d51406d63fb
--- /dev/null
+++ b/llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-no-ctx.json
@@ -0,0 +1 @@
+[{}]

diff  --git a/llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-no-vector.json b/llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-no-vector.json
new file mode 100644
index 0000000000000..0967ef424bce6
--- /dev/null
+++ b/llvm/test/tools/llvm-ctxprof-util/Inputs/invalid-no-vector.json
@@ -0,0 +1 @@
+{}

diff  --git a/llvm/test/tools/llvm-ctxprof-util/Inputs/valid.json b/llvm/test/tools/llvm-ctxprof-util/Inputs/valid.json
new file mode 100644
index 0000000000000..d4a6da4142110
--- /dev/null
+++ b/llvm/test/tools/llvm-ctxprof-util/Inputs/valid.json
@@ -0,0 +1,47 @@
+[
+  {
+    "Guid": 1000,
+    "Counters": [
+      1,
+      2,
+      3
+    ],
+    "Callsites": [
+      [],
+      [
+        {
+          "Guid": 2000,
+          "Counters": [
+            4,
+            5
+          ]
+        },
+        {
+          "Guid": 18446744073709551613,
+          "Counters": [
+            6,
+            7,
+            8
+          ]
+        }
+      ],
+      [
+        {
+          "Guid": 3000,
+          "Counters": [
+            40,
+            50
+          ]
+        }
+      ]
+    ]
+  },
+  {
+    "Guid": 18446744073709551612,
+    "Counters": [
+      5,
+      9,
+      10
+    ]
+  }
+]

diff  --git a/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util-negative.test b/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util-negative.test
new file mode 100644
index 0000000000000..97598c11979fa
--- /dev/null
+++ b/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util-negative.test
@@ -0,0 +1,22 @@
+; RUN: not llvm-ctxprof-util nofile.json 2>&1 | FileCheck %s --check-prefix=NO_CMD
+; RUN: not llvm-ctxprof-util invalidCmd --input nofile.json 2>&1 | FileCheck %s --check-prefix=INVALID_CMD
+; RUN: not llvm-ctxprof-util fromJSON nofile.json 2>&1 | FileCheck %s --check-prefix=NO_FLAG
+; RUN: not llvm-ctxprof-util fromJSON --input nofile.json 2>&1 | FileCheck %s --check-prefix=NO_FILE
+; RUN: not llvm-ctxprof-util fromJSON --input %S/Inputs/bad.json 2>&1 | FileCheck %s --check-prefix=BAD_JSON
+; RUN: not llvm-ctxprof-util fromJSON --input %S/Inputs/invalid-no-vector.json 2>&1 | FileCheck %s --check-prefix=NO_VECTOR
+; RUN: not llvm-ctxprof-util fromJSON --input %S/Inputs/invalid-no-ctx.json 2>&1 | FileCheck %s --check-prefix=NO_CTX
+; RUN: not llvm-ctxprof-util fromJSON --input %S/Inputs/invalid-no-counters.json 2>&1 | FileCheck %s --check-prefix=NO_COUNTERS
+; RUN: not llvm-ctxprof-util fromJSON --input %S/Inputs/invalid-bad-subctx.json 2>&1 | FileCheck %s --check-prefix=BAD_SUBCTX
+; RUN: rm -rf %t
+; RUN: not llvm-ctxprof-util fromJSON --input %S/Inputs/valid.json --output %t/output.bitstream 2>&1 | FileCheck %s --check-prefix=NO_DIR
+
+; NO_CMD: Unknown subcommand 'nofile.json'
+; INVALID_CMD: Unknown subcommand 'invalidCmd'
+; NO_FLAG: Unknown command line argument 'nofile.json'. 
+; NO_FILE: 'nofile.json': No such file or directory
+; BAD_JSON: Expected object key
+; NO_VECTOR: expected array
+; NO_CTX: missing value at (root)[0].Guid
+; NO_COUNTERS: missing value at (root)[0].Counters
+; BAD_SUBCTX: expected array at (root)[0].Callsites[0]
+; NO_DIR: failed to open output

diff  --git a/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util.test b/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util.test
new file mode 100644
index 0000000000000..d430818d05442
--- /dev/null
+++ b/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util.test
@@ -0,0 +1,46 @@
+; RUN: mkdir -p %t
+; RUN: llvm-ctxprof-util fromJSON --input %S/Inputs/empty.json -output %t/empty.bitstream
+; RUN: llvm-bcanalyzer --dump %t/empty.bitstream | FileCheck %s --check-prefix=EMPTY
+
+; RUN: llvm-ctxprof-util fromJSON --input %S/Inputs/valid.json -output %t/valid.bitstream
+
+; For the valid case, check against a reference output.
+; Note that uint64_t are printed as signed values by llvm-bcanalyzer:
+;  * 18446744073709551613 in json is -3 in the output
+;  * 18446744073709551612 in json is -4 in the output
+; Also we have no callee/context at index 0, 2 callsites for index 1, and one for
+; index 2.
+; RUN: llvm-bcanalyzer --dump %t/valid.bitstream | FileCheck %s --check-prefix=VALID
+
+; EMPTY: <BLOCKINFO_BLOCK/>
+; EMPTY-NEXT: <Metadata NumWords=1 BlockCodeSize=2>
+; EMPTY-NEXT:   <Version op0=1/>
+; EMPTY-NEXT: </Metadata>
+
+; VALID:      <BLOCKINFO_BLOCK/>
+; VALID-NEXT: <Metadata NumWords=30 BlockCodeSize=2>
+; VALID-NEXT:   <Version op0=1/>
+; VALID-NEXT:   <Context NumWords=20 BlockCodeSize=2>
+; VALID-NEXT:     <GUID op0=1000/>
+; VALID-NEXT:     <Counters op0=1 op1=2 op2=3/>
+; VALID-NEXT:     <Context NumWords=5 BlockCodeSize=2>
+; VALID-NEXT:       <GUID op0=-3/>
+; VALID-NEXT:       <CalleeIndex op0=1/>
+; VALID-NEXT:       <Counters op0=6 op1=7 op2=8/>
+; VALID-NEXT:     </Context>
+; VALID-NEXT:     <Context NumWords=3 BlockCodeSize=2>
+; VALID-NEXT:       <GUID op0=2000/>
+; VALID-NEXT:       <CalleeIndex op0=1/>
+; VALID-NEXT:       <Counters op0=4 op1=5/>
+; VALID-NEXT:     </Context>
+; VALID-NEXT:     <Context NumWords=3 BlockCodeSize=2>
+; VALID-NEXT:       <GUID op0=3000/>
+; VALID-NEXT:       <CalleeIndex op0=2/>
+; VALID-NEXT:       <Counters op0=40 op1=50/>
+; VALID-NEXT:     </Context>
+; VALID-NEXT:   </Context>
+; VALID-NEXT:   <Context NumWords=4 BlockCodeSize=2>
+; VALID-NEXT:     <GUID op0=-4/>
+; VALID-NEXT:     <Counters op0=5 op1=9 op2=10/>
+; VALID-NEXT:   </Context>
+; VALID-NEXT: </Metadata>

diff  --git a/llvm/tools/CMakeLists.txt b/llvm/tools/CMakeLists.txt
index db66dad5dc0db..b9c5a79849ec8 100644
--- a/llvm/tools/CMakeLists.txt
+++ b/llvm/tools/CMakeLists.txt
@@ -32,6 +32,7 @@ add_llvm_tool_subdirectory(lto)
 add_llvm_tool_subdirectory(gold)
 add_llvm_tool_subdirectory(llvm-ar)
 add_llvm_tool_subdirectory(llvm-config)
+add_llvm_tool_subdirectory(llvm-ctxprof-util)
 add_llvm_tool_subdirectory(llvm-lto)
 add_llvm_tool_subdirectory(llvm-profdata)
 

diff  --git a/llvm/tools/llvm-ctxprof-util/CMakeLists.txt b/llvm/tools/llvm-ctxprof-util/CMakeLists.txt
new file mode 100644
index 0000000000000..abf8e1aa0651f
--- /dev/null
+++ b/llvm/tools/llvm-ctxprof-util/CMakeLists.txt
@@ -0,0 +1,14 @@
+set(LLVM_LINK_COMPONENTS
+  Core
+  Object
+  ProfileData
+  Support
+  )
+
+add_llvm_tool(llvm-ctxprof-util
+llvm-ctxprof-util.cpp
+
+  DEPENDS
+  intrinsics_gen
+  GENERATE_DRIVER
+  )

diff  --git a/llvm/tools/llvm-ctxprof-util/llvm-ctxprof-util.cpp b/llvm/tools/llvm-ctxprof-util/llvm-ctxprof-util.cpp
new file mode 100644
index 0000000000000..ded8c8a6e4332
--- /dev/null
+++ b/llvm/tools/llvm-ctxprof-util/llvm-ctxprof-util.cpp
@@ -0,0 +1,150 @@
+//===--- PGOCtxProfJSONReader.h - JSON format  ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+///
+/// JSON format for the contextual profile for testing.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/ProfileData/CtxInstrContextNode.h"
+#include "llvm/ProfileData/PGOCtxProfWriter.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/LLVMDriver.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::SubCommand FromJSON("fromJSON", "Convert from json");
+
+static cl::opt<std::string> InputFilename(
+    "input", cl::value_desc("input"), cl::init("-"),
+    cl::desc(
+        "Input file. The format is an array of contexts.\n"
+        "Each context is a dictionary with the following keys:\n"
+        "'Guid', mandatory. The value is a 64-bit integer.\n"
+        "'Counters', mandatory. An array of 32-bit ints. These are the "
+        "counter values.\n"
+        "'Contexts', optional. An array containing arrays of contexts. The "
+        "context array at a position 'i' is the set of callees at that "
+        "callsite index. Use an empty array to indicate no callees."),
+    cl::sub(FromJSON));
+
+static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
+                                           cl::init("-"),
+                                           cl::desc("Output file"),
+                                           cl::sub(FromJSON));
+
+namespace {
+// A structural representation of the JSON input.
+struct DeserializableCtx {
+  GlobalValue::GUID Guid = 0;
+  std::vector<uint64_t> Counters;
+  std::vector<std::vector<DeserializableCtx>> Callsites;
+};
+
+ctx_profile::ContextNode *
+createNode(std::vector<std::unique_ptr<char[]>> &Nodes,
+           const std::vector<DeserializableCtx> &DCList);
+
+// Convert a DeserializableCtx into a ContextNode, potentially linking it to
+// its sibling (e.g. callee at same callsite) "Next".
+ctx_profile::ContextNode *
+createNode(std::vector<std::unique_ptr<char[]>> &Nodes,
+           const DeserializableCtx &DC,
+           ctx_profile::ContextNode *Next = nullptr) {
+  auto AllocSize = ctx_profile::ContextNode::getAllocSize(DC.Counters.size(),
+                                                          DC.Callsites.size());
+  auto *Mem = Nodes.emplace_back(std::make_unique<char[]>(AllocSize)).get();
+  std::memset(Mem, 0, AllocSize);
+  auto *Ret = new (Mem) ctx_profile::ContextNode(DC.Guid, DC.Counters.size(),
+                                                 DC.Callsites.size(), Next);
+  std::memcpy(Ret->counters(), DC.Counters.data(),
+              sizeof(uint64_t) * DC.Counters.size());
+  for (const auto &[I, DCList] : llvm::enumerate(DC.Callsites))
+    Ret->subContexts()[I] = createNode(Nodes, DCList);
+  return Ret;
+}
+
+// Convert a list of DeserializableCtx into a linked list of ContextNodes.
+ctx_profile::ContextNode *
+createNode(std::vector<std::unique_ptr<char[]>> &Nodes,
+           const std::vector<DeserializableCtx> &DCList) {
+  ctx_profile::ContextNode *List = nullptr;
+  for (const auto &DC : DCList)
+    List = createNode(Nodes, DC, List);
+  return List;
+}
+} // namespace
+
+namespace llvm {
+namespace json {
+// Hook into the JSON deserialization.
+bool fromJSON(const Value &E, DeserializableCtx &R, Path P) {
+  json::ObjectMapper Mapper(E, P);
+  return Mapper && Mapper.map("Guid", R.Guid) &&
+         Mapper.map("Counters", R.Counters) &&
+         Mapper.mapOptional("Callsites", R.Callsites);
+}
+} // namespace json
+} // namespace llvm
+
+// Save the bitstream profile from the JSON representation.
+Error convertFromJSON() {
+  auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFilename);
+  if (!BufOrError)
+    return createFileError(InputFilename, BufOrError.getError());
+  auto P = json::parse(BufOrError.get()->getBuffer());
+  if (!P)
+    return P.takeError();
+
+  std::vector<DeserializableCtx> DCList;
+  json::Path::Root R("");
+  if (!fromJSON(*P, DCList, R))
+    return R.getError();
+  // Nodes provides memory backing for the ContextualNodes.
+  std::vector<std::unique_ptr<char[]>> Nodes;
+  std::error_code EC;
+  raw_fd_stream Out(OutputFilename, EC);
+  if (EC)
+    return createStringError(EC, "failed to open output");
+  PGOCtxProfileWriter Writer(Out);
+  for (const auto &DC : DCList) {
+    auto *TopList = createNode(Nodes, DC);
+    if (!TopList)
+      return createStringError(
+          "Unexpected error converting internal structure to ctx profile");
+    Writer.write(*TopList);
+  }
+  if (EC)
+    return createStringError(EC, "failed to write output");
+  return Error::success();
+}
+
+int llvm_ctxprof_util_main(int argc, char **argv, const llvm::ToolContext &) {
+  cl::ParseCommandLineOptions(argc, argv, "LLVM Contextual Profile Utils\n");
+  ExitOnError ExitOnErr("llvm-ctxprof-util: ");
+  if (FromJSON) {
+    if (auto E = convertFromJSON()) {
+      handleAllErrors(std::move(E), [&](const ErrorInfoBase &E) {
+        E.log(errs());
+        errs() << "\n";
+      });
+      return 1;
+    }
+    return 0;
+  }
+  llvm_unreachable("Unknown subcommands should have been handled by the "
+                   "command line parser.");
+}


        


More information about the llvm-commits mailing list