[llvm] Reland [CGData] llvm-cgdata #89884 (PR #101461)

Ellis Hoag via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 5 14:49:22 PDT 2024


================
@@ -0,0 +1,352 @@
+//===-- llvm-cgdata.cpp - LLVM CodeGen Data Tool --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// llvm-cgdata parses raw codegen data embedded in compiled binary files, and
+// merges them into a single .cgdata file. It can also inspect and manipulate
+// a .cgdata file. This .cgdata can contain various codegen data like outlining
+// information, and it can be used to optimize the code in the subsequent build.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGenData/CodeGenDataReader.h"
+#include "llvm/CodeGenData/CodeGenDataWriter.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/LLVMDriver.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+enum CGDataFormat { // Parsed value of the format option (see parseArgs).
+  Invalid, // Fallback when the string is neither "text" nor "binary".
+  Text, // Selected by "text"; output is produced with Writer.writeText().
+  Binary, // Selected by "binary"; output is produced with Writer.write().
+};
+
+enum CGDataAction { // Sub-command selector; held in the global `Action`.
+  Convert, // presumably dispatches to convert_main - confirm in main().
+  Merge, // presumably dispatches to merge_main - confirm in main().
+  Show, // presumably dispatches to show_main - confirm in main().
+};
+
+// Option boilerplate: Opts.inc is expanded three times under different macros.
+namespace {
+enum ID {
+  OPT_INVALID = 0, // This is not an option ID.
+#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
+#include "Opts.inc" // 1st expansion: OPTION entries become enumerators of ID.
+#undef OPTION
+};
+
+#define PREFIX(NAME, VALUE)                                                    \
+  static constexpr StringLiteral NAME##_init[] = VALUE;                        \
+  static constexpr ArrayRef<StringLiteral> NAME(NAME##_init,                   \
+                                                std::size(NAME##_init) - 1);
+#include "Opts.inc" // 2nd expansion: PREFIX entries become ArrayRef constants.
+#undef PREFIX
+
+using namespace llvm::opt;
+static constexpr opt::OptTable::Info InfoTable[] = {
+#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
+#include "Opts.inc" // 3rd expansion: OPTION entries become InfoTable rows.
+#undef OPTION
+};
+
+class CGDataOptTable : public opt::GenericOptTable { // Table used by parseArgs.
+public:
+  CGDataOptTable() : GenericOptTable(InfoTable) {} // Built from InfoTable.
+};
+} // end anonymous namespace
+
+// Options: file-scope state filled in by parseArgs and read by the *_main fns.
+static StringRef ToolName; // Set from argv[0].
+static StringRef OutputFilename = "-"; // Replaced when OPT_output is given.
+static StringRef Filename; // First element of InputFilenames.
+static bool ShowCGDataVersion; // True when OPT_cgdata_version is present.
+static CGDataAction Action; // NOTE(review): assigned outside the visible code.
+static std::optional<CGDataFormat> OutputFormat; // Unset without OPT_format.
+static std::vector<std::string> InputFilenames; // All OPT_INPUT values.
+
+// TODO: Add a doc, https://llvm.org/docs/CommandGuide/llvm-cgdata.html
+
+/// Print a diagnostic to stderr and terminate the process with exit status 1.
+///
+/// The output is the WithColor error prefix, then "<Whence>: " when \p Whence
+/// is non-empty, then \p Message and a newline. When \p Hint is non-empty it
+/// is printed on a following line after the WithColor note prefix.
+///
+/// \param Message diagnostic text. Taken by const reference, as Twine is meant
+///                to be passed, so the temporaries it refers to are only ever
+///                used within the caller's full expression.
+/// \param Whence  optional origin of the error (callers pass a file name).
+/// \param Hint    optional follow-up note.
+static void exitWithError(const Twine &Message, std::string Whence = "",
+                          std::string Hint = "") {
+  WithColor::error();
+  if (!Whence.empty())
+    errs() << Whence << ": ";
+  errs() << Message << "\n";
+  if (!Hint.empty())
+    WithColor::note() << Hint << "\n";
+  // This function never returns to its caller.
+  ::exit(1);
+}
+
+/// Report the failure carried by \p E, attributed to \p Whence, and exit.
+///
+/// A CGDataError payload is reported through its own message(); any other
+/// payload is rendered with toString(). Both paths end in the message-taking
+/// exitWithError overload, which terminates the process.
+static void exitWithError(Error E, StringRef Whence = "") {
+  const std::string Origin(Whence);
+
+  if (!E.isA<CGDataError>())
+    exitWithError(toString(std::move(E)), Origin);
+  else
+    handleAllErrors(std::move(E), [&Origin](const CGDataError &CGErr) {
+      exitWithError(CGErr.message(), Origin);
+    });
+}
+
+/// Report the message of \p EC, attributed to \p Whence, via exitWithError.
+static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
+  const std::string Description = EC.message();
+  exitWithError(Description, std::string(Whence));
+}
+
+/// Convert action: read the codegen data stored in Filename and write it to
+/// OutputFilename, as text when OutputFormat is Text and with
+/// Writer.write() otherwise.
+///
+/// argc/argv are not used; all inputs come from the file-scope option
+/// variables. Every failure is routed to exitWithError*, so the function only
+/// ever returns 0.
+static int convert_main(int argc, const char *argv[]) {
+  const bool EmitText = OutputFormat == CGDataFormat::Text;
+
+  // The output stream is opened first, before the input is looked at.
+  std::error_code OpenEC;
+  raw_fd_ostream Out(OutputFilename, OpenEC,
+                     EmitText ? sys::fs::OF_TextWithCRLF : sys::fs::OF_None);
+  if (OpenEC)
+    exitWithErrorCode(OpenEC, OutputFilename);
+
+  auto RealFS = vfs::getRealFileSystem();
+  auto ReaderOrErr = CodeGenDataReader::create(Filename, *RealFS);
+  if (Error E = ReaderOrErr.takeError())
+    exitWithError(std::move(E), Filename);
+  auto Reader = ReaderOrErr->get();
+
+  // Only an outlined hash tree, when present, is carried over to the writer.
+  CodeGenDataWriter Writer;
+  if (Reader->hasOutlinedHashTree()) {
+    OutlinedHashTreeRecord Record(Reader->releaseOutlinedHashTree());
+    Writer.addRecord(Record);
+  }
+
+  if (Error E = EmitText ? Writer.writeText(Out) : Writer.write(Out))
+    exitWithError(std::move(E));
+
+  return 0;
+}
+
+// Forward declaration: handleArchive and handleBuffer call each other.
+static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
+                         OutlinedHashTreeRecord &GlobalOutlineRecord);
+
+/// Pass every child of the archive \p Arch to handleBuffer, labelling each one
+/// "<Filename>(<member name>)".
+///
+/// \returns true only if handleBuffer returned true for every child. Failure
+/// to obtain a child's buffer or name, or an error while iterating the
+/// children, is reported through exitWithError.
+static bool handleArchive(StringRef Filename, Archive &Arch,
+                          OutlinedHashTreeRecord &GlobalOutlineRecord) {
+  bool AllMembersOk = true;
+  Error IterErr = Error::success();
+  for (const auto &Member : Arch.children(IterErr)) {
+    auto MemberBuf = Member.getMemoryBufferRef();
+    if (Error E = MemberBuf.takeError())
+      exitWithError(std::move(E), Filename);
+
+    auto MemberName = Member.getName();
+    if (Error E = MemberName.takeError())
+      exitWithError(std::move(E), Filename);
+
+    const std::string Label = (Filename + "(" + *MemberName + ")").str();
+    // Keep visiting the remaining children even after one of them fails.
+    if (!handleBuffer(Label, *MemberBuf, GlobalOutlineRecord))
+      AllMembersOk = false;
+  }
+  if (IterErr)
+    exitWithError(std::move(IterErr), Filename);
+  return AllMembersOk;
+}
+
+/// Interpret \p Buffer as a binary and merge its codegen data into
+/// \p GlobalOutlineRecord.
+///
+/// Object files are merged with CodeGenDataReader::mergeFromObjectFile;
+/// archives are handed to handleArchive. Any other kind of binary is reported
+/// on stderr and makes the function return false. Failure to create the
+/// binary or to merge an object file is reported through exitWithError.
+///
+/// \param Filename label used in diagnostics.
+/// \returns false for an unsupported binary, handleArchive's result for an
+/// archive, and true otherwise.
+static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
+                         OutlinedHashTreeRecord &GlobalOutlineRecord) {
+  Expected<std::unique_ptr<object::Binary>> BinOrErr =
+      object::createBinary(Buffer);
+  if (Error E = BinOrErr.takeError())
+    exitWithError(std::move(E), Filename);
+
+  object::Binary *Bin = BinOrErr->get();
+
+  if (auto *Obj = dyn_cast<ObjectFile>(Bin)) {
+    if (Error E =
+            CodeGenDataReader::mergeFromObjectFile(Obj, GlobalOutlineRecord))
+      exitWithError(std::move(E), Filename);
+    return true;
+  }
+
+  if (auto *Arch = dyn_cast<Archive>(Bin))
+    return handleArchive(Filename, *Arch, GlobalOutlineRecord);
+
+  // TODO: Support for the MachO universal binary format.
+  errs() << "Error: unsupported binary file: " << Filename << "\n";
+  return false;
+}
+
+/// Load \p Filename with MemoryBuffer::getFileOrSTDIN and forward its contents
+/// to handleBuffer. A load failure is reported through exitWithErrorCode.
+///
+/// \returns the result of handleBuffer.
+static bool handleFile(StringRef Filename,
+                       OutlinedHashTreeRecord &GlobalOutlineRecord) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> FileBuf =
+      MemoryBuffer::getFileOrSTDIN(Filename);
+  if (!FileBuf)
+    exitWithErrorCode(FileBuf.getError(), Filename);
+
+  MemoryBuffer &Contents = **FileBuf;
+  return handleBuffer(Filename, Contents, GlobalOutlineRecord);
+}
+
+/// Merge action: run handleFile over every entry of InputFilenames,
+/// accumulating into one OutlinedHashTreeRecord, then write the result to
+/// OutputFilename (as text when OutputFormat is Text, with Writer.write()
+/// otherwise).
+///
+/// argc/argv are not used. All inputs are visited before a failed one aborts
+/// the run; every failure ends in exitWithError*, so the function only ever
+/// returns 0.
+static int merge_main(int argc, const char *argv[]) {
+  OutlinedHashTreeRecord GlobalOutlineRecord;
+  bool AllInputsOk = true;
+  for (auto &Input : InputFilenames)
+    if (!handleFile(Input, GlobalOutlineRecord))
+      AllInputsOk = false;
+
+  if (!AllInputsOk)
+    exitWithError("failed to merge codegen data files.");
+
+  // An empty merged record is not added to the writer.
+  CodeGenDataWriter Writer;
+  if (!GlobalOutlineRecord.empty())
+    Writer.addRecord(GlobalOutlineRecord);
+
+  const bool EmitText = OutputFormat == CGDataFormat::Text;
+  std::error_code OpenEC;
+  raw_fd_ostream Out(OutputFilename, OpenEC,
+                     EmitText ? sys::fs::OF_TextWithCRLF : sys::fs::OF_None);
+  if (OpenEC)
+    exitWithErrorCode(OpenEC, OutputFilename);
+
+  if (Error E = EmitText ? Writer.writeText(Out) : Writer.write(Out))
+    exitWithError(std::move(E));
+
+  return 0;
+}
+
+/// Show action: print a textual summary of the codegen data stored in
+/// Filename to OutputFilename.
+///
+/// The version line is printed only when ShowCGDataVersion is set. When the
+/// reader has an outlined hash tree, its total node count, terminal node
+/// count and depth are printed.
+///
+/// argc/argv are not used. Failure to open the output or to create the reader
+/// is reported through exitWithError*, so the function only ever returns 0.
+static int show_main(int argc, const char *argv[]) {
+  std::error_code EC;
+  // Pass the StringRef itself, as convert_main and merge_main do. Going
+  // through StringRef::data() would rely on the referenced bytes being
+  // NUL-terminated, which StringRef does not guarantee.
+  raw_fd_ostream OS(OutputFilename, EC, sys::fs::OF_TextWithCRLF);
+  if (EC)
+    exitWithErrorCode(EC, OutputFilename);
+
+  auto FS = vfs::getRealFileSystem();
+  auto ReaderOrErr = CodeGenDataReader::create(Filename, *FS);
+  if (Error E = ReaderOrErr.takeError())
+    exitWithError(std::move(E), Filename);
+
+  auto Reader = ReaderOrErr->get();
+  if (ShowCGDataVersion)
+    OS << "Version: " << Reader->getVersion() << "\n";
+
+  if (Reader->hasOutlinedHashTree()) {
+    auto Tree = Reader->releaseOutlinedHashTree();
+    OS << "Outlined hash tree:\n";
+    OS << "  Total Node Count: " << Tree->size() << "\n";
+    OS << "  Terminal Node Count: " << Tree->size(/*GetTerminalCountOnly=*/true)
+       << "\n";
+    OS << "  Depth: " << Tree->depth() << "\n";
+  }
+
+  return 0;
+}
+
+static void parseArgs(int argc, char **argv) {
+  CGDataOptTable Tbl;
+  ToolName = argv[0];
+  llvm::BumpPtrAllocator A;
+  llvm::StringSaver Saver{A};
+  llvm::opt::InputArgList Args =
+      Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
+        llvm::errs() << Msg << '\n';
+        std::exit(1);
+      });
+
+  if (Args.hasArg(OPT_help)) {
+    Tbl.printHelp(
+        llvm::outs(),
+        "llvm-cgdata <action> [options] (<binary files>|<.cgdata file>)",
+        ToolName.str().c_str());
+    std::exit(0);
+  }
+  if (Args.hasArg(OPT_version)) {
+    cl::PrintVersionMessage();
+    std::exit(0);
+  }
+
+  ShowCGDataVersion = Args.hasArg(OPT_cgdata_version);
+
+  auto parseFormat = [](const StringRef FT) {
+    return StringSwitch<CGDataFormat>(FT)
+        .Case("text", CGDataFormat::Text)
+        .Case("binary", CGDataFormat::Binary)
+        .Default(CGDataFormat::Invalid);
+  };
+  if (opt::Arg *A = Args.getLastArg(OPT_format)) {
+    StringRef OF = A->getValue();
+    OutputFormat = parseFormat(OF);
+    if (OutputFormat == CGDataFormat::Invalid)
+      exitWithError("unsupported format '" + OF + "'");
+  }
+
+  InputFilenames = Args.getAllArgValues(OPT_INPUT);
+  if (InputFilenames.empty())
+    exitWithError("No input file is specified.");
+  Filename = InputFilenames[0];
+
+  if (Args.hasArg(OPT_output)) {
+    OutputFilename = Args.getLastArgValue(OPT_output);
+    for (auto &Filename : InputFilenames)
+      if (Filename == OutputFilename)
+        exitWithError(
+            "Input file name cannot be the same as the output file name!\n");
+  }
+
+  SmallVector<opt::Arg *, 1> ActionArgs(Args.filtered(OPT_action_group));
----------------
ellishg wrote:

Do you expect this to ever have more than one element? Can we use `ArrayRef` or some constant-size container?

https://github.com/llvm/llvm-project/pull/101461


More information about the llvm-commits mailing list