[llvm] 9bb5556 - Reland [CGData] llvm-cgdata #89884 (#101461)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 20 07:26:53 PDT 2024


Author: Kyungwoo Lee
Date: 2024-08-20T07:26:50-07:00
New Revision: 9bb555688caf6ae4ba89fee5baa3dc29fec6a9b1

URL: https://github.com/llvm/llvm-project/commit/9bb555688caf6ae4ba89fee5baa3dc29fec6a9b1
DIFF: https://github.com/llvm/llvm-project/commit/9bb555688caf6ae4ba89fee5baa3dc29fec6a9b1.diff

LOG: Reland [CGData] llvm-cgdata #89884 (#101461)

Reland [CGData] llvm-cgdata #89884 using `Opt` instead of `cl`
- Action options are required, `--convert`, `--show`, `--merge`. This
was similar to sub-commands previously implemented, but having a prefix
`--`.
- `--format` option is added, which specifies `text` or `binary`.

---------

Co-authored-by: Kyungwoo Lee <kyulee at fb.com>

Added: 
    llvm/include/llvm/CGData/CodeGenData.h
    llvm/include/llvm/CGData/CodeGenData.inc
    llvm/include/llvm/CGData/CodeGenDataReader.h
    llvm/include/llvm/CGData/CodeGenDataWriter.h
    llvm/include/llvm/CGData/OutlinedHashTree.h
    llvm/include/llvm/CGData/OutlinedHashTreeRecord.h
    llvm/lib/CGData/CMakeLists.txt
    llvm/lib/CGData/CodeGenData.cpp
    llvm/lib/CGData/CodeGenDataReader.cpp
    llvm/lib/CGData/CodeGenDataWriter.cpp
    llvm/lib/CGData/OutlinedHashTree.cpp
    llvm/lib/CGData/OutlinedHashTreeRecord.cpp
    llvm/test/tools/llvm-cgdata/convert.test
    llvm/test/tools/llvm-cgdata/empty.test
    llvm/test/tools/llvm-cgdata/error.test
    llvm/test/tools/llvm-cgdata/merge-archive.test
    llvm/test/tools/llvm-cgdata/merge-concat.test
    llvm/test/tools/llvm-cgdata/merge-double.test
    llvm/test/tools/llvm-cgdata/merge-single.test
    llvm/test/tools/llvm-cgdata/show.test
    llvm/tools/llvm-cgdata/CMakeLists.txt
    llvm/tools/llvm-cgdata/Opts.td
    llvm/tools/llvm-cgdata/llvm-cgdata.cpp
    llvm/unittests/CGData/CMakeLists.txt
    llvm/unittests/CGData/OutlinedHashTreeRecordTest.cpp
    llvm/unittests/CGData/OutlinedHashTreeTest.cpp

Modified: 
    llvm/lib/CMakeLists.txt
    llvm/test/CMakeLists.txt
    llvm/test/lit.cfg.py
    llvm/unittests/CMakeLists.txt

Removed: 
    llvm/include/llvm/CodeGenData/OutlinedHashTree.h
    llvm/include/llvm/CodeGenData/OutlinedHashTreeRecord.h
    llvm/lib/CodeGenData/CMakeLists.txt
    llvm/lib/CodeGenData/OutlinedHashTree.cpp
    llvm/lib/CodeGenData/OutlinedHashTreeRecord.cpp
    llvm/unittests/CodeGenData/CMakeLists.txt
    llvm/unittests/CodeGenData/OutlinedHashTreeRecordTest.cpp
    llvm/unittests/CodeGenData/OutlinedHashTreeTest.cpp


################################################################################
diff  --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h
new file mode 100644
index 00000000000000..84133a433170fe
--- /dev/null
+++ b/llvm/include/llvm/CGData/CodeGenData.h
@@ -0,0 +1,204 @@
+//===- CodeGenData.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for codegen data that has stable summary which
+// can be used to optimize the code in the subsequent codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CGDATA_CODEGENDATA_H
+#define LLVM_CGDATA_CODEGENDATA_H
+
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/CGData/OutlinedHashTree.h"
+#include "llvm/CGData/OutlinedHashTreeRecord.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TargetParser/Triple.h"
+#include <mutex>
+
+namespace llvm {
+
+enum CGDataSectKind {
+#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
+#include "llvm/CGData/CodeGenData.inc"
+};
+
+std::string getCodeGenDataSectionName(CGDataSectKind CGSK,
+                                      Triple::ObjectFormatType OF,
+                                      bool AddSegmentInfo = true);
+
+enum class CGDataKind {
+  Unknown = 0x0,
+  // A function outlining info.
+  FunctionOutlinedHashTree = 0x1,
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree)
+};
+
+const std::error_category &cgdata_category();
+
+enum class cgdata_error {
+  success = 0,
+  eof,
+  bad_magic,
+  bad_header,
+  empty_cgdata,
+  malformed,
+  unsupported_version,
+};
+
+inline std::error_code make_error_code(cgdata_error E) {
+  return std::error_code(static_cast<int>(E), cgdata_category());
+}
+
+class CGDataError : public ErrorInfo<CGDataError> {
+public:
+  CGDataError(cgdata_error Err, const Twine &ErrStr = Twine())
+      : Err(Err), Msg(ErrStr.str()) {
+    assert(Err != cgdata_error::success && "Not an error");
+  }
+
+  std::string message() const override;
+
+  void log(raw_ostream &OS) const override { OS << message(); }
+
+  std::error_code convertToErrorCode() const override {
+    return make_error_code(Err);
+  }
+
+  cgdata_error get() const { return Err; }
+  const std::string &getMessage() const { return Msg; }
+
+  /// Consume an Error and return the raw enum value contained within it, and
+  /// the optional error message. The Error must either be a success value, or
+  /// contain a single CGDataError.
+  static std::pair<cgdata_error, std::string> take(Error E) {
+    auto Err = cgdata_error::success;
+    std::string Msg;
+    handleAllErrors(std::move(E), [&Err, &Msg](const CGDataError &IPE) {
+      assert(Err == cgdata_error::success && "Multiple errors encountered");
+      Err = IPE.get();
+      Msg = IPE.getMessage();
+    });
+    return {Err, Msg};
+  }
+
+  static char ID;
+
+private:
+  cgdata_error Err;
+  std::string Msg;
+};
+
+enum CGDataMode {
+  None,
+  Read,
+  Write,
+};
+
+class CodeGenData {
+  /// Global outlined hash tree that has oulined hash sequences across modules.
+  std::unique_ptr<OutlinedHashTree> PublishedHashTree;
+
+  /// This flag is set when -fcodegen-data-generate is passed.
+  /// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
+  bool EmitCGData;
+
+  /// This is a singleton instance which is thread-safe. Unlike profile data
+  /// which is largely function-based, codegen data describes the whole module.
+  /// Therefore, this can be initialized once, and can be used across modules
+  /// instead of constructing the same one for each codegen backend.
+  static std::unique_ptr<CodeGenData> Instance;
+  static std::once_flag OnceFlag;
+
+  CodeGenData() = default;
+
+public:
+  ~CodeGenData() = default;
+
+  static CodeGenData &getInstance();
+
+  /// Returns true if we have a valid outlined hash tree.
+  bool hasOutlinedHashTree() {
+    return PublishedHashTree && !PublishedHashTree->empty();
+  }
+
+  /// Returns the outlined hash tree. This can be globally used in a read-only
+  /// manner.
+  const OutlinedHashTree *getOutlinedHashTree() {
+    return PublishedHashTree.get();
+  }
+
+  /// Returns true if we should write codegen data.
+  bool emitCGData() { return EmitCGData; }
+
+  /// Publish the (globally) merged or read outlined hash tree.
+  void publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
+    PublishedHashTree = std::move(HashTree);
+    // Ensure we disable emitCGData as we do not want to read and write both.
+    EmitCGData = false;
+  }
+};
+
+namespace cgdata {
+
+inline bool hasOutlinedHashTree() {
+  return CodeGenData::getInstance().hasOutlinedHashTree();
+}
+
+inline const OutlinedHashTree *getOutlinedHashTree() {
+  return CodeGenData::getInstance().getOutlinedHashTree();
+}
+
+inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); }
+
+inline void
+publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
+  CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
+}
+
+void warn(Error E, StringRef Whence = "");
+void warn(Twine Message, std::string Whence = "", std::string Hint = "");
+
+} // end namespace cgdata
+
+namespace IndexedCGData {
+
+// A signature for data validation, representing "\xffcgdata\x81" in
+// little-endian order
+const uint64_t Magic = 0x81617461646763ff;
+
+enum CGDataVersion {
+  // Version 1 is the first version. This version supports the outlined
+  // hash tree.
+  Version1 = 1,
+  CurrentVersion = CG_DATA_INDEX_VERSION
+};
+const uint64_t Version = CGDataVersion::CurrentVersion;
+
+struct Header {
+  uint64_t Magic;
+  uint32_t Version;
+  uint32_t DataKind;
+  uint64_t OutlinedHashTreeOffset;
+
+  // New fields should only be added at the end to ensure that the size
+  // computation is correct. The methods below need to be updated to ensure that
+  // the new field is read correctly.
+
+  // Reads a header struct from the buffer.
+  static Expected<Header> readFromBuffer(const unsigned char *Curr);
+};
+
+} // end namespace IndexedCGData
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_PREPARE_H

diff  --git a/llvm/include/llvm/CGData/CodeGenData.inc b/llvm/include/llvm/CGData/CodeGenData.inc
new file mode 100644
index 00000000000000..08ec14ea051a0c
--- /dev/null
+++ b/llvm/include/llvm/CGData/CodeGenData.inc
@@ -0,0 +1,46 @@
+/*===-- CodeGenData.inc ----------------------------------------*- C++ -*-=== *\
+|*
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+|* See https://llvm.org/LICENSE.txt for license information.
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+|*
+\*===----------------------------------------------------------------------===*/
+/*
+ * This is the main file that defines all the data structure, signature,
+ * constant literals that are shared across compiler, host tools (reader/writer)
+ * to support codegen data.
+ *
+\*===----------------------------------------------------------------------===*/
+
+/* Helper macros.  */
+#define CG_DATA_SIMPLE_QUOTE(x) #x
+#define CG_DATA_QUOTE(x) CG_DATA_SIMPLE_QUOTE(x)
+
+#ifdef CG_DATA_SECT_ENTRY
+#define CG_DATA_DEFINED
+CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON),
+                   CG_DATA_OUTLINE_COFF, "__DATA,")
+
+#undef CG_DATA_SECT_ENTRY
+#endif
+
+/* section name strings common to all targets other
+   than WIN32 */
+#define CG_DATA_OUTLINE_COMMON __llvm_outline
+/* Since cg data sections are not allocated, we don't need to
+ * access them at runtime.
+ */
+#define CG_DATA_OUTLINE_COFF ".loutline"
+
+#ifdef _WIN32
+/* Runtime section names and name strings.  */
+#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF
+
+#else
+/* Runtime section names and name strings.  */
+#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
+
+#endif
+
+/* Indexed codegen data format version (start from 1). */
+#define CG_DATA_INDEX_VERSION 1

diff  --git a/llvm/include/llvm/CGData/CodeGenDataReader.h b/llvm/include/llvm/CGData/CodeGenDataReader.h
new file mode 100644
index 00000000000000..1ee4bfbe480233
--- /dev/null
+++ b/llvm/include/llvm/CGData/CodeGenDataReader.h
@@ -0,0 +1,154 @@
+//===- CodeGenDataReader.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for reading codegen data.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CGDATA_CODEGENDATAREADER_H
+#define LLVM_CGDATA_CODEGENDATAREADER_H
+
+#include "llvm/CGData/CodeGenData.h"
+#include "llvm/CGData/OutlinedHashTreeRecord.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/VirtualFileSystem.h"
+
+namespace llvm {
+
+class CodeGenDataReader {
+  cgdata_error LastError = cgdata_error::success;
+  std::string LastErrorMsg;
+
+public:
+  CodeGenDataReader() = default;
+  virtual ~CodeGenDataReader() = default;
+
+  /// Read the header.  Required before reading first record.
+  virtual Error read() = 0;
+  /// Return the codegen data version.
+  virtual uint32_t getVersion() const = 0;
+  /// Return the codegen data kind.
+  virtual CGDataKind getDataKind() const = 0;
+  /// Return true if the data has an outlined hash tree.
+  virtual bool hasOutlinedHashTree() const = 0;
+  /// Return the outlined hash tree that is released from the reader.
+  std::unique_ptr<OutlinedHashTree> releaseOutlinedHashTree() {
+    return std::move(HashTreeRecord.HashTree);
+  }
+
+  /// Factory method to create an appropriately typed reader for the given
+  /// codegen data file path and file system.
+  static Expected<std::unique_ptr<CodeGenDataReader>>
+  create(const Twine &Path, vfs::FileSystem &FS);
+
+  /// Factory method to create an appropriately typed reader for the given
+  /// memory buffer.
+  static Expected<std::unique_ptr<CodeGenDataReader>>
+  create(std::unique_ptr<MemoryBuffer> Buffer);
+
+  /// Extract the cgdata embedded in sections from the given object file and
+  /// merge them into the GlobalOutlineRecord. This is a static helper that
+  /// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds.
+  static Error mergeFromObjectFile(const object::ObjectFile *Obj,
+                                   OutlinedHashTreeRecord &GlobalOutlineRecord);
+
+protected:
+  /// The outlined hash tree that has been read. When it's released by
+  /// releaseOutlinedHashTree(), it's no longer valid.
+  OutlinedHashTreeRecord HashTreeRecord;
+
+  /// Set the current error and return same.
+  Error error(cgdata_error Err, const std::string &ErrMsg = "") {
+    LastError = Err;
+    LastErrorMsg = ErrMsg;
+    if (Err == cgdata_error::success)
+      return Error::success();
+    return make_error<CGDataError>(Err, ErrMsg);
+  }
+
+  Error error(Error &&E) {
+    handleAllErrors(std::move(E), [&](const CGDataError &IPE) {
+      LastError = IPE.get();
+      LastErrorMsg = IPE.getMessage();
+    });
+    return make_error<CGDataError>(LastError, LastErrorMsg);
+  }
+
+  /// Clear the current error and return a successful one.
+  Error success() { return error(cgdata_error::success); }
+};
+
+class IndexedCodeGenDataReader : public CodeGenDataReader {
+  /// The codegen data file contents.
+  std::unique_ptr<MemoryBuffer> DataBuffer;
+  /// The header
+  IndexedCGData::Header Header;
+
+public:
+  IndexedCodeGenDataReader(std::unique_ptr<MemoryBuffer> DataBuffer)
+      : DataBuffer(std::move(DataBuffer)) {}
+  IndexedCodeGenDataReader(const IndexedCodeGenDataReader &) = delete;
+  IndexedCodeGenDataReader &
+  operator=(const IndexedCodeGenDataReader &) = delete;
+
+  /// Return true if the given buffer is in binary codegen data format.
+  static bool hasFormat(const MemoryBuffer &Buffer);
+  /// Read the contents including the header.
+  Error read() override;
+  /// Return the codegen data version.
+  uint32_t getVersion() const override { return Header.Version; }
+  /// Return the codegen data kind.
+  CGDataKind getDataKind() const override {
+    return static_cast<CGDataKind>(Header.DataKind);
+  }
+  /// Return true if the header indicates the data has an outlined hash tree.
+  /// This does not mean that the data is still available.
+  bool hasOutlinedHashTree() const override {
+    return Header.DataKind &
+           static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
+  }
+};
+
+/// This format is a simple text format that's suitable for test data.
+/// The header is a custom format starting with `:` per line to indicate which
+/// codegen data is recorded. `#` is used to indicate a comment.
+/// The subsequent data is a YAML format per each codegen data in order.
+/// Currently, it only has a function outlined hash tree.
+class TextCodeGenDataReader : public CodeGenDataReader {
+  /// The codegen data file contents.
+  std::unique_ptr<MemoryBuffer> DataBuffer;
+  /// Iterator over the profile data.
+  line_iterator Line;
+  /// Describe the kind of the codegen data.
+  CGDataKind DataKind = CGDataKind::Unknown;
+
+public:
+  TextCodeGenDataReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
+      : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
+  TextCodeGenDataReader(const TextCodeGenDataReader &) = delete;
+  TextCodeGenDataReader &operator=(const TextCodeGenDataReader &) = delete;
+
+  /// Return true if the given buffer is in text codegen data format.
+  static bool hasFormat(const MemoryBuffer &Buffer);
+  /// Read the contents including the header.
+  Error read() override;
+  /// Text format does not have version, so return 0.
+  uint32_t getVersion() const override { return 0; }
+  /// Return the codegen data kind.
+  CGDataKind getDataKind() const override { return DataKind; }
+  /// Return true if the header indicates the data has an outlined hash tree.
+  /// This does not mean that the data is still available.
+  bool hasOutlinedHashTree() const override {
+    return static_cast<uint32_t>(DataKind) &
+           static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CGDATA_CODEGENDATAREADER_H

diff  --git a/llvm/include/llvm/CGData/CodeGenDataWriter.h b/llvm/include/llvm/CGData/CodeGenDataWriter.h
new file mode 100644
index 00000000000000..5cb8377b1d07e5
--- /dev/null
+++ b/llvm/include/llvm/CGData/CodeGenDataWriter.h
@@ -0,0 +1,100 @@
+//===- CodeGenDataWriter.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing codegen data.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CGDATA_CODEGENDATAWRITER_H
+#define LLVM_CGDATA_CODEGENDATAWRITER_H
+
+#include "llvm/CGData/CodeGenData.h"
+#include "llvm/CGData/OutlinedHashTreeRecord.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
+/// A struct to define how the data stream should be patched.
+struct CGDataPatchItem {
+  uint64_t Pos; // Where to patch.
+  uint64_t *D;  // Pointer to an array of source data.
+  int N;        // Number of elements in \c D array.
+};
+
+/// A wrapper class to abstract writer stream with support of bytes
+/// back patching.
+class CGDataOStream {
+public:
+  CGDataOStream(raw_fd_ostream &FD)
+      : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {}
+  CGDataOStream(raw_string_ostream &STR)
+      : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {}
+
+  uint64_t tell() { return OS.tell(); }
+  void write(uint64_t V) { LE.write<uint64_t>(V); }
+  void write32(uint32_t V) { LE.write<uint32_t>(V); }
+  void write8(uint8_t V) { LE.write<uint8_t>(V); }
+
+  // \c patch can only be called when all data is written and flushed.
+  // For raw_string_ostream, the patch is done on the target string
+  // directly and it won't be reflected in the stream's internal buffer.
+  void patch(ArrayRef<CGDataPatchItem> P);
+
+  // If \c OS is an instance of \c raw_fd_ostream, this field will be
+  // true. Otherwise, \c OS will be an raw_string_ostream.
+  bool IsFDOStream;
+  raw_ostream &OS;
+  support::endian::Writer LE;
+};
+
+class CodeGenDataWriter {
+  /// The outlined hash tree to be written.
+  OutlinedHashTreeRecord HashTreeRecord;
+
+  /// A bit mask describing the kind of the codegen data.
+  CGDataKind DataKind = CGDataKind::Unknown;
+
+public:
+  CodeGenDataWriter() = default;
+  ~CodeGenDataWriter() = default;
+
+  /// Add the outlined hash tree record. The input Record is released.
+  void addRecord(OutlinedHashTreeRecord &Record);
+
+  /// Write the codegen data to \c OS
+  Error write(raw_fd_ostream &OS);
+
+  /// Write the codegen data in text format to \c OS
+  Error writeText(raw_fd_ostream &OS);
+
+  /// Return the attributes of the current CGData.
+  CGDataKind getCGDataKind() const { return DataKind; }
+
+  /// Return true if the header indicates the data has an outlined hash tree.
+  bool hasOutlinedHashTree() const {
+    return static_cast<uint32_t>(DataKind) &
+           static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
+  }
+
+private:
+  /// The offset of the outlined hash tree in the file.
+  uint64_t OutlinedHashTreeOffset;
+
+  /// Write the codegen data header to \c COS
+  Error writeHeader(CGDataOStream &COS);
+
+  /// Write the codegen data header in text to \c OS
+  Error writeHeaderText(raw_fd_ostream &OS);
+
+  Error writeImpl(CGDataOStream &COS);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CGDATA_CODEGENDATAWRITER_H

diff  --git a/llvm/include/llvm/CodeGenData/OutlinedHashTree.h b/llvm/include/llvm/CGData/OutlinedHashTree.h
similarity index 97%
rename from llvm/include/llvm/CodeGenData/OutlinedHashTree.h
rename to llvm/include/llvm/CGData/OutlinedHashTree.h
index 2c8a9288f8a8c7..9ab36df863eef0 100644
--- a/llvm/include/llvm/CodeGenData/OutlinedHashTree.h
+++ b/llvm/include/llvm/CGData/OutlinedHashTree.h
@@ -12,8 +12,8 @@
 //
 //===---------------------------------------------------------------------===//
 
-#ifndef LLVM_CODEGENDATA_OUTLINEDHASHTREE_H
-#define LLVM_CODEGENDATA_OUTLINEDHASHTREE_H
+#ifndef LLVM_CGDATA_OUTLINEDHASHTREE_H
+#define LLVM_CGDATA_OUTLINEDHASHTREE_H
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StableHashing.h"

diff  --git a/llvm/include/llvm/CodeGenData/OutlinedHashTreeRecord.h b/llvm/include/llvm/CGData/OutlinedHashTreeRecord.h
similarity index 92%
rename from llvm/include/llvm/CodeGenData/OutlinedHashTreeRecord.h
rename to llvm/include/llvm/CGData/OutlinedHashTreeRecord.h
index de397c9ca5e70d..dd599ff6a7a624 100644
--- a/llvm/include/llvm/CodeGenData/OutlinedHashTreeRecord.h
+++ b/llvm/include/llvm/CGData/OutlinedHashTreeRecord.h
@@ -13,10 +13,10 @@
 //
 //===---------------------------------------------------------------------===//
 
-#ifndef LLVM_CODEGENDATA_OUTLINEDHASHTREERECORD_H
-#define LLVM_CODEGENDATA_OUTLINEDHASHTREERECORD_H
+#ifndef LLVM_CGDATA_OUTLINEDHASHTREERECORD_H
+#define LLVM_CGDATA_OUTLINEDHASHTREERECORD_H
 
-#include "llvm/CodeGenData/OutlinedHashTree.h"
+#include "llvm/CGData/OutlinedHashTree.h"
 
 namespace llvm {
 
@@ -72,4 +72,4 @@ struct OutlinedHashTreeRecord {
 
 } // end namespace llvm
 
-#endif // LLVM_CODEGENDATA_OUTLINEDHASHTREERECORD_H
+#endif // LLVM_CGDATA_OUTLINEDHASHTREERECORD_H

diff  --git a/llvm/lib/CodeGenData/CMakeLists.txt b/llvm/lib/CGData/CMakeLists.txt
similarity index 52%
rename from llvm/lib/CodeGenData/CMakeLists.txt
rename to llvm/lib/CGData/CMakeLists.txt
index f9d107f52a7153..ff1aab920e7a8c 100644
--- a/llvm/lib/CodeGenData/CMakeLists.txt
+++ b/llvm/lib/CGData/CMakeLists.txt
@@ -1,9 +1,12 @@
-add_llvm_component_library(LLVMCodeGenData
+add_llvm_component_library(LLVMCGData
+  CodeGenData.cpp
+  CodeGenDataReader.cpp
+  CodeGenDataWriter.cpp
   OutlinedHashTree.cpp
   OutlinedHashTreeRecord.cpp
 
   ADDITIONAL_HEADER_DIRS
-  ${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGenData
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/CGData
 
   DEPENDS
   intrinsics_gen

diff  --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp
new file mode 100644
index 00000000000000..9dd4b1674e094a
--- /dev/null
+++ b/llvm/lib/CGData/CodeGenData.cpp
@@ -0,0 +1,196 @@
+//===-- CodeGenData.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for codegen data that has stable summary which
+// can be used to optimize the code in the subsequent codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/CGData/CodeGenDataReader.h"
+#include "llvm/CGData/OutlinedHashTreeRecord.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/WithColor.h"
+
+#define DEBUG_TYPE "cg-data"
+
+using namespace llvm;
+using namespace cgdata;
+
+static std::string getCGDataErrString(cgdata_error Err,
+                                      const std::string &ErrMsg = "") {
+  std::string Msg;
+  raw_string_ostream OS(Msg);
+
+  switch (Err) {
+  case cgdata_error::success:
+    OS << "success";
+    break;
+  case cgdata_error::eof:
+    OS << "end of File";
+    break;
+  case cgdata_error::bad_magic:
+    OS << "invalid codegen data (bad magic)";
+    break;
+  case cgdata_error::bad_header:
+    OS << "invalid codegen data (file header is corrupt)";
+    break;
+  case cgdata_error::empty_cgdata:
+    OS << "empty codegen data";
+    break;
+  case cgdata_error::malformed:
+    OS << "malformed codegen data";
+    break;
+  case cgdata_error::unsupported_version:
+    OS << "unsupported codegen data version";
+    break;
+  }
+
+  // If optional error message is not empty, append it to the message.
+  if (!ErrMsg.empty())
+    OS << ": " << ErrMsg;
+
+  return OS.str();
+}
+
+namespace {
+
+// FIXME: This class is only here to support the transition to llvm::Error. It
+// will be removed once this transition is complete. Clients should prefer to
+// deal with the Error value directly, rather than converting to error_code.
+class CGDataErrorCategoryType : public std::error_category {
+  const char *name() const noexcept override { return "llvm.cgdata"; }
+
+  std::string message(int IE) const override {
+    return getCGDataErrString(static_cast<cgdata_error>(IE));
+  }
+};
+
+} // end anonymous namespace
+
+const std::error_category &llvm::cgdata_category() {
+  static CGDataErrorCategoryType ErrorCategory;
+  return ErrorCategory;
+}
+
+std::string CGDataError::message() const {
+  return getCGDataErrString(Err, Msg);
+}
+
+char CGDataError::ID = 0;
+
+namespace {
+
+const char *CodeGenDataSectNameCommon[] = {
+#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix)         \
+  SectNameCommon,
+#include "llvm/CGData/CodeGenData.inc"
+};
+
+const char *CodeGenDataSectNameCoff[] = {
+#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix)         \
+  SectNameCoff,
+#include "llvm/CGData/CodeGenData.inc"
+};
+
+const char *CodeGenDataSectNamePrefix[] = {
+#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Prefix,
+#include "llvm/CGData/CodeGenData.inc"
+};
+
+} // namespace
+
+namespace llvm {
+
+std::string getCodeGenDataSectionName(CGDataSectKind CGSK,
+                                      Triple::ObjectFormatType OF,
+                                      bool AddSegmentInfo) {
+  std::string SectName;
+
+  if (OF == Triple::MachO && AddSegmentInfo)
+    SectName = CodeGenDataSectNamePrefix[CGSK];
+
+  if (OF == Triple::COFF)
+    SectName += CodeGenDataSectNameCoff[CGSK];
+  else
+    SectName += CodeGenDataSectNameCommon[CGSK];
+
+  return SectName;
+}
+
+std::unique_ptr<CodeGenData> CodeGenData::Instance = nullptr;
+std::once_flag CodeGenData::OnceFlag;
+
+CodeGenData &CodeGenData::getInstance() {
+  std::call_once(CodeGenData::OnceFlag, []() {
+    Instance = std::unique_ptr<CodeGenData>(new CodeGenData());
+
+    // TODO: Initialize writer or reader mode for the client optimization.
+  });
+  return *(Instance.get());
+}
+
+namespace IndexedCGData {
+
+Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
+  using namespace support;
+
+  static_assert(std::is_standard_layout_v<llvm::IndexedCGData::Header>,
+                "The header should be standard layout type since we use offset "
+                "of fields to read.");
+  Header H;
+  H.Magic = endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
+  if (H.Magic != IndexedCGData::Magic)
+    return make_error<CGDataError>(cgdata_error::bad_magic);
+  H.Version = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
+  if (H.Version > IndexedCGData::CGDataVersion::CurrentVersion)
+    return make_error<CGDataError>(cgdata_error::unsupported_version);
+  H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
+
+  switch (H.Version) {
+    // When a new field is added to the header add a case statement here to
+    // compute the size as offset of the new field + size of the new field. This
+    // relies on the field being added to the end of the list.
+    static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version1,
+                  "Please update the size computation below if a new field has "
+                  "been added to the header, if not add a case statement to "
+                  "fall through to the latest version.");
+  case 1ull:
+    H.OutlinedHashTreeOffset =
+        endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
+  }
+
+  return H;
+}
+
+} // end namespace IndexedCGData
+
+namespace cgdata {
+
+void warn(Twine Message, std::string Whence, std::string Hint) {
+  WithColor::warning();
+  if (!Whence.empty())
+    errs() << Whence << ": ";
+  errs() << Message << "\n";
+  if (!Hint.empty())
+    WithColor::note() << Hint << "\n";
+}
+
+void warn(Error E, StringRef Whence) {
+  if (E.isA<CGDataError>()) {
+    handleAllErrors(std::move(E), [&](const CGDataError &IPE) {
+      warn(IPE.message(), Whence.str(), "");
+    });
+  }
+}
+
+} // end namespace cgdata
+
+} // end namespace llvm

diff  --git a/llvm/lib/CGData/CodeGenDataReader.cpp b/llvm/lib/CGData/CodeGenDataReader.cpp
new file mode 100644
index 00000000000000..f7f3a8f42af7e1
--- /dev/null
+++ b/llvm/lib/CGData/CodeGenDataReader.cpp
@@ -0,0 +1,175 @@
+//===- CodeGenDataReader.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for reading codegen data.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CGData/CodeGenDataReader.h"
+#include "llvm/CGData/OutlinedHashTreeRecord.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+#define DEBUG_TYPE "cg-data-reader"
+
+using namespace llvm;
+
+namespace llvm {
+
+static Expected<std::unique_ptr<MemoryBuffer>>
+setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
+  auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
+                                           : FS.getBufferForFile(Filename);
+  if (std::error_code EC = BufferOrErr.getError())
+    return errorCodeToError(EC);
+  return std::move(BufferOrErr.get());
+}
+
+Error CodeGenDataReader::mergeFromObjectFile(
+    const object::ObjectFile *Obj,
+    OutlinedHashTreeRecord &GlobalOutlineRecord) {
+  Triple TT = Obj->makeTriple();
+  auto CGOutLineName =
+      getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
+
+  for (auto &Section : Obj->sections()) {
+    Expected<StringRef> NameOrErr = Section.getName();
+    if (!NameOrErr)
+      return NameOrErr.takeError();
+    Expected<StringRef> ContentsOrErr = Section.getContents();
+    if (!ContentsOrErr)
+      return ContentsOrErr.takeError();
+    auto *Data = reinterpret_cast<const unsigned char *>(ContentsOrErr->data());
+    auto *EndData = Data + ContentsOrErr->size();
+
+    if (*NameOrErr == CGOutLineName) {
+      // In case dealing with an executable that has concatenated cgdata,
+      // we want to merge them into a single cgdata.
+      // Although it's not a typical workflow, we support this scenario.
+      while (Data != EndData) {
+        OutlinedHashTreeRecord LocalOutlineRecord;
+        LocalOutlineRecord.deserialize(Data);
+        GlobalOutlineRecord.merge(LocalOutlineRecord);
+      }
+    }
+    // TODO: Add support for other cgdata sections.
+  }
+
+  return Error::success();
+}
+
+Error IndexedCodeGenDataReader::read() {
+  using namespace support;
+
+  // The smallest header with the version 1 is 24 bytes
+  const unsigned MinHeaderSize = 24;
+  if (DataBuffer->getBufferSize() < MinHeaderSize)
+    return error(cgdata_error::bad_header);
+
+  auto *Start =
+      reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
+  auto *End =
+      reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd());
+  if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header))
+    return E;
+
+  if (hasOutlinedHashTree()) {
+    const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset;
+    if (Ptr >= End)
+      return error(cgdata_error::eof);
+    HashTreeRecord.deserialize(Ptr);
+  }
+
+  return success();
+}
+
+Expected<std::unique_ptr<CodeGenDataReader>>
+CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) {
+  // Set up the buffer to read.
+  auto BufferOrError = setupMemoryBuffer(Path, FS);
+  if (Error E = BufferOrError.takeError())
+    return std::move(E);
+  return CodeGenDataReader::create(std::move(BufferOrError.get()));
+}
+
+Expected<std::unique_ptr<CodeGenDataReader>>
+CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
+  if (Buffer->getBufferSize() == 0)
+    return make_error<CGDataError>(cgdata_error::empty_cgdata);
+
+  std::unique_ptr<CodeGenDataReader> Reader;
+  // Create the reader.
+  if (IndexedCodeGenDataReader::hasFormat(*Buffer))
+    Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer));
+  else if (TextCodeGenDataReader::hasFormat(*Buffer))
+    Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer));
+  else
+    return make_error<CGDataError>(cgdata_error::malformed);
+
+  // Initialize the reader and return the result.
+  if (Error E = Reader->read())
+    return std::move(E);
+
+  return std::move(Reader);
+}
+
+bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) {
+  using namespace support;
+  if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic))
+    return false;
+
+  uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
+      DataBuffer.getBufferStart());
+  // Verify that it's magical.
+  return Magic == IndexedCGData::Magic;
+}
+
+bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) {
+  // Verify that this really looks like plain ASCII text by checking a
+  // 'reasonable' number of characters (up to the magic size).
+  StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t));
+  return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); });
+}
+Error TextCodeGenDataReader::read() {
+  using namespace support;
+
+  // Parse the custom header line by line.
+  for (; !Line.is_at_eof(); ++Line) {
+    // Skip empty or whitespace-only lines
+    if (Line->trim().empty())
+      continue;
+
+    if (!Line->starts_with(":"))
+      break;
+    StringRef Str = Line->drop_front().rtrim();
+    if (Str.equals_insensitive("outlined_hash_tree"))
+      DataKind |= CGDataKind::FunctionOutlinedHashTree;
+    else
+      return error(cgdata_error::bad_header);
+  }
+
+  // We treat an empty header (that is a comment # only) as a valid header.
+  if (Line.is_at_eof()) {
+    if (DataKind == CGDataKind::Unknown)
+      return Error::success();
+    return error(cgdata_error::bad_header);
+  }
+
+  // The YAML docs follow after the header.
+  const char *Pos = Line->data();
+  size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
+                reinterpret_cast<size_t>(Pos);
+  yaml::Input YOS(StringRef(Pos, Size));
+  if (hasOutlinedHashTree())
+    HashTreeRecord.deserializeYAML(YOS);
+
+  // TODO: Add more yaml cgdata in order
+
+  return Error::success();
+}
+} // end namespace llvm

diff  --git a/llvm/lib/CGData/CodeGenDataWriter.cpp b/llvm/lib/CGData/CodeGenDataWriter.cpp
new file mode 100644
index 00000000000000..5f638be0fefe74
--- /dev/null
+++ b/llvm/lib/CGData/CodeGenDataWriter.cpp
@@ -0,0 +1,125 @@
+//===- CodeGenDataWriter.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing codegen data.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CGData/CodeGenDataWriter.h"
+
+#define DEBUG_TYPE "cg-data-writer"
+
+using namespace llvm;
+
+void CGDataOStream::patch(ArrayRef<CGDataPatchItem> P) {
+  using namespace support;
+
+  if (IsFDOStream) {
+    raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
+    const uint64_t LastPos = FDOStream.tell();
+    for (const auto &K : P) {
+      FDOStream.seek(K.Pos);
+      for (int I = 0; I < K.N; I++)
+        write(K.D[I]);
+    }
+    // Reset the stream to the last position after patching so that users
+    // don't accidentally overwrite data. This makes it consistent with
+    // the string stream below which replaces the data directly.
+    FDOStream.seek(LastPos);
+  } else {
+    raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
+    std::string &Data = SOStream.str(); // with flush
+    for (const auto &K : P) {
+      for (int I = 0; I < K.N; I++) {
+        uint64_t Bytes =
+            endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);
+        Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t),
+                     reinterpret_cast<const char *>(&Bytes), sizeof(uint64_t));
+      }
+    }
+  }
+}
+
+void CodeGenDataWriter::addRecord(OutlinedHashTreeRecord &Record) {
+  assert(Record.HashTree && "empty hash tree in the record");
+  HashTreeRecord.HashTree = std::move(Record.HashTree);
+
+  DataKind |= CGDataKind::FunctionOutlinedHashTree;
+}
+
+Error CodeGenDataWriter::write(raw_fd_ostream &OS) {
+  CGDataOStream COS(OS);
+  return writeImpl(COS);
+}
+
+Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) {
+  using namespace support;
+  IndexedCGData::Header Header;
+  Header.Magic = IndexedCGData::Magic;
+  Header.Version = IndexedCGData::Version;
+
+  // Set the CGDataKind depending on the kind.
+  Header.DataKind = 0;
+  if (static_cast<bool>(DataKind & CGDataKind::FunctionOutlinedHashTree))
+    Header.DataKind |=
+        static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
+
+  Header.OutlinedHashTreeOffset = 0;
+
+  // Only write up to the CGDataKind. We need to remember the offset of the
+  // remaining fields to allow back-patching later.
+  COS.write(Header.Magic);
+  COS.write32(Header.Version);
+  COS.write32(Header.DataKind);
+
+  // Save the location of Header.OutlinedHashTreeOffset field in \c COS.
+  OutlinedHashTreeOffset = COS.tell();
+
+  // Reserve the space for OutlinedHashTreeOffset field.
+  COS.write(0);
+
+  return Error::success();
+}
+
+Error CodeGenDataWriter::writeImpl(CGDataOStream &COS) {
+  if (Error E = writeHeader(COS))
+    return E;
+
+  uint64_t OutlinedHashTreeFieldStart = COS.tell();
+  if (hasOutlinedHashTree())
+    HashTreeRecord.serialize(COS.OS);
+
+  // Back patch the offsets.
+  CGDataPatchItem PatchItems[] = {
+      {OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1}};
+  COS.patch(PatchItems);
+
+  return Error::success();
+}
+
+Error CodeGenDataWriter::writeHeaderText(raw_fd_ostream &OS) {
+  if (hasOutlinedHashTree())
+    OS << "# Outlined stable hash tree\n:outlined_hash_tree\n";
+
+  // TODO: Add more data types in this header
+
+  return Error::success();
+}
+
+Error CodeGenDataWriter::writeText(raw_fd_ostream &OS) {
+  if (Error E = writeHeaderText(OS))
+    return E;
+
+  yaml::Output YOS(OS);
+  if (hasOutlinedHashTree())
+    HashTreeRecord.serializeYAML(YOS);
+
+  // TODO: Write more yaml cgdata in order
+
+  return Error::success();
+}

diff  --git a/llvm/lib/CodeGenData/OutlinedHashTree.cpp b/llvm/lib/CGData/OutlinedHashTree.cpp
similarity index 98%
rename from llvm/lib/CodeGenData/OutlinedHashTree.cpp
rename to llvm/lib/CGData/OutlinedHashTree.cpp
index d64098098de62b..7bf8168e5afa17 100644
--- a/llvm/lib/CodeGenData/OutlinedHashTree.cpp
+++ b/llvm/lib/CGData/OutlinedHashTree.cpp
@@ -12,7 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGenData/OutlinedHashTree.h"
+#include "llvm/CGData/OutlinedHashTree.h"
 
 #define DEBUG_TYPE "outlined-hash-tree"
 

diff  --git a/llvm/lib/CodeGenData/OutlinedHashTreeRecord.cpp b/llvm/lib/CGData/OutlinedHashTreeRecord.cpp
similarity index 99%
rename from llvm/lib/CodeGenData/OutlinedHashTreeRecord.cpp
rename to llvm/lib/CGData/OutlinedHashTreeRecord.cpp
index d3c67904083888..d1d57fe3fc9f4c 100644
--- a/llvm/lib/CodeGenData/OutlinedHashTreeRecord.cpp
+++ b/llvm/lib/CGData/OutlinedHashTreeRecord.cpp
@@ -13,7 +13,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGenData/OutlinedHashTreeRecord.h"
+#include "llvm/CGData/OutlinedHashTreeRecord.h"
 #include "llvm/ObjectYAML/YAML.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/EndianStream.h"

diff  --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt
index 638c3bd6f90f53..503c77cb13bd07 100644
--- a/llvm/lib/CMakeLists.txt
+++ b/llvm/lib/CMakeLists.txt
@@ -9,8 +9,8 @@ add_subdirectory(FileCheck)
 add_subdirectory(InterfaceStub)
 add_subdirectory(IRPrinter)
 add_subdirectory(IRReader)
+add_subdirectory(CGData)
 add_subdirectory(CodeGen)
-add_subdirectory(CodeGenData)
 add_subdirectory(CodeGenTypes)
 add_subdirectory(BinaryFormat)
 add_subdirectory(Bitcode)

diff  --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index 8abc1533362512..0f449fa2d45be9 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -74,6 +74,7 @@ set(LLVM_TEST_DEPENDS
           llvm-c-test
           llvm-cat
           llvm-cfi-verify
+          llvm-cgdata
           llvm-config
           llvm-cov
           llvm-ctxprof-util

diff  --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index e5e3dc7e1b4bd0..bee7aa3903a1f5 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -180,6 +180,7 @@ def get_asan_rtlib():
         "llvm-addr2line",
         "llvm-bcanalyzer",
         "llvm-bitcode-strip",
+        "llvm-cgdata",
         "llvm-config",
         "llvm-cov",
         "llvm-ctxprof-util",

diff  --git a/llvm/test/tools/llvm-cgdata/convert.test b/llvm/test/tools/llvm-cgdata/convert.test
new file mode 100644
index 00000000000000..632a7366d56a05
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/convert.test
@@ -0,0 +1,32 @@
+# Test dump between the binary and text formats.
+
+RUN: split-file %s %t
+
+RUN: llvm-cgdata --convert --format binary %t/dump.cgtext -o %t/dump.cgdata
+RUN: llvm-cgdata --convert --format text %t/dump.cgdata -o %t/dump-round.cgtext
+RUN: llvm-cgdata -c -f binary %t/dump-round.cgtext -o %t/dump-round.cgdata
+RUN: llvm-cgdata -c -f text %t/dump-round.cgtext -o %t/dump-round-round.cgtext
+RUN: 
diff  %t/dump.cgdata %t/dump-round.cgdata
+RUN: 
diff  %t/dump-round.cgtext %t/dump-round-round.cgtext
+
+;--- dump.cgtext
+# Outlined stable hash tree
+:outlined_hash_tree
+---
+0:
+  Hash:            0x0
+  Terminals:       0
+  SuccessorIds:    [ 1 ]
+1:
+  Hash:            0x1
+  Terminals:       0
+  SuccessorIds:    [ 2, 3 ]
+2:
+  Hash:            0x3
+  Terminals:       5
+  SuccessorIds:    [  ]
+3:
+  Hash:            0x2
+  Terminals:       4
+  SuccessorIds:    [  ]
+...

diff  --git a/llvm/test/tools/llvm-cgdata/empty.test b/llvm/test/tools/llvm-cgdata/empty.test
new file mode 100644
index 00000000000000..70d5ea4b800630
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/empty.test
@@ -0,0 +1,35 @@
+# Test no input file
+RUN: not llvm-cgdata --convert --output - 2>&1 | FileCheck %s --check-prefix=NOFILE --ignore-case
+NOFILE: error: No input file is specified.
+
+# Test for empty cgdata file, which is invalid.
+RUN: touch %t_emptyfile.cgtext
+RUN: not llvm-cgdata --convert %t_emptyfile.cgtext --format text 2>&1 | FileCheck %s --check-prefix=EMPTY
+EMPTY: {{.}}emptyfile.cgtext: empty codegen data
+
+# Test for empty header in the text format. It can be converted to a valid binary file.
+RUN: printf '#' > %t_emptyheader.cgtext
+RUN: llvm-cgdata --convert %t_emptyheader.cgtext --format binary -o %t_emptyheader.cgdata
+
+# Without any cgdata other than the header, no data shows by default.
+RUN: llvm-cgdata --show %t_emptyheader.cgdata | count 0
+
+# The version number appears when asked, as it's in the header
+RUN: llvm-cgdata --show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION
+VERSION: Version: 1
+
+# When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header.
+RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
+
+# Synthesize a header only cgdata.
+# struct Header {
+#   uint64_t Magic;
+#   uint32_t Version;
+#   uint32_t DataKind;
+#   uint64_t OutlinedHashTreeOffset;
+# }
+RUN: printf '\xffcgdata\x81' > %t_header.cgdata
+RUN: printf '\x01\x00\x00\x00' >> %t_header.cgdata
+RUN: printf '\x00\x00\x00\x00' >> %t_header.cgdata
+RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
+RUN: 
diff  %t_header.cgdata %t_emptyheader.cgdata

diff  --git a/llvm/test/tools/llvm-cgdata/error.test b/llvm/test/tools/llvm-cgdata/error.test
new file mode 100644
index 00000000000000..c992174505c1ad
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/error.test
@@ -0,0 +1,38 @@
+# Test various error cases
+
+# Synthesize a header only cgdata.
+# struct Header {
+#   uint64_t Magic;
+#   uint32_t Version;
+#   uint32_t DataKind;
+#   uint64_t OutlinedHashTreeOffset;
+# }
+RUN: touch %t_empty.cgdata
+RUN: not llvm-cgdata --show %t_empty.cgdata 2>&1 | FileCheck %s --check-prefix=EMPTY
+EMPTY: {{.}}cgdata: empty codegen data
+
+# Not a magic.
+RUN: printf '\xff' > %t_malformed.cgdata
+RUN: not llvm-cgdata --show %t_malformed.cgdata 2>&1 | FileCheck %s --check-prefix=MALFORMED
+MALFORMED: {{.}}cgdata: malformed codegen data
+
+# The minimum header size is 24.
+RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata
+RUN: not llvm-cgdata --show %t_corrupt.cgdata 2>&1 | FileCheck %s  --check-prefix=CORRUPT
+CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt)
+
+# The current version 1 while the header says 2.
+RUN: printf '\xffcgdata\x81' > %t_version.cgdata
+RUN: printf '\x02\x00\x00\x00' >> %t_version.cgdata
+RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata
+RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
+RUN: not llvm-cgdata --show %t_version.cgdata 2>&1 | FileCheck %s  --check-prefix=BAD_VERSION
+BAD_VERSION: {{.}}cgdata: unsupported codegen data version
+
+# Header says an outlined hash tree, but the file ends after the header.
+RUN: printf '\xffcgdata\x81' > %t_eof.cgdata
+RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata
+RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata
+RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata
+RUN: not llvm-cgdata --show %t_eof.cgdata 2>&1 | FileCheck %s  --check-prefix=EOF
+EOF: {{.}}cgdata: end of File

diff  --git a/llvm/test/tools/llvm-cgdata/merge-archive.test b/llvm/test/tools/llvm-cgdata/merge-archive.test
new file mode 100644
index 00000000000000..d70ac7c3c938d8
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/merge-archive.test
@@ -0,0 +1,90 @@
+# REQUIRES: shell, aarch64-registered-target
+# UNSUPPORTED: system-windows
+
+# Merge an archive that has two object files having cgdata (__llvm_outline)
+
+RUN: split-file %s %t
+
+# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
+RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
+RUN: sed -ie "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1.ll
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o
+
+# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
+RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
+RUN: sed -ie "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2.ll
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
+
+# Make an archive from two object files
+RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o
+
+# Merge the archive into the codegen data file.
+RUN: llvm-cgdata --merge %t/merge-archive.a -o %t/merge-archive.cgdata
+RUN: llvm-cgdata --show %t/merge-archive.cgdata | FileCheck %s
+CHECK: Outlined hash tree:
+CHECK-NEXT:  Total Node Count: 4
+CHECK-NEXT:  Terminal Node Count: 2
+CHECK-NEXT:  Depth: 2
+
+RUN: llvm-cgdata --convert %t/merge-archive.cgdata | FileCheck %s --check-prefix=TREE
+TREE: # Outlined stable hash tree
+TREE-NEXT: :outlined_hash_tree
+TREE-NEXT: ---
+TREE-NEXT: 0:
+TREE-NEXT:   Hash:            0x0
+TREE-NEXT:   Terminals:       0
+TREE-NEXT:   SuccessorIds:    [ 1 ]
+TREE-NEXT: 1:
+TREE-NEXT:   Hash:            0x1
+TREE-NEXT:   Terminals:       0
+TREE-NEXT:   SuccessorIds:    [ 2, 3 ]
+TREE-NEXT: 2:
+TREE-NEXT:   Hash:            0x3
+TREE-NEXT:   Terminals:       5
+TREE-NEXT:   SuccessorIds:    [  ]
+TREE-NEXT: 3:
+TREE-NEXT:   Hash:            0x2
+TREE-NEXT:   Terminals:       4
+TREE-NEXT:   SuccessorIds:    [  ]
+TREE-NEXT: ...
+
+;--- raw-1.cgtext
+:outlined_hash_tree
+0:
+  Hash:            0x0
+  Terminals:       0
+  SuccessorIds:    [ 1 ]
+1:
+  Hash:            0x1
+  Terminals:       0
+  SuccessorIds:    [ 2 ]
+2:
+  Hash:            0x2
+  Terminals:       4
+  SuccessorIds:    [  ]
+...
+
+;--- merge-1.ll
+ at .data = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
+
+
+;--- raw-2.cgtext
+:outlined_hash_tree
+0:
+  Hash:            0x0
+  Terminals:       0
+  SuccessorIds:    [ 1 ]
+1:
+  Hash:            0x1
+  Terminals:       0
+  SuccessorIds:    [ 2 ]
+2:
+  Hash:            0x3
+  Terminals:       5
+  SuccessorIds:    [  ]
+...
+
+;--- merge-2.ll
+ at .data = private unnamed_addr constant [72 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_outline"

diff  --git a/llvm/test/tools/llvm-cgdata/merge-concat.test b/llvm/test/tools/llvm-cgdata/merge-concat.test
new file mode 100644
index 00000000000000..cc39c673cf9a5e
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/merge-concat.test
@@ -0,0 +1,83 @@
+# REQUIRES: shell, aarch64-registered-target
+# UNSUPPORTED: system-windows
+
+# Merge a binary file (e.g., a linked executable) having concatenated cgdata (__llvm_outline)
+
+RUN: split-file %s %t
+
+# Synthesize two sets of raw cgdata without the header (24 byte) from the indexed cgdata.
+# Concatenate them in merge-concat.ll
+RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
+RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
+RUN: sed -ie "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-concat.ll
+RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
+RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
+RUN: sed -ie "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat.ll
+
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o
+RUN: llvm-cgdata --merge %t/merge-concat.o -o %t/merge-concat.cgdata
+RUN: llvm-cgdata --show %t/merge-concat.cgdata | FileCheck %s
+CHECK: Outlined hash tree:
+CHECK-NEXT:  Total Node Count: 4
+CHECK-NEXT:  Terminal Node Count: 2
+CHECK-NEXT:  Depth: 2
+
+RUN: llvm-cgdata --convert %t/merge-concat.cgdata | FileCheck %s --check-prefix=TREE
+TREE: # Outlined stable hash tree
+TREE-NEXT: :outlined_hash_tree
+TREE-NEXT: ---
+TREE-NEXT: 0:
+TREE-NEXT:   Hash:            0x0
+TREE-NEXT:   Terminals:       0
+TREE-NEXT:   SuccessorIds:    [ 1 ]
+TREE-NEXT: 1:
+TREE-NEXT:   Hash:            0x1
+TREE-NEXT:   Terminals:       0
+TREE-NEXT:   SuccessorIds:    [ 2, 3 ]
+TREE-NEXT: 2:
+TREE-NEXT:   Hash:            0x3
+TREE-NEXT:   Terminals:       5
+TREE-NEXT:   SuccessorIds:    [  ]
+TREE-NEXT: 3:
+TREE-NEXT:   Hash:            0x2
+TREE-NEXT:   Terminals:       4
+TREE-NEXT:   SuccessorIds:    [  ]
+TREE-NEXT: ...
+
+;--- raw-1.cgtext
+:outlined_hash_tree
+0:
+  Hash:            0x0
+  Terminals:       0
+  SuccessorIds:    [ 1 ]
+1:
+  Hash:            0x1
+  Terminals:       0
+  SuccessorIds:    [ 2 ]
+2:
+  Hash:            0x2
+  Terminals:       4
+  SuccessorIds:    [  ]
+...
+
+;--- raw-2.cgtext
+:outlined_hash_tree
+0:
+  Hash:            0x0
+  Terminals:       0
+  SuccessorIds:    [ 1 ]
+1:
+  Hash:            0x1
+  Terminals:       0
+  SuccessorIds:    [ 2 ]
+2:
+  Hash:            0x3
+  Terminals:       5
+  SuccessorIds:    [  ]
+...
+
+;--- merge-concat.ll
+
+; In an linked executable (as opposed to an object file), cgdata in __llvm_outline might be concatenated. Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated. In other words, the following two trees are encoded back-to-back in a binary format.
+ at .data1 = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
+ at .data2 = private unnamed_addr constant [72 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_outline"

diff  --git a/llvm/test/tools/llvm-cgdata/merge-double.test b/llvm/test/tools/llvm-cgdata/merge-double.test
new file mode 100644
index 00000000000000..950a88c66f7bb4
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/merge-double.test
@@ -0,0 +1,87 @@
+# REQUIRES: shell, aarch64-registered-target
+# UNSUPPORTED: system-windows
+
+# Merge two object files having cgdata (__llvm_outline)
+
+RUN: split-file %s %t
+
+# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
+RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
+RUN: sed -ie "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1.ll
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o
+
+# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
+RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
+RUN: sed -ie "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2.ll
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
+
+# Merge two object files into the codegen data file.
+RUN: llvm-cgdata --merge %t/merge-1.o %t/merge-2.o -o %t/merge.cgdata
+
+RUN: llvm-cgdata --show %t/merge.cgdata | FileCheck %s
+CHECK: Outlined hash tree:
+CHECK-NEXT:  Total Node Count: 4
+CHECK-NEXT:  Terminal Node Count: 2
+CHECK-NEXT:  Depth: 2
+
+RUN: llvm-cgdata --convert %t/merge.cgdata | FileCheck %s --check-prefix=TREE
+TREE: # Outlined stable hash tree
+TREE-NEXT: :outlined_hash_tree
+TREE-NEXT: ---
+TREE-NEXT: 0:
+TREE-NEXT:   Hash:            0x0
+TREE-NEXT:   Terminals:       0
+TREE-NEXT:   SuccessorIds:    [ 1 ]
+TREE-NEXT: 1:
+TREE-NEXT:   Hash:            0x1
+TREE-NEXT:   Terminals:       0
+TREE-NEXT:   SuccessorIds:    [ 2, 3 ]
+TREE-NEXT: 2:
+TREE-NEXT:   Hash:            0x3
+TREE-NEXT:   Terminals:       5
+TREE-NEXT:   SuccessorIds:    [  ]
+TREE-NEXT: 3:
+TREE-NEXT:   Hash:            0x2
+TREE-NEXT:   Terminals:       4
+TREE-NEXT:   SuccessorIds:    [  ]
+TREE-NEXT: ...
+
+;--- raw-1.cgtext
+:outlined_hash_tree
+0:
+  Hash:            0x0
+  Terminals:       0
+  SuccessorIds:    [ 1 ]
+1:
+  Hash:            0x1
+  Terminals:       0
+  SuccessorIds:    [ 2 ]
+2:
+  Hash:            0x2
+  Terminals:       4
+  SuccessorIds:    [  ]
+...
+
+;--- merge-1.ll
+ at .data = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
+
+;--- raw-2.cgtext
+:outlined_hash_tree
+0:
+  Hash:            0x0
+  Terminals:       0
+  SuccessorIds:    [ 1 ]
+1:
+  Hash:            0x1
+  Terminals:       0
+  SuccessorIds:    [ 2 ]
+2:
+  Hash:            0x3
+  Terminals:       5
+  SuccessorIds:    [  ]
+...
+
+;--- merge-2.ll
+ at .data = private unnamed_addr constant [72 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_outline"

diff  --git a/llvm/test/tools/llvm-cgdata/merge-single.test b/llvm/test/tools/llvm-cgdata/merge-single.test
new file mode 100644
index 00000000000000..783c7b979f541e
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/merge-single.test
@@ -0,0 +1,49 @@
+# REQUIRES: shell, aarch64-registered-target
+# UNSUPPORTED: system-windows
+
+# Test merge a single object file into a cgdata
+
+RUN: split-file %s %t
+
+# Merge an object file that has no cgdata (__llvm_outline). It still produces a header only cgdata.
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-empty.ll -o %t/merge-empty.o
+RUN: llvm-cgdata --merge %t/merge-empty.o --output %t/merge-empty.cgdata
+# No summary appear with the header only cgdata.
+RUN: llvm-cgdata --show %t/merge-empty.cgdata | count 0
+
+# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-single.cgtext -o %t/raw-single.cgdata
+RUN: od -t x1 -j 24 -An %t/raw-single.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-single-bytes.txt
+
+RUN: sed -ie "s/<RAW_1_BYTES>/$(cat %t/raw-single-bytes.txt)/g" %t/merge-single.ll
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merge-single.o
+
+# Merge an object file having cgdata (__llvm_outline)
+RUN: llvm-cgdata -m %t/merge-single.o -o %t/merge-single.cgdata
+RUN: llvm-cgdata -s %t/merge-single.cgdata | FileCheck %s
+CHECK: Outlined hash tree:
+CHECK-NEXT:  Total Node Count: 3
+CHECK-NEXT:  Terminal Node Count: 1
+CHECK-NEXT:  Depth: 2
+
+;--- merge-empty.ll
+ at .data = private unnamed_addr constant [1 x i8] c"\01"
+
+;--- raw-single.cgtext
+:outlined_hash_tree
+0:
+  Hash:            0x0
+  Terminals:       0
+  SuccessorIds:    [ 1 ]
+1:
+  Hash:            0x1
+  Terminals:       0
+  SuccessorIds:    [ 2 ]
+2:
+  Hash:            0x2
+  Terminals:       4
+  SuccessorIds:    [  ]
+...
+
+;--- merge-single.ll
+ at .data = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"

diff  --git a/llvm/test/tools/llvm-cgdata/show.test b/llvm/test/tools/llvm-cgdata/show.test
new file mode 100644
index 00000000000000..b47ad4978ef0b3
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/show.test
@@ -0,0 +1,30 @@
+# Test show
+
+RUN: split-file %s %t
+RUN: llvm-cgdata --show %t/show.cgtext | FileCheck %s
+
+CHECK: Outlined hash tree:
+CHECK-NEXT:   Total Node Count: 3
+CHECK-NEXT:   Terminal Node Count: 1
+CHECK-NEXT:   Depth: 2
+
+# Convert the text file to the binary file
+RUN: llvm-cgdata --convert --format binary %t/show.cgtext -o %t/show.cgdata
+RUN: llvm-cgdata --show %t/show.cgdata | FileCheck %s
+
+;--- show.cgtext
+:outlined_hash_tree
+---
+0:
+  Hash:            0x0
+  Terminals:       0
+  SuccessorIds:    [ 1 ]
+1:
+  Hash:            0x1
+  Terminals:       0
+  SuccessorIds:    [ 2 ]
+2:
+  Hash:            0x2
+  Terminals:       3
+  SuccessorIds:    [  ]
+...

diff  --git a/llvm/tools/llvm-cgdata/CMakeLists.txt b/llvm/tools/llvm-cgdata/CMakeLists.txt
new file mode 100644
index 00000000000000..556bc388306a3c
--- /dev/null
+++ b/llvm/tools/llvm-cgdata/CMakeLists.txt
@@ -0,0 +1,21 @@
+set(LLVM_LINK_COMPONENTS
+  CGData
+  CodeGen
+  Core
+  Object
+  Option
+  Support
+  )
+
+set(LLVM_TARGET_DEFINITIONS Opts.td)
+tablegen(LLVM Opts.inc -gen-opt-parser-defs)
+add_public_tablegen_target(CGDataOptsTableGen)
+
+add_llvm_tool(llvm-cgdata
+  llvm-cgdata.cpp
+
+  DEPENDS
+  intrinsics_gen
+  CGDataOptsTableGen
+  GENERATE_DRIVER
+  )

diff  --git a/llvm/tools/llvm-cgdata/Opts.td b/llvm/tools/llvm-cgdata/Opts.td
new file mode 100644
index 00000000000000..b2cfc6a85bbd32
--- /dev/null
+++ b/llvm/tools/llvm-cgdata/Opts.td
@@ -0,0 +1,32 @@
+include "llvm/Option/OptParser.td"
+
+class F<string letter, string help> : Flag<["-"], letter>, HelpText<help>;
+class FF<string name, string help> : Flag<["--"], name>, HelpText<help>;
+
+// General options
+def generic_group : OptionGroup<"Genric Options">, HelpText<"Generic Options">;
+def help : FF<"help", "Display this help">, Group<generic_group>;
+def : F<"h", "Alias for --help">, Alias<help>, Group<generic_group>;
+def version : FF<"version", "Display the LLVM version">, Group<generic_group>;
+def : F<"v", "Alias for --version">, Alias<version>, Group<generic_group>;
+
+// Action options
+def action_group : OptionGroup<"Action">, HelpText<"Action (required)">;
+def show : FF<"show", "Show summary of the (indexed) codegen data file.">,
+  Group<action_group>;
+def : F<"s", "Alias for --show">, Alias<show>, Group<action_group>;
+def convert : FF<"convert", "Convert the (indexed) codegen data file in either text or binary format.">,
+  Group<action_group>;
+def : F<"c", "Alias for --convert">, Alias<convert>, Group<action_group>;
+def merge : FF<"merge", "Take binary files having raw codegen data in custom sections, and merge them into an indexed codegen data file.">,
+  Group<action_group>;
+def : F<"m", "Alias for --merge">, Alias<merge>, Group<action_group>;
+
+// Additional options
+def cgdata_version : FF<"cgdata-version", "Display the cgdata version">;
+def output : Option<["--"], "output", KIND_SEPARATE>,
+             HelpText<"Specify the name for the output file to be created">, MetaVarName<"<file>">;
+def : JoinedOrSeparate<["-"], "o">, Alias<output>, MetaVarName<"<file>">, HelpText<"Alias for --output">;
+def format : Option<["--"], "format", KIND_SEPARATE>,
+             HelpText<"Specify the output format (text or binary)">, MetaVarName<"<value>">;
+def : JoinedOrSeparate<["-"], "f">, Alias<format>, HelpText<"Alias for --format">;

diff  --git a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
new file mode 100644
index 00000000000000..3104242070f34e
--- /dev/null
+++ b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
@@ -0,0 +1,354 @@
+//===-- llvm-cgdata.cpp - LLVM CodeGen Data Tool --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// llvm-cgdata parses raw codegen data embedded in compiled binary files, and
+// merges them into a single .cgdata file. It can also inspect and maninuplate
+// a .cgdata file. This .cgdata can contain various codegen data like outlining
+// information, and it can be used to optimize the code in the subsequent build.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CGData/CodeGenDataReader.h"
+#include "llvm/CGData/CodeGenDataWriter.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/LLVMDriver.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+enum CGDataFormat {
+  Invalid,
+  Text,
+  Binary,
+};
+
+enum CGDataAction {
+  Convert,
+  Merge,
+  Show,
+};
+
+// Command-line option boilerplate.
+namespace {
+enum ID {
+  OPT_INVALID = 0, // This is not an option ID.
+#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
+#include "Opts.inc"
+#undef OPTION
+};
+
+#define PREFIX(NAME, VALUE)                                                    \
+  static constexpr StringLiteral NAME##_init[] = VALUE;                        \
+  static constexpr ArrayRef<StringLiteral> NAME(NAME##_init,                   \
+                                                std::size(NAME##_init) - 1);
+#include "Opts.inc"
+#undef PREFIX
+
+using namespace llvm::opt;
+static constexpr opt::OptTable::Info InfoTable[] = {
+#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
+#include "Opts.inc"
+#undef OPTION
+};
+
+class CGDataOptTable : public opt::GenericOptTable {
+public:
+  CGDataOptTable() : GenericOptTable(InfoTable) {}
+};
+} // end anonymous namespace
+
+// Options
+static StringRef ToolName;
+static StringRef OutputFilename = "-";
+static StringRef Filename;
+static bool ShowCGDataVersion;
+static CGDataAction Action;
+static std::optional<CGDataFormat> OutputFormat;
+static std::vector<std::string> InputFilenames;
+
+// TODO: Add a doc, https://llvm.org/docs/CommandGuide/llvm-cgdata.html
+
+static void exitWithError(Twine Message, std::string Whence = "",
+                          std::string Hint = "") {
+  WithColor::error();
+  if (!Whence.empty())
+    errs() << Whence << ": ";
+  errs() << Message << "\n";
+  if (!Hint.empty())
+    WithColor::note() << Hint << "\n";
+  ::exit(1);
+}
+
+static void exitWithError(Error E, StringRef Whence = "") {
+  if (E.isA<CGDataError>()) {
+    handleAllErrors(std::move(E), [&](const CGDataError &IPE) {
+      exitWithError(IPE.message(), std::string(Whence));
+    });
+    return;
+  }
+
+  exitWithError(toString(std::move(E)), std::string(Whence));
+}
+
+static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
+  exitWithError(EC.message(), std::string(Whence));
+}
+
+static int convert_main(int argc, const char *argv[]) {
+  std::error_code EC;
+  raw_fd_ostream OS(OutputFilename, EC,
+                    OutputFormat == CGDataFormat::Text
+                        ? sys::fs::OF_TextWithCRLF
+                        : sys::fs::OF_None);
+  if (EC)
+    exitWithErrorCode(EC, OutputFilename);
+
+  auto FS = vfs::getRealFileSystem();
+  auto ReaderOrErr = CodeGenDataReader::create(Filename, *FS);
+  if (Error E = ReaderOrErr.takeError())
+    exitWithError(std::move(E), Filename);
+
+  CodeGenDataWriter Writer;
+  auto Reader = ReaderOrErr->get();
+  if (Reader->hasOutlinedHashTree()) {
+    OutlinedHashTreeRecord Record(Reader->releaseOutlinedHashTree());
+    Writer.addRecord(Record);
+  }
+
+  if (OutputFormat == CGDataFormat::Text) {
+    if (Error E = Writer.writeText(OS))
+      exitWithError(std::move(E));
+  } else {
+    if (Error E = Writer.write(OS))
+      exitWithError(std::move(E));
+  }
+
+  return 0;
+}
+
+static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
+                         OutlinedHashTreeRecord &GlobalOutlineRecord);
+
+static bool handleArchive(StringRef Filename, Archive &Arch,
+                          OutlinedHashTreeRecord &GlobalOutlineRecord) {
+  bool Result = true;
+  Error Err = Error::success();
+  for (const auto &Child : Arch.children(Err)) {
+    auto BuffOrErr = Child.getMemoryBufferRef();
+    if (Error E = BuffOrErr.takeError())
+      exitWithError(std::move(E), Filename);
+    auto NameOrErr = Child.getName();
+    if (Error E = NameOrErr.takeError())
+      exitWithError(std::move(E), Filename);
+    std::string Name = (Filename + "(" + NameOrErr.get() + ")").str();
+    Result &= handleBuffer(Name, BuffOrErr.get(), GlobalOutlineRecord);
+  }
+  if (Err)
+    exitWithError(std::move(Err), Filename);
+  return Result;
+}
+
+static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
+                         OutlinedHashTreeRecord &GlobalOutlineRecord) {
+  Expected<std::unique_ptr<object::Binary>> BinOrErr =
+      object::createBinary(Buffer);
+  if (Error E = BinOrErr.takeError())
+    exitWithError(std::move(E), Filename);
+
+  bool Result = true;
+  if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
+    if (Error E =
+            CodeGenDataReader::mergeFromObjectFile(Obj, GlobalOutlineRecord))
+      exitWithError(std::move(E), Filename);
+  } else if (auto *Arch = dyn_cast<Archive>(BinOrErr->get())) {
+    Result &= handleArchive(Filename, *Arch, GlobalOutlineRecord);
+  } else {
+    // TODO: Support for the MachO universal binary format.
+    errs() << "Error: unsupported binary file: " << Filename << "\n";
+    Result = false;
+  }
+
+  return Result;
+}
+
+static bool handleFile(StringRef Filename,
+                       OutlinedHashTreeRecord &GlobalOutlineRecord) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
+      MemoryBuffer::getFileOrSTDIN(Filename);
+  if (std::error_code EC = BuffOrErr.getError())
+    exitWithErrorCode(EC, Filename);
+  return handleBuffer(Filename, *BuffOrErr.get(), GlobalOutlineRecord);
+}
+
+static int merge_main(int argc, const char *argv[]) {
+  bool Result = true;
+  OutlinedHashTreeRecord GlobalOutlineRecord;
+  for (auto &Filename : InputFilenames)
+    Result &= handleFile(Filename, GlobalOutlineRecord);
+
+  if (!Result)
+    exitWithError("failed to merge codegen data files.");
+
+  CodeGenDataWriter Writer;
+  if (!GlobalOutlineRecord.empty())
+    Writer.addRecord(GlobalOutlineRecord);
+
+  std::error_code EC;
+  raw_fd_ostream OS(OutputFilename, EC,
+                    OutputFormat == CGDataFormat::Text
+                        ? sys::fs::OF_TextWithCRLF
+                        : sys::fs::OF_None);
+  if (EC)
+    exitWithErrorCode(EC, OutputFilename);
+
+  if (OutputFormat == CGDataFormat::Text) {
+    if (Error E = Writer.writeText(OS))
+      exitWithError(std::move(E));
+  } else {
+    if (Error E = Writer.write(OS))
+      exitWithError(std::move(E));
+  }
+
+  return 0;
+}
+
+static int show_main(int argc, const char *argv[]) {
+  std::error_code EC;
+  raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
+  if (EC)
+    exitWithErrorCode(EC, OutputFilename);
+
+  auto FS = vfs::getRealFileSystem();
+  auto ReaderOrErr = CodeGenDataReader::create(Filename, *FS);
+  if (Error E = ReaderOrErr.takeError())
+    exitWithError(std::move(E), Filename);
+
+  auto Reader = ReaderOrErr->get();
+  if (ShowCGDataVersion)
+    OS << "Version: " << Reader->getVersion() << "\n";
+
+  if (Reader->hasOutlinedHashTree()) {
+    auto Tree = Reader->releaseOutlinedHashTree();
+    OS << "Outlined hash tree:\n";
+    OS << "  Total Node Count: " << Tree->size() << "\n";
+    OS << "  Terminal Node Count: " << Tree->size(/*GetTerminalCountOnly=*/true)
+       << "\n";
+    OS << "  Depth: " << Tree->depth() << "\n";
+  }
+
+  return 0;
+}
+
+static void parseArgs(int argc, char **argv) {
+  CGDataOptTable Tbl;
+  ToolName = argv[0];
+  llvm::BumpPtrAllocator A;
+  llvm::StringSaver Saver{A};
+  llvm::opt::InputArgList Args =
+      Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
+        llvm::errs() << Msg << '\n';
+        std::exit(1);
+      });
+
+  if (Args.hasArg(OPT_help)) {
+    Tbl.printHelp(
+        llvm::outs(),
+        "llvm-cgdata <action> [options] (<binary files>|<.cgdata file>)",
+        ToolName.str().c_str());
+    std::exit(0);
+  }
+  if (Args.hasArg(OPT_version)) {
+    cl::PrintVersionMessage();
+    std::exit(0);
+  }
+
+  ShowCGDataVersion = Args.hasArg(OPT_cgdata_version);
+
+  if (opt::Arg *A = Args.getLastArg(OPT_format)) {
+    StringRef OF = A->getValue();
+    OutputFormat = StringSwitch<CGDataFormat>(OF)
+                       .Case("text", CGDataFormat::Text)
+                       .Case("binary", CGDataFormat::Binary)
+                       .Default(CGDataFormat::Invalid);
+    if (OutputFormat == CGDataFormat::Invalid)
+      exitWithError("unsupported format '" + OF + "'");
+  }
+
+  InputFilenames = Args.getAllArgValues(OPT_INPUT);
+  if (InputFilenames.empty())
+    exitWithError("No input file is specified.");
+  Filename = InputFilenames[0];
+
+  if (Args.hasArg(OPT_output)) {
+    OutputFilename = Args.getLastArgValue(OPT_output);
+    for (auto &Filename : InputFilenames)
+      if (Filename == OutputFilename)
+        exitWithError(
+            "Input file name cannot be the same as the output file name!\n");
+  }
+
+  opt::Arg *ActionArg = nullptr;
+  for (opt::Arg *Arg : Args.filtered(OPT_action_group)) {
+    if (ActionArg)
+      exitWithError("Only one action is allowed.");
+    ActionArg = Arg;
+  }
+  if (!ActionArg)
+    exitWithError("One action is required.");
+
+  switch (ActionArg->getOption().getID()) {
+  case OPT_show:
+    if (InputFilenames.size() != 1)
+      exitWithError("only one input file is allowed.");
+    Action = CGDataAction::Show;
+    break;
+  case OPT_convert:
+    // The default output format is text for convert.
+    if (!OutputFormat)
+      OutputFormat = CGDataFormat::Text;
+    if (InputFilenames.size() != 1)
+      exitWithError("only one input file is allowed.");
+    Action = CGDataAction::Convert;
+    break;
+  case OPT_merge:
+    // The default output format is binary for merge.
+    if (!OutputFormat)
+      OutputFormat = CGDataFormat::Binary;
+    Action = CGDataAction::Merge;
+    break;
+  default:
+    llvm_unreachable("unrecognized action");
+  }
+}
+
+int llvm_cgdata_main(int argc, char **argvNonConst, const llvm::ToolContext &) {
+  const char **argv = const_cast<const char **>(argvNonConst);
+  parseArgs(argc, argvNonConst);
+
+  switch (Action) {
+  case CGDataAction::Convert:
+    return convert_main(argc, argv);
+  case CGDataAction::Merge:
+    return merge_main(argc, argv);
+  case CGDataAction::Show:
+    return show_main(argc, argv);
+  default:
+    llvm_unreachable("unrecognized action");
+  }
+
+  return 1;
+}

diff  --git a/llvm/unittests/CodeGenData/CMakeLists.txt b/llvm/unittests/CGData/CMakeLists.txt
similarity index 94%
rename from llvm/unittests/CodeGenData/CMakeLists.txt
rename to llvm/unittests/CGData/CMakeLists.txt
index 3d821b87e29d8c..9cedab56d3f6bc 100644
--- a/llvm/unittests/CodeGenData/CMakeLists.txt
+++ b/llvm/unittests/CGData/CMakeLists.txt
@@ -1,7 +1,7 @@
 set(LLVM_LINK_COMPONENTS
   ${LLVM_TARGETS_TO_BUILD}
+  CGData
   CodeGen
-  CodeGenData
   Core
   Support
   )

diff  --git a/llvm/unittests/CodeGenData/OutlinedHashTreeRecordTest.cpp b/llvm/unittests/CGData/OutlinedHashTreeRecordTest.cpp
similarity index 98%
rename from llvm/unittests/CodeGenData/OutlinedHashTreeRecordTest.cpp
rename to llvm/unittests/CGData/OutlinedHashTreeRecordTest.cpp
index aa7ad4a33754ff..a614a48dd7a439 100644
--- a/llvm/unittests/CodeGenData/OutlinedHashTreeRecordTest.cpp
+++ b/llvm/unittests/CGData/OutlinedHashTreeRecordTest.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGenData/OutlinedHashTreeRecord.h"
+#include "llvm/CGData/OutlinedHashTreeRecord.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 

diff  --git a/llvm/unittests/CodeGenData/OutlinedHashTreeTest.cpp b/llvm/unittests/CGData/OutlinedHashTreeTest.cpp
similarity index 98%
rename from llvm/unittests/CodeGenData/OutlinedHashTreeTest.cpp
rename to llvm/unittests/CGData/OutlinedHashTreeTest.cpp
index 637ab3cd08c1ce..2d1ec8b05ab2a9 100644
--- a/llvm/unittests/CodeGenData/OutlinedHashTreeTest.cpp
+++ b/llvm/unittests/CGData/OutlinedHashTreeTest.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGenData/OutlinedHashTree.h"
+#include "llvm/CGData/OutlinedHashTree.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 

diff  --git a/llvm/unittests/CMakeLists.txt b/llvm/unittests/CMakeLists.txt
index 49ed6c8fb6c42f..911ede701982f6 100644
--- a/llvm/unittests/CMakeLists.txt
+++ b/llvm/unittests/CMakeLists.txt
@@ -20,8 +20,8 @@ add_subdirectory(AsmParser)
 add_subdirectory(BinaryFormat)
 add_subdirectory(Bitcode)
 add_subdirectory(Bitstream)
+add_subdirectory(CGData)
 add_subdirectory(CodeGen)
-add_subdirectory(CodeGenData)
 add_subdirectory(DebugInfo)
 add_subdirectory(Debuginfod)
 add_subdirectory(Demangle)


        


More information about the llvm-commits mailing list