[llvm] [CGData] llvm-cgdata (PR #89884)

Wed Jul 10 16:44:52 PDT 2024

================
@@ -0,0 +1,162 @@
+//===- CodeGenDataWriter.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing codegen data.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGenData/CodeGenDataWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
+
+#define DEBUG_TYPE "cg-data-writer"
+
+using namespace llvm;
+
+namespace llvm {
+
+/// A struct to define how the data stream should be patched.
+struct CGDataPatchItem {
+  uint64_t Pos; // Where to patch.
+  uint64_t *D;  // Pointer to an array of source data.
+  int N;        // Number of elements in \c D array.
+};
+
+// A wrapper class to abstract writer stream with support of bytes
+// back patching.
+class CGDataOStream {
+public:
+  CGDataOStream(raw_fd_ostream &FD)
+      : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {}
+  CGDataOStream(raw_string_ostream &STR)
+      : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {}
+
+  uint64_t tell() { return OS.tell(); }
+  void write(uint64_t V) { LE.write<uint64_t>(V); }
+  void write32(uint32_t V) { LE.write<uint32_t>(V); }
+  void write8(uint8_t V) { LE.write<uint8_t>(V); }
+
+  // \c patch can only be called when all data is written and flushed.
+  // For raw_string_ostream, the patch is done on the target string
+  // directly and it won't be reflected in the stream's internal buffer.
+  void patch(ArrayRef<CGDataPatchItem> P) {
+    using namespace support;
+
+    if (IsFDOStream) {
+      raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
+      const uint64_t LastPos = FDOStream.tell();
+      for (const auto &K : P) {
+        FDOStream.seek(K.Pos);
+        for (int I = 0; I < K.N; I++)
+          write(K.D[I]);
+      }
+      // Reset the stream to the last position after patching so that users
+      // don't accidentally overwrite data. This makes it consistent with
+      // the string stream below which replaces the data directly.
+      FDOStream.seek(LastPos);
+    } else {
+      raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
+      std::string &Data = SOStream.str(); // with flush
+      for (const auto &K : P) {
+        for (int I = 0; I < K.N; I++) {
+          uint64_t Bytes =
+              endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);
+          Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t),
+                       (const char *)&Bytes, sizeof(uint64_t));
+        }
+      }
+    }
+  }
+
+  // If \c OS is an instance of \c raw_fd_ostream, this field will be
+  // true. Otherwise, \c OS will be an raw_string_ostream.
+  bool IsFDOStream;
+  raw_ostream &OS;
+  support::endian::Writer LE;
+};
+
+} // end namespace llvm
+
+void CodeGenDataWriter::addRecord(OutlinedHashTreeRecord &Record) {
+  assert(Record.HashTree && "empty hash tree in the record");
+  HashTreeRecord.HashTree = std::move(Record.HashTree);
+
+  DataKind |= CGDataKind::FunctionOutlinedHashTree;
+}
+
+Error CodeGenDataWriter::write(raw_fd_ostream &OS) {
+  CGDataOStream COS(OS);
+  return writeImpl(COS);
+}
+
+Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) {
+  using namespace support;
+  IndexedCGData::Header Header;
+  Header.Magic = IndexedCGData::Magic;
+  Header.Version = IndexedCGData::Version;
+
+  // Set the CGDataKind depending on the kind.
+  Header.DataKind = 0;
+  if (static_cast<bool>(DataKind & CGDataKind::FunctionOutlinedHashTree))
+    Header.DataKind |=
+        static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
+
+  Header.OutlinedHashTreeOffset = 0;
+
+  // Only write up to the CGDataKind. We need to remember the offset of the
+  // remaining fields to allow back-patching later.
+  COS.write(Header.Magic);
+  COS.write32(Header.Version);
+  COS.write32(Header.DataKind);
----------------
kyulee-com wrote:

Like the IRPGO case -- https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/ProfileData/InstrProf.h#L1117-L1124, I aim to maintain a simplified header while still capturing any discrepancies. Details such as the module name may change based on client usage and can be distinguished by using a different DataKind. I expect that these details can be encoded within their respective data blobs as needed.


https://github.com/llvm/llvm-project/pull/89884