[llvm] [CGData] Skip reading Names into StableFunctionMap in non-assertion builds (PR #142095)

Zhaoxuan Jiang via llvm-commits llvm-commits at lists.llvm.org
Fri May 30 00:09:31 PDT 2025


https://github.com/nocchijiang created https://github.com/llvm/llvm-project/pull/142095

…uilds

Names are used for debugging purposes and have no impact on codegen. For a non-trivial project, reading them consumes a lot of memory and slows down compilation significantly. This patch adds a field to the serialized CGData that records the total size of Names, and skips reading Names by advancing the pointer when deserializing in Use mode in non-assertion builds.

>From e9eef549505358bc00153c5bc0e416ae68f51cd8 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Fri, 30 May 2025 14:28:09 +0800
Subject: [PATCH] [CGData] Skip reading Names into StableFunctionMap in
 non-assertion builds

Names are used for debugging purpose and have no impact on codegen. For
a non-trivial project, reading them consumes a lot of memory and slows
down the compilation significantly. This patch adds a field in the
serialized CGData to remember the total size of Names, and skips reading
Names by advancing the pointer when deserializing for the Use mode in
non-assertion builds.
---
 llvm/include/llvm/CGData/CGDataPatchItem.h    | 33 ++++++++++++
 llvm/include/llvm/CGData/CodeGenData.h        |  3 ++
 llvm/include/llvm/CGData/CodeGenData.inc      |  2 +-
 llvm/include/llvm/CGData/CodeGenDataReader.h  | 27 ++++++++--
 llvm/include/llvm/CGData/CodeGenDataWriter.h  | 25 +++++-----
 .../llvm/CGData/StableFunctionMapRecord.h     | 10 ++--
 llvm/lib/CGData/CodeGenData.cpp               | 11 +++-
 llvm/lib/CGData/CodeGenDataReader.cpp         | 10 ++--
 llvm/lib/CGData/CodeGenDataWriter.cpp         | 36 ++++++++++---
 llvm/lib/CGData/StableFunctionMapRecord.cpp   | 50 +++++++++++++------
 llvm/lib/CodeGen/GlobalMergeFunctions.cpp     |  6 ++-
 llvm/test/tools/llvm-cgdata/empty.test        |  4 +-
 llvm/test/tools/llvm-cgdata/error.test        |  4 +-
 .../merge-combined-funcmap-hashtree.test      |  2 +-
 .../llvm-cgdata/merge-funcmap-archive.test    |  4 +-
 .../llvm-cgdata/merge-funcmap-concat.test     |  4 +-
 .../llvm-cgdata/merge-funcmap-double.test     |  4 +-
 .../llvm-cgdata/merge-funcmap-single.test     |  2 +-
 .../CGData/StableFunctionMapRecordTest.cpp    |  6 ++-
 19 files changed, 180 insertions(+), 63 deletions(-)
 create mode 100644 llvm/include/llvm/CGData/CGDataPatchItem.h

diff --git a/llvm/include/llvm/CGData/CGDataPatchItem.h b/llvm/include/llvm/CGData/CGDataPatchItem.h
new file mode 100644
index 0000000000000..d13f89b032542
--- /dev/null
+++ b/llvm/include/llvm/CGData/CGDataPatchItem.h
@@ -0,0 +1,33 @@
+//===- CGDataPatchItem.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for patching codegen data.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CGDATA_CGDATAPATCHITEM_H
+#define LLVM_CGDATA_CGDATAPATCHITEM_H
+
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+
+/// A struct to define how the data stream should be patched.
+struct CGDataPatchItem {
+  // Where to patch.
+  uint64_t Pos;
+  // Source data.
+  OwningArrayRef<uint64_t> D;
+
+  CGDataPatchItem(uint64_t Pos, const uint64_t *D, int N)
+      : Pos(Pos), D(ArrayRef<uint64_t>(D, N)) {}
+};
+
+} // namespace llvm
+
+#endif // LLVM_CGDATA_CGDATAPATCHITEM_H
diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h
index ad7fa579f7508..38b96b72ccac6 100644
--- a/llvm/include/llvm/CGData/CodeGenData.h
+++ b/llvm/include/llvm/CGData/CodeGenData.h
@@ -282,6 +282,9 @@ enum CGDataVersion {
   Version1 = 1,
   // Version 2 supports the stable function merging map.
   Version2 = 2,
+  // Version 3 adds the total size of the Names in the stable function map so
+  // we can skip reading them into the memory for non-assertion builds.
+  Version3 = 3,
   CurrentVersion = CG_DATA_INDEX_VERSION
 };
 const uint64_t Version = CGDataVersion::CurrentVersion;
diff --git a/llvm/include/llvm/CGData/CodeGenData.inc b/llvm/include/llvm/CGData/CodeGenData.inc
index e0ae7a51024d8..94de4c0b017a2 100644
--- a/llvm/include/llvm/CGData/CodeGenData.inc
+++ b/llvm/include/llvm/CGData/CodeGenData.inc
@@ -49,4 +49,4 @@ CG_DATA_SECT_ENTRY(CG_merge, CG_DATA_QUOTE(CG_DATA_MERGE_COMMON),
 #endif
 
 /* Indexed codegen data format version (start from 1). */
-#define CG_DATA_INDEX_VERSION 2
+#define CG_DATA_INDEX_VERSION 3
diff --git a/llvm/include/llvm/CGData/CodeGenDataReader.h b/llvm/include/llvm/CGData/CodeGenDataReader.h
index f915ce0afcd7f..dcd31d1b00430 100644
--- a/llvm/include/llvm/CGData/CodeGenDataReader.h
+++ b/llvm/include/llvm/CGData/CodeGenDataReader.h
@@ -27,6 +27,12 @@ class CodeGenDataReader {
   std::string LastErrorMsg;
 
 public:
+  struct Options {
+    /// Whether to read the Names into the stable function map.
+    /// Names are usually used for validation and debugging purpose.
+    bool ReadStableFunctionMapNames = true;
+  };
+
   CodeGenDataReader() = default;
   virtual ~CodeGenDataReader() = default;
 
@@ -51,12 +57,20 @@ class CodeGenDataReader {
   /// Factory method to create an appropriately typed reader for the given
   /// codegen data file path and file system.
   LLVM_ABI static Expected<std::unique_ptr<CodeGenDataReader>>
-  create(const Twine &Path, vfs::FileSystem &FS);
+  create(const Twine &Path, vfs::FileSystem &FS) {
+    return create(Path, FS, {});
+  }
+  LLVM_ABI static Expected<std::unique_ptr<CodeGenDataReader>>
+  create(const Twine &Path, vfs::FileSystem &FS, Options Opts);
 
   /// Factory method to create an appropriately typed reader for the given
   /// memory buffer.
   LLVM_ABI static Expected<std::unique_ptr<CodeGenDataReader>>
-  create(std::unique_ptr<MemoryBuffer> Buffer);
+  create(std::unique_ptr<MemoryBuffer> Buffer) {
+    return create(std::move(Buffer), {});
+  }
+  LLVM_ABI static Expected<std::unique_ptr<CodeGenDataReader>>
+  create(std::unique_ptr<MemoryBuffer> Buffer, Options Opts);
 
   /// Extract the cgdata embedded in sections from the given object file and
   /// merge them into the GlobalOutlineRecord. This is a static helper that
@@ -78,6 +92,8 @@ class CodeGenDataReader {
   // releaseStableFunctionMap(), it's no longer valid.
   StableFunctionMapRecord FunctionMapRecord;
 
+  Options Opts;
+
   /// Set the current error and return same.
   Error error(cgdata_error Err, const std::string &ErrMsg = "") {
     LastError = Err;
@@ -106,8 +122,11 @@ class LLVM_ABI IndexedCodeGenDataReader : public CodeGenDataReader {
   IndexedCGData::Header Header;
 
 public:
-  IndexedCodeGenDataReader(std::unique_ptr<MemoryBuffer> DataBuffer)
-      : DataBuffer(std::move(DataBuffer)) {}
+  IndexedCodeGenDataReader(std::unique_ptr<MemoryBuffer> DataBuffer,
+                           Options Opts)
+      : DataBuffer(std::move(DataBuffer)) {
+    this->Opts = Opts;
+  }
   IndexedCodeGenDataReader(const IndexedCodeGenDataReader &) = delete;
   IndexedCodeGenDataReader &
   operator=(const IndexedCodeGenDataReader &) = delete;
diff --git a/llvm/include/llvm/CGData/CodeGenDataWriter.h b/llvm/include/llvm/CGData/CodeGenDataWriter.h
index faef6beb30aa6..b9c807c2c1cfe 100644
--- a/llvm/include/llvm/CGData/CodeGenDataWriter.h
+++ b/llvm/include/llvm/CGData/CodeGenDataWriter.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_CGDATA_CODEGENDATAWRITER_H
 #define LLVM_CGDATA_CODEGENDATAWRITER_H
 
+#include "llvm/CGData/CGDataPatchItem.h"
 #include "llvm/CGData/CodeGenData.h"
 #include "llvm/CGData/OutlinedHashTreeRecord.h"
 #include "llvm/CGData/StableFunctionMapRecord.h"
@@ -22,21 +23,23 @@
 
 namespace llvm {
 
-/// A struct to define how the data stream should be patched.
-struct CGDataPatchItem {
-  uint64_t Pos; // Where to patch.
-  uint64_t *D;  // Pointer to an array of source data.
-  int N;        // Number of elements in \c D array.
-};
-
 /// A wrapper class to abstract writer stream with support of bytes
 /// back patching.
 class CGDataOStream {
+  enum class OStreamKind {
+    fd,
+    string,
+    svector,
+  };
+
 public:
   CGDataOStream(raw_fd_ostream &FD)
-      : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {}
+      : Kind(OStreamKind::fd), OS(FD), LE(FD, llvm::endianness::little) {}
   CGDataOStream(raw_string_ostream &STR)
-      : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {}
+      : Kind(OStreamKind::string), OS(STR), LE(STR, llvm::endianness::little) {}
+  CGDataOStream(raw_svector_ostream &SVEC)
+      : Kind(OStreamKind::svector), OS(SVEC),
+        LE(SVEC, llvm::endianness::little) {}
 
   uint64_t tell() { return OS.tell(); }
   void write(uint64_t V) { LE.write<uint64_t>(V); }
@@ -48,9 +51,7 @@ class CGDataOStream {
   // directly and it won't be reflected in the stream's internal buffer.
   LLVM_ABI void patch(ArrayRef<CGDataPatchItem> P);
 
-  // If \c OS is an instance of \c raw_fd_ostream, this field will be
-  // true. Otherwise, \c OS will be an raw_string_ostream.
-  bool IsFDOStream;
+  OStreamKind Kind;
   raw_ostream &OS;
   support::endian::Writer LE;
 };
diff --git a/llvm/include/llvm/CGData/StableFunctionMapRecord.h b/llvm/include/llvm/CGData/StableFunctionMapRecord.h
index d4d3ffa06ad25..a75cb12a70ba6 100644
--- a/llvm/include/llvm/CGData/StableFunctionMapRecord.h
+++ b/llvm/include/llvm/CGData/StableFunctionMapRecord.h
@@ -16,6 +16,7 @@
 #ifndef LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
 #define LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
 
+#include "llvm/CGData/CGDataPatchItem.h"
 #include "llvm/CGData/StableFunctionMap.h"
 #include "llvm/ObjectYAML/YAML.h"
 #include "llvm/Support/Compiler.h"
@@ -36,13 +37,16 @@ struct StableFunctionMapRecord {
   /// A static helper function to serialize the stable function map without
   /// owning the stable function map.
   LLVM_ABI static void serialize(raw_ostream &OS,
-                                 const StableFunctionMap *FunctionMap);
+                                 const StableFunctionMap *FunctionMap,
+                                 std::vector<CGDataPatchItem> &PatchItems);
 
   /// Serialize the stable function map to a raw_ostream.
-  LLVM_ABI void serialize(raw_ostream &OS) const;
+  LLVM_ABI void serialize(raw_ostream &OS,
+                          std::vector<CGDataPatchItem> &PatchItems) const;
 
   /// Deserialize the stable function map from a raw_ostream.
-  LLVM_ABI void deserialize(const unsigned char *&Ptr);
+  LLVM_ABI void deserialize(const unsigned char *&Ptr,
+                            bool ReadStableFunctionMapNames = true);
 
   /// Serialize the stable function map to a YAML stream.
   LLVM_ABI void serializeYAML(yaml::Output &YOS) const;
diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp
index 7b9c584d64867..b9af45de13329 100644
--- a/llvm/lib/CGData/CodeGenData.cpp
+++ b/llvm/lib/CGData/CodeGenData.cpp
@@ -155,7 +155,14 @@ CodeGenData &CodeGenData::getInstance() {
       // Instead, just emit an warning message and fall back as if no CGData
       // were available.
       auto FS = vfs::getRealFileSystem();
-      auto ReaderOrErr = CodeGenDataReader::create(CodeGenDataUsePath, *FS);
+      CodeGenDataReader::Options Opts;
+#ifdef NDEBUG
+      // Do not read the stable function map names for non-assertion builds
+      // to save memory and time for production use.
+      Opts.ReadStableFunctionMapNames = false;
+#endif
+      auto ReaderOrErr =
+          CodeGenDataReader::create(CodeGenDataUsePath, *FS, Opts);
       if (Error E = ReaderOrErr.takeError()) {
         warn(std::move(E), CodeGenDataUsePath);
         return;
@@ -188,7 +195,7 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
     return make_error<CGDataError>(cgdata_error::unsupported_version);
   H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
 
-  static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version2,
+  static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version3,
                 "Please update the offset computation below if a new field has "
                 "been added to the header.");
   H.OutlinedHashTreeOffset =
diff --git a/llvm/lib/CGData/CodeGenDataReader.cpp b/llvm/lib/CGData/CodeGenDataReader.cpp
index 39513d422c2c9..fa969e9fba036 100644
--- a/llvm/lib/CGData/CodeGenDataReader.cpp
+++ b/llvm/lib/CGData/CodeGenDataReader.cpp
@@ -113,23 +113,25 @@ Error IndexedCodeGenDataReader::read() {
 }
 
 Expected<std::unique_ptr<CodeGenDataReader>>
-CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) {
+CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS,
+                          Options Opts) {
   // Set up the buffer to read.
   auto BufferOrError = setupMemoryBuffer(Path, FS);
   if (Error E = BufferOrError.takeError())
     return std::move(E);
-  return CodeGenDataReader::create(std::move(BufferOrError.get()));
+  return CodeGenDataReader::create(std::move(BufferOrError.get()), Opts);
 }
 
 Expected<std::unique_ptr<CodeGenDataReader>>
-CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
+CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer, Options Opts) {
   if (Buffer->getBufferSize() == 0)
     return make_error<CGDataError>(cgdata_error::empty_cgdata);
 
   std::unique_ptr<CodeGenDataReader> Reader;
   // Create the reader.
   if (IndexedCodeGenDataReader::hasFormat(*Buffer))
-    Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer));
+    Reader =
+        std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer), Opts);
   else if (TextCodeGenDataReader::hasFormat(*Buffer))
     Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer));
   else
diff --git a/llvm/lib/CGData/CodeGenDataWriter.cpp b/llvm/lib/CGData/CodeGenDataWriter.cpp
index 3a392036198a9..14a8558ba63b7 100644
--- a/llvm/lib/CGData/CodeGenDataWriter.cpp
+++ b/llvm/lib/CGData/CodeGenDataWriter.cpp
@@ -19,29 +19,46 @@ using namespace llvm;
 void CGDataOStream::patch(ArrayRef<CGDataPatchItem> P) {
   using namespace support;
 
-  if (IsFDOStream) {
+  switch (Kind) {
+  case OStreamKind::fd: {
     raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
     const uint64_t LastPos = FDOStream.tell();
     for (const auto &K : P) {
       FDOStream.seek(K.Pos);
-      for (int I = 0; I < K.N; I++)
+      for (size_t I = 0; I < K.D.size(); ++I)
         write(K.D[I]);
     }
     // Reset the stream to the last position after patching so that users
     // don't accidentally overwrite data. This makes it consistent with
     // the string stream below which replaces the data directly.
     FDOStream.seek(LastPos);
-  } else {
+    break;
+  }
+  case OStreamKind::string: {
     raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
     std::string &Data = SOStream.str(); // with flush
     for (const auto &K : P) {
-      for (int I = 0; I < K.N; I++) {
+      for (size_t I = 0; I < K.D.size(); ++I) {
         uint64_t Bytes =
             endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);
         Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t),
                      reinterpret_cast<const char *>(&Bytes), sizeof(uint64_t));
       }
     }
+    break;
+  }
+  case OStreamKind::svector: {
+    raw_svector_ostream &VOStream = static_cast<raw_svector_ostream &>(OS);
+    for (const auto &K : P) {
+      for (size_t I = 0; I < K.D.size(); ++I) {
+        uint64_t Bytes =
+            endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);
+        VOStream.pwrite(reinterpret_cast<const char *>(&Bytes),
+                        sizeof(uint64_t), K.Pos + I * sizeof(uint64_t));
+      }
+    }
+    break;
+  }
   }
 }
 
@@ -106,17 +123,20 @@ Error CodeGenDataWriter::writeImpl(CGDataOStream &COS) {
   if (Error E = writeHeader(COS))
     return E;
 
+  std::vector<CGDataPatchItem> PatchItems;
+
   uint64_t OutlinedHashTreeFieldStart = COS.tell();
   if (hasOutlinedHashTree())
     HashTreeRecord.serialize(COS.OS);
   uint64_t StableFunctionMapFieldStart = COS.tell();
   if (hasStableFunctionMap())
-    FunctionMapRecord.serialize(COS.OS);
+    FunctionMapRecord.serialize(COS.OS, PatchItems);
 
   // Back patch the offsets.
-  CGDataPatchItem PatchItems[] = {
-      {OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1},
-      {StableFunctionMapOffset, &StableFunctionMapFieldStart, 1}};
+  PatchItems.emplace_back(OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart,
+                          1);
+  PatchItems.emplace_back(StableFunctionMapOffset, &StableFunctionMapFieldStart,
+                          1);
   COS.patch(PatchItems);
 
   return Error::success();
diff --git a/llvm/lib/CGData/StableFunctionMapRecord.cpp b/llvm/lib/CGData/StableFunctionMapRecord.cpp
index 537793b9e2b45..735a1594131e8 100644
--- a/llvm/lib/CGData/StableFunctionMapRecord.cpp
+++ b/llvm/lib/CGData/StableFunctionMapRecord.cpp
@@ -77,26 +77,33 @@ static IndexOperandHashVecType getStableIndexOperandHashes(
   return IndexOperandHashes;
 }
 
-void StableFunctionMapRecord::serialize(raw_ostream &OS) const {
-  serialize(OS, FunctionMap.get());
+void StableFunctionMapRecord::serialize(
+    raw_ostream &OS, std::vector<CGDataPatchItem> &PatchItems) const {
+  serialize(OS, FunctionMap.get(), PatchItems);
 }
 
-void StableFunctionMapRecord::serialize(raw_ostream &OS,
-                                        const StableFunctionMap *FunctionMap) {
+void StableFunctionMapRecord::serialize(
+    raw_ostream &OS, const StableFunctionMap *FunctionMap,
+    std::vector<CGDataPatchItem> &PatchItems) {
   support::endian::Writer Writer(OS, endianness::little);
 
   // Write Names.
   ArrayRef<std::string> Names = FunctionMap->getNames();
-  uint32_t ByteSize = 4;
   Writer.write<uint32_t>(Names.size());
+  // Remember the position, write back the total size of Names, so we can skip
+  // reading them if needed.
+  const uint64_t NamesByteSizeOffset = Writer.OS.tell();
+  Writer.write<uint64_t>(0);
   for (auto &Name : Names) {
     Writer.OS << Name << '\0';
-    ByteSize += Name.size() + 1;
   }
-  // Align ByteSize to 4 bytes.
-  uint32_t Padding = offsetToAlignment(ByteSize, Align(4));
+  // Align current position to 4 bytes.
+  uint32_t Padding = offsetToAlignment(Writer.OS.tell(), Align(4));
   for (uint32_t I = 0; I < Padding; ++I)
     Writer.OS << '\0';
+  const auto NamesByteSize =
+      Writer.OS.tell() - NamesByteSizeOffset - sizeof(NamesByteSizeOffset);
+  PatchItems.emplace_back(NamesByteSizeOffset, &NamesByteSize, 1);
 
   // Write StableFunctionEntries whose pointers are sorted.
   auto FuncEntries = getStableFunctionEntries(*FunctionMap);
@@ -120,7 +127,8 @@ void StableFunctionMapRecord::serialize(raw_ostream &OS,
   }
 }
 
-void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr) {
+void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr,
+                                          bool ReadStableFunctionMapNames) {
   // Assert that Ptr is 4-byte aligned
   assert(((uintptr_t)Ptr % 4) == 0);
   // Read Names.
@@ -129,13 +137,25 @@ void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr) {
   // Early exit if there is no name.
   if (NumNames == 0)
     return;
-  for (unsigned I = 0; I < NumNames; ++I) {
-    StringRef Name(reinterpret_cast<const char *>(Ptr));
-    Ptr += Name.size() + 1;
-    FunctionMap->getIdOrCreateForName(Name);
+  const auto NamesByteSize =
+      endian::readNext<uint64_t, endianness::little, unaligned>(Ptr);
+  if (ReadStableFunctionMapNames) {
+    const auto NamesOffset = reinterpret_cast<uintptr_t>(Ptr);
+    (void)NamesOffset; // Silence unused variable warning.
+    for (unsigned I = 0; I < NumNames; ++I) {
+      StringRef Name(reinterpret_cast<const char *>(Ptr));
+      Ptr += Name.size() + 1;
+      FunctionMap->getIdOrCreateForName(Name);
+    }
+    // Align Ptr to 4 bytes.
+    Ptr = reinterpret_cast<const uint8_t *>(alignAddr(Ptr, Align(4)));
+    assert(reinterpret_cast<uintptr_t>(Ptr) - NamesOffset == NamesByteSize &&
+           "NamesByteSize does not match the actual size of names");
+  } else {
+    // skip reading Names by advancing the pointer.
+    Ptr = reinterpret_cast<const uint8_t *>(reinterpret_cast<uintptr_t>(Ptr) +
+                                            NamesByteSize);
   }
-  // Align Ptr to 4 bytes.
-  Ptr = reinterpret_cast<const uint8_t *>(alignAddr(Ptr, Align(4)));
 
   // Read StableFunctionEntries.
   auto NumFuncs =
diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
index 9f1eb4b24772f..84dc4ab0a5522 100644
--- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
+++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
 #include "llvm/CGData/CodeGenData.h"
+#include "llvm/CGData/CodeGenDataWriter.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/StructuralHash.h"
 #include "llvm/InitializePasses.h"
@@ -526,7 +527,10 @@ void GlobalMergeFunc::emitFunctionMap(Module &M) {
   SmallVector<char> Buf;
   raw_svector_ostream OS(Buf);
 
-  StableFunctionMapRecord::serialize(OS, LocalFunctionMap.get());
+  std::vector<CGDataPatchItem> PatchItems;
+  StableFunctionMapRecord::serialize(OS, LocalFunctionMap.get(), PatchItems);
+  CGDataOStream COS(OS);
+  COS.patch(PatchItems);
 
   std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
       OS.str(), "in-memory stable function map", false);
diff --git a/llvm/test/tools/llvm-cgdata/empty.test b/llvm/test/tools/llvm-cgdata/empty.test
index bea78d512a6db..0d2b0e848a2c9 100644
--- a/llvm/test/tools/llvm-cgdata/empty.test
+++ b/llvm/test/tools/llvm-cgdata/empty.test
@@ -16,7 +16,7 @@ RUN: llvm-cgdata --show %t_emptyheader.cgdata | count 0
 
 # The version number appears when asked, as it's in the header
 RUN: llvm-cgdata --show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION
-VERSION: Version: 2
+VERSION: Version: 3
 
 # When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header.
 RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
@@ -30,7 +30,7 @@ RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
 #   uint64_t StableFunctionMapOffset;
 # }
 RUN: printf '\xffcgdata\x81' > %t_header.cgdata
-RUN: printf '\x02\x00\x00\x00' >> %t_header.cgdata
+RUN: printf '\x03\x00\x00\x00' >> %t_header.cgdata
 RUN: printf '\x00\x00\x00\x00' >> %t_header.cgdata
 RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
 RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
diff --git a/llvm/test/tools/llvm-cgdata/error.test b/llvm/test/tools/llvm-cgdata/error.test
index 2caa3aef40395..92ff484e31caf 100644
--- a/llvm/test/tools/llvm-cgdata/error.test
+++ b/llvm/test/tools/llvm-cgdata/error.test
@@ -22,9 +22,9 @@ RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata
 RUN: not llvm-cgdata --show %t_corrupt.cgdata 2>&1 | FileCheck %s  --check-prefix=CORRUPT
 CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt)
 
-# The current version 2 while the header says 3.
+# The current version 3 while the header says 4.
 RUN: printf '\xffcgdata\x81' > %t_version.cgdata
-RUN: printf '\x03\x00\x00\x00' >> %t_version.cgdata
+RUN: printf '\x04\x00\x00\x00' >> %t_version.cgdata
 RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata
 RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
 RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
diff --git a/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
index f7e078ba8efa3..b060872113b1b 100644
--- a/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
+++ b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
@@ -63,4 +63,4 @@ CHECK-NEXT:  Mergeable function Count: 0
 
 ;--- merge-both-template.ll
 @.data1 = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
- at .data2 = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+ at .data2 = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
index c1881bc870aad..2936086321028 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
@@ -65,7 +65,7 @@ MAP-NEXT: ...
 ...
 
 ;--- merge-1-template.ll
- at .data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+ at .data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
 
 ;--- raw-2.cgtext
 :stable_function_map
@@ -80,4 +80,4 @@ MAP-NEXT: ...
 ...
 
 ;--- merge-2-template.ll
- at .data = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+ at .data = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
index 301ee6dcf21c8..d2965456a1999 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
@@ -74,5 +74,5 @@ MAP-NEXT: ...
 ; In an linked executable (as opposed to an object file), cgdata in __llvm_merge might be concatenated.
 ; Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated.
 ; In other words, the following two trees are encoded back-to-back in a binary format.
- at .data1 = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
- at .data2 = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+ at .data1 = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+ at .data2 = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
index 98a91487aa840..8277e3272d77e 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
@@ -61,7 +61,7 @@ MAP-NEXT: ...
 ...
 
 ;--- merge-1-template.ll
- at .data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+ at .data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
 
 ;--- raw-2.cgtext
 :stable_function_map
@@ -76,4 +76,4 @@ MAP-NEXT: ...
 ...
 
 ;--- merge-2-template.ll
- at .data = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+ at .data = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
index 2075face72ab9..9469f1cbda331 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
@@ -33,4 +33,4 @@ CHECK-NEXT:  Mergeable function Count: 0
 ...
 
 ;--- merge-single-template.ll
- at .data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+ at .data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/unittests/CGData/StableFunctionMapRecordTest.cpp b/llvm/unittests/CGData/StableFunctionMapRecordTest.cpp
index f5c9afe449da3..caaf6475ee50b 100644
--- a/llvm/unittests/CGData/StableFunctionMapRecordTest.cpp
+++ b/llvm/unittests/CGData/StableFunctionMapRecordTest.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CGData/StableFunctionMapRecord.h"
+#include "llvm/CGData/CodeGenDataWriter.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
@@ -77,7 +78,10 @@ TEST(StableFunctionMapRecordTest, Serialize) {
   // Serialize and deserialize the map.
   SmallVector<char> Out;
   raw_svector_ostream OS(Out);
-  MapRecord1.serialize(OS);
+  std::vector<CGDataPatchItem> PatchItems;
+  MapRecord1.serialize(OS, PatchItems);
+  CGDataOStream COS(OS);
+  COS.patch(PatchItems);
 
   StableFunctionMapRecord MapRecord2;
   const uint8_t *Data = reinterpret_cast<const uint8_t *>(Out.data());



More information about the llvm-commits mailing list