[llvm-branch-commits] [lld] [llvm] [CGData] Global Merge Functions (PR #112671)

Mon Nov 4 00:48:08 PST 2024

https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/112671

>From 8e10ed3b27b0f0098782171bb38387e86536be5f Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Sat, 7 Sep 2024 22:48:17 -0700
Subject: [PATCH 1/5] [CGData] Stable Function Map

These define the main data structures to represent stable functions and group
similar functions in a function map.
Serialization is supported in a binary or yaml form.
---
 llvm/include/llvm/CGData/StableFunctionMap.h  | 136 ++++++++++++
 .../llvm/CGData/StableFunctionMapRecord.h     |  71 ++++++
 llvm/lib/CGData/CMakeLists.txt                |   2 +
 llvm/lib/CGData/StableFunctionMap.cpp         | 170 +++++++++++++++
 llvm/lib/CGData/StableFunctionMapRecord.cpp   | 203 ++++++++++++++++++
 llvm/unittests/CGData/CMakeLists.txt          |   2 +
 .../CGData/StableFunctionMapRecordTest.cpp    | 127 +++++++++++
 .../CGData/StableFunctionMapTest.cpp          | 153 +++++++++++++
 8 files changed, 864 insertions(+)
 create mode 100644 llvm/include/llvm/CGData/StableFunctionMap.h
 create mode 100644 llvm/include/llvm/CGData/StableFunctionMapRecord.h
 create mode 100644 llvm/lib/CGData/StableFunctionMap.cpp
 create mode 100644 llvm/lib/CGData/StableFunctionMapRecord.cpp
 create mode 100644 llvm/unittests/CGData/StableFunctionMapRecordTest.cpp
 create mode 100644 llvm/unittests/CGData/StableFunctionMapTest.cpp

diff --git a/llvm/include/llvm/CGData/StableFunctionMap.h b/llvm/include/llvm/CGData/StableFunctionMap.h
new file mode 100644
index 00000000000000..ed217e381610da
--- /dev/null
+++ b/llvm/include/llvm/CGData/StableFunctionMap.h
@@ -0,0 +1,136 @@
+//===- StableFunctionMap.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This defines the StableFunctionMap class, to track similar functions.
+// It provides a mechanism to map stable hashes of functions to their
+// corresponding metadata. It includes structures for storing function details
+// and methods for managing and querying these mappings.
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_CGDATA_STABLEFUNCTIONMAP_H
+#define LLVM_CGDATA_STABLEFUNCTIONMAP_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/StructuralHash.h"
+
+namespace llvm {
+
+using IndexPairHash = std::pair<IndexPair, stable_hash>;
+using IndexOperandHashVecType = SmallVector<IndexPairHash>;
+
+/// A stable function is a function with a stable hash while tracking the
+/// locations of ignored operands and their hashes.
+struct StableFunction {
+  /// The combined stable hash of the function.
+  stable_hash Hash;
+  /// The name of the function.
+  std::string FunctionName;
+  /// The name of the module the function is in.
+  std::string ModuleName;
+  /// The number of instructions.
+  unsigned InstCount;
+  /// A vector of pairs of IndexPair and operand hash which was skipped.
+  IndexOperandHashVecType IndexOperandHashes;
+
+  StableFunction(stable_hash Hash, const std::string FunctionName,
+                 const std::string ModuleName, unsigned InstCount,
+                 IndexOperandHashVecType &&IndexOperandHashes)
+      : Hash(Hash), FunctionName(FunctionName), ModuleName(ModuleName),
+        InstCount(InstCount),
+        IndexOperandHashes(std::move(IndexOperandHashes)) {}
+  StableFunction() = default;
+};
+
+/// An efficient form of StableFunction for fast look-up
+struct StableFunctionEntry {
+  /// The combined stable hash of the function.
+  stable_hash Hash;
+  /// Id of the function name.
+  unsigned FunctionNameId;
+  /// Id of the module name.
+  unsigned ModuleNameId;
+  /// The number of instructions.
+  unsigned InstCount;
+  /// A map from an IndexPair to a stable_hash which was skipped.
+  std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap;
+
+  StableFunctionEntry(
+      stable_hash Hash, unsigned FunctionNameId, unsigned ModuleNameId,
+      unsigned InstCount,
+      std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap)
+      : Hash(Hash), FunctionNameId(FunctionNameId), ModuleNameId(ModuleNameId),
+        InstCount(InstCount),
+        IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
+};
+
+using HashFuncsMapType =
+    DenseMap<stable_hash, SmallVector<std::unique_ptr<StableFunctionEntry>>>;
+
+class StableFunctionMap {
+  /// A map from a stable_hash to a vector of functions with that hash.
+  HashFuncsMapType HashToFuncs;
+  /// A vector of strings to hold names.
+  SmallVector<std::string> IdToName;
+  /// A map from StringRef (name) to an ID.
+  StringMap<unsigned> NameToId;
+  /// True if the function map is finalized with minimal content.
+  bool Finalized = false;
+
+public:
+  /// Get the HashToFuncs map for serialization.
+  const HashFuncsMapType &getFunctionMap() const { return HashToFuncs; }
+
+  /// Get the NameToId vector for serialization.
+  const SmallVector<std::string> getNames() const { return IdToName; }
+
+  /// Get an existing ID associated with the given name or create a new ID if it
+  /// doesn't exist.
+  unsigned getIdOrCreateForName(StringRef Name);
+
+  /// Get the name associated with a given ID
+  std::optional<std::string> getNameForId(unsigned Id) const;
+
+  /// Insert a `StableFunction` object into the function map. This method
+  /// handles the uniquing of string names and create a `StableFunctionEntry`
+  /// for insertion.
+  void insert(const StableFunction &Func);
+
+  /// Insert a `StableFunctionEntry` into the function map directly. This
+  /// method assumes that string names have already been uniqued and the
+  /// `StableFunctionEntry` is ready for insertion.
+  void insert(std::unique_ptr<StableFunctionEntry> FuncEntry) {
+    assert(!Finalized && "Cannot insert after finalization");
+    HashToFuncs[FuncEntry->Hash].emplace_back(std::move(FuncEntry));
+  }
+
+  /// Merge a \p OtherMap into this function map.
+  void merge(const StableFunctionMap &OtherMap);
+
+  /// \returns true if there is no stable function entry.
+  bool empty() { return size() == 0; }
+
+  enum SizeType {
+    UniqueHashCount,        // The number of unique hashes in HashToFuncs.
+    TotalFunctionCount,     // The number of total functions in HashToFuncs.
+    MergeableFunctionCount, // The number of functions that can be merged based
+                            // on their hash.
+  };
+
+  /// \returns the size of StableFunctionMap.
+  /// \p Type is the type of size to return.
+  size_t size(SizeType Type = UniqueHashCount) const;
+
+  /// Finalize the stable function map by trimming content.
+  void finalize();
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/CGData/StableFunctionMapRecord.h b/llvm/include/llvm/CGData/StableFunctionMapRecord.h
new file mode 100644
index 00000000000000..0517f2c20d72ff
--- /dev/null
+++ b/llvm/include/llvm/CGData/StableFunctionMapRecord.h
@@ -0,0 +1,71 @@
+//===- StableFunctionMapRecord.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This defines the StableFunctionMapRecord structure, which provides
+// functionality for managing and serializing a StableFunctionMap. It includes
+// methods for serialization to and from raw and YAML streams, as well as
+// utilities for merging and finalizing function maps.
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
+#define LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
+
+#include "llvm/CGData/StableFunctionMap.h"
+#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+struct StableFunctionMapRecord {
+  std::unique_ptr<StableFunctionMap> FunctionMap;
+
+  StableFunctionMapRecord() {
+    FunctionMap = std::make_unique<StableFunctionMap>();
+  }
+
+  StableFunctionMapRecord(std::unique_ptr<StableFunctionMap> FunctionMap)
+      : FunctionMap(std::move(FunctionMap)) {}
+
+  /// A static helper function to serialize the stable function map without
+  /// owning the stable function map.
+  static void serialize(raw_ostream &OS, const StableFunctionMap *FunctionMap);
+
+  /// Serialize the stable function map to a raw_ostream.
+  void serialize(raw_ostream &OS) const;
+
+  /// Deserialize the stable function map from a raw_ostream.
+  void deserialize(const unsigned char *&Ptr);
+
+  /// Serialize the stable function map to a YAML stream.
+  void serializeYAML(yaml::Output &YOS) const;
+
+  /// Deserialize the stable function map from a YAML stream.
+  void deserializeYAML(yaml::Input &YIS);
+
+  /// Finalize the stable function map by trimming content.
+  void finalize() { FunctionMap->finalize(); }
+
+  /// Merge the stable function map into this one.
+  void merge(const StableFunctionMapRecord &Other) {
+    FunctionMap->merge(*Other.FunctionMap);
+  }
+
+  /// \returns true if the stable function map is empty.
+  bool empty() const { return FunctionMap->empty(); }
+
+  /// Print the stable function map in a YAML format.
+  void print(raw_ostream &OS = llvm::errs()) const {
+    yaml::Output YOS(OS);
+    serializeYAML(YOS);
+  }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/CGData/CMakeLists.txt b/llvm/lib/CGData/CMakeLists.txt
index 157b0dfb7f9fcf..003173139f36c5 100644
--- a/llvm/lib/CGData/CMakeLists.txt
+++ b/llvm/lib/CGData/CMakeLists.txt
@@ -4,6 +4,8 @@ add_llvm_component_library(LLVMCGData
   CodeGenDataWriter.cpp
   OutlinedHashTree.cpp
   OutlinedHashTreeRecord.cpp
+  StableFunctionMap.cpp
+  StableFunctionMapRecord.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/CGData
diff --git a/llvm/lib/CGData/StableFunctionMap.cpp b/llvm/lib/CGData/StableFunctionMap.cpp
new file mode 100644
index 00000000000000..638f92df68e028
--- /dev/null
+++ b/llvm/lib/CGData/StableFunctionMap.cpp
@@ -0,0 +1,170 @@
+//===-- StableFunctionMap.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the functionality for the StableFunctionMap class, which
+// manages the mapping of stable function hashes to their metadata. It includes
+// methods for inserting, merging, and finalizing function entries, as well as
+// utilities for handling function names and IDs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CGData/StableFunctionMap.h"
+
+#define DEBUG_TYPE "stable-function-map"
+
+using namespace llvm;
+
+unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) {
+  auto It = NameToId.find(Name);
+  if (It == NameToId.end()) {
+    unsigned Id = IdToName.size();
+    assert(Id == NameToId.size() && "ID collision");
+    IdToName.emplace_back(Name.str());
+    NameToId[IdToName.back()] = Id;
+    return Id;
+  } else {
+    return It->second;
+  }
+}
+
+std::optional<std::string> StableFunctionMap::getNameForId(unsigned Id) const {
+  if (Id >= IdToName.size())
+    return std::nullopt;
+  return IdToName[Id];
+}
+
+void StableFunctionMap::insert(const StableFunction &Func) {
+  assert(!Finalized && "Cannot insert after finalization");
+  auto FuncNameId = getIdOrCreateForName(Func.FunctionName);
+  auto ModuleNameId = getIdOrCreateForName(Func.ModuleName);
+  auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
+  for (auto &[Index, Hash] : Func.IndexOperandHashes)
+    (*IndexOperandHashMap)[Index] = Hash;
+  auto FuncEntry = std::make_unique<StableFunctionEntry>(
+      Func.Hash, FuncNameId, ModuleNameId, Func.InstCount,
+      std::move(IndexOperandHashMap));
+  insert(std::move(FuncEntry));
+}
+
+void StableFunctionMap::merge(const StableFunctionMap &OtherMap) {
+  assert(!Finalized && "Cannot merge after finalization");
+  for (auto &[Hash, Funcs] : OtherMap.HashToFuncs) {
+    auto &ThisFuncs = HashToFuncs[Hash];
+    for (auto &Func : Funcs) {
+      auto FuncNameId =
+          getIdOrCreateForName(*OtherMap.getNameForId(Func->FunctionNameId));
+      auto ModuleNameId =
+          getIdOrCreateForName(*OtherMap.getNameForId(Func->ModuleNameId));
+      auto ClonedIndexOperandHashMap =
+          std::make_unique<IndexOperandHashMapType>(*Func->IndexOperandHashMap);
+      ThisFuncs.emplace_back(std::make_unique<StableFunctionEntry>(
+          Func->Hash, FuncNameId, ModuleNameId, Func->InstCount,
+          std::move(ClonedIndexOperandHashMap)));
+    }
+  }
+}
+
+size_t StableFunctionMap::size(SizeType Type) const {
+  switch (Type) {
+  case UniqueHashCount:
+    return HashToFuncs.size();
+  case TotalFunctionCount: {
+    size_t Count = 0;
+    for (auto &Funcs : HashToFuncs)
+      Count += Funcs.second.size();
+    return Count;
+  }
+  case MergeableFunctionCount: {
+    size_t Count = 0;
+    for (auto &[Hash, Funcs] : HashToFuncs)
+      if (Funcs.size() >= 2)
+        Count += Funcs.size();
+    return Count;
+  }
+  }
+  return 0;
+}
+
+using ParamLocs = SmallVector<IndexPair>;
+static void removeIdenticalIndexPair(
+    SmallVector<std::unique_ptr<StableFunctionEntry>> &SFS) {
+  auto &RSF = SFS[0];
+  unsigned StableFunctionCount = SFS.size();
+
+  SmallVector<IndexPair> ToDelete;
+  for (auto &[Pair, Hash] : *(RSF->IndexOperandHashMap)) {
+    bool Identical = true;
+    for (unsigned J = 1; J < StableFunctionCount; ++J) {
+      auto &SF = SFS[J];
+      assert(SF->IndexOperandHashMap->count(Pair));
+      auto SHash = (*SF->IndexOperandHashMap)[Pair];
+      if (Hash != SHash) {
+        Identical = false;
+        break;
+      }
+    }
+
+    // No need to parameterize them if the hashes are identical across stable
+    // functions.
+    if (Identical)
+      ToDelete.emplace_back(Pair);
+  }
+
+  for (auto &Pair : ToDelete)
+    for (auto &SF : SFS)
+      SF->IndexOperandHashMap->erase(Pair);
+}
+
+void StableFunctionMap::finalize() {
+  Finalized = true;
+
+  for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It) {
+    auto &[StableHash, SFS] = *It;
+
+    // Group stable functions by ModuleIdentifier.
+    std::stable_sort(SFS.begin(), SFS.end(),
+                     [&](const std::unique_ptr<StableFunctionEntry> &L,
+                         const std::unique_ptr<StableFunctionEntry> &R) {
+                       return *getNameForId(L->ModuleNameId) <
+                              *getNameForId(R->ModuleNameId);
+                     });
+
+    // Consider the first function as the root function.
+    auto &RSF = SFS[0];
+
+    bool IsValid = true;
+    unsigned StableFunctionCount = SFS.size();
+    for (unsigned I = 1; I < StableFunctionCount; ++I) {
+      auto &SF = SFS[I];
+      assert(RSF->Hash == SF->Hash);
+      if (RSF->InstCount != SF->InstCount) {
+        IsValid = false;
+        break;
+      }
+      if (RSF->IndexOperandHashMap->size() != SF->IndexOperandHashMap->size()) {
+        IsValid = false;
+        break;
+      }
+      for (auto &P : *RSF->IndexOperandHashMap) {
+        auto &InstOpndIndex = P.first;
+        if (!SF->IndexOperandHashMap->count(InstOpndIndex)) {
+          IsValid = false;
+          break;
+        }
+      }
+    }
+    if (!IsValid) {
+      HashToFuncs.erase(It);
+      continue;
+    }
+
+    // Trim the index pair that has the same operand hash across
+    // stable functions.
+    removeIdenticalIndexPair(SFS);
+  }
+}
diff --git a/llvm/lib/CGData/StableFunctionMapRecord.cpp b/llvm/lib/CGData/StableFunctionMapRecord.cpp
new file mode 100644
index 00000000000000..05c96f873947ee
--- /dev/null
+++ b/llvm/lib/CGData/StableFunctionMapRecord.cpp
@@ -0,0 +1,203 @@
+//===-- StableFunctionMapRecord.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the functionality for the StableFunctionMapRecord class,
+// including methods for serialization and deserialization of stable function
+// maps to and from raw and YAML streams. It also includes utilities for
+// managing function entries and their metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CGData/StableFunctionMapRecord.h"
+#include "llvm/Support/EndianStream.h"
+
+#define DEBUG_TYPE "stable-function-map-record"
+
+using namespace llvm;
+using namespace llvm::support;
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(IndexPairHash)
+LLVM_YAML_IS_SEQUENCE_VECTOR(StableFunction)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<IndexPairHash> {
+  static void mapping(IO &IO, IndexPairHash &Key) {
+    IO.mapRequired("InstIndex", Key.first.first);
+    IO.mapRequired("OpndIndex", Key.first.second);
+    IO.mapRequired("OpndHash", Key.second);
+  }
+};
+
+template <> struct MappingTraits<StableFunction> {
+  static void mapping(IO &IO, StableFunction &Func) {
+    IO.mapRequired("Hash", Func.Hash);
+    IO.mapRequired("FunctionName", Func.FunctionName);
+    IO.mapRequired("ModuleName", Func.ModuleName);
+    IO.mapRequired("InstCount", Func.InstCount);
+    IO.mapRequired("IndexOperandHashes", Func.IndexOperandHashes);
+  }
+};
+
+} // namespace yaml
+} // namespace llvm
+
+// Get a sorted vector of StableFunctionEntry pointers.
+static SmallVector<const StableFunctionEntry *>
+getStableFunctionEntries(const StableFunctionMap &SFM) {
+  SmallVector<const StableFunctionEntry *> FuncEntries;
+  for (const auto &P : SFM.getFunctionMap())
+    for (auto &Func : P.second)
+      FuncEntries.emplace_back(Func.get());
+
+  std::stable_sort(
+      FuncEntries.begin(), FuncEntries.end(), [&](auto &A, auto &B) {
+        return std::tuple(A->Hash, SFM.getNameForId(A->ModuleNameId),
+                          SFM.getNameForId(A->FunctionNameId)) <
+               std::tuple(B->Hash, SFM.getNameForId(B->ModuleNameId),
+                          SFM.getNameForId(B->FunctionNameId));
+      });
+  return FuncEntries;
+}
+
+// Get a sorted vector of IndexOperandHashes.
+static IndexOperandHashVecType
+getStableIndexOperandHashes(const StableFunctionEntry *FuncEntry) {
+  IndexOperandHashVecType IndexOperandHashes;
+  for (auto &[Indices, OpndHash] : *FuncEntry->IndexOperandHashMap)
+    IndexOperandHashes.emplace_back(Indices, OpndHash);
+  std::sort(IndexOperandHashes.begin(), IndexOperandHashes.end(),
+            [](auto &A, auto &B) { return A.first < B.first; });
+  return IndexOperandHashes;
+}
+
+void StableFunctionMapRecord::serialize(raw_ostream &OS) const {
+  serialize(OS, FunctionMap.get());
+}
+
+void StableFunctionMapRecord::serialize(raw_ostream &OS,
+                                        const StableFunctionMap *FunctionMap) {
+  support::endian::Writer Writer(OS, endianness::little);
+
+  // Write Names.
+  auto &Names = FunctionMap->getNames();
+  uint32_t ByteSize = 4;
+  Writer.write<uint32_t>(Names.size());
+  for (auto &Name : Names) {
+    Writer.OS << Name << '\0';
+    ByteSize += Name.size() + 1;
+  }
+  // Align ByteSize to 4 bytes.
+  uint32_t Padding = offsetToAlignment(ByteSize, Align(4));
+  for (uint32_t I = 0; I < Padding; ++I)
+    Writer.OS << '\0';
+
+  // Write StableFunctionEntries whose pointers are sorted.
+  auto FuncEntries = getStableFunctionEntries(*FunctionMap);
+  Writer.write<uint32_t>(FuncEntries.size());
+
+  for (const auto *FuncRef : FuncEntries) {
+    Writer.write<stable_hash>(FuncRef->Hash);
+    Writer.write<uint32_t>(FuncRef->FunctionNameId);
+    Writer.write<uint32_t>(FuncRef->ModuleNameId);
+    Writer.write<uint32_t>(FuncRef->InstCount);
+
+    // Emit IndexOperandHashes sorted from IndexOperandHashMap.
+    IndexOperandHashVecType IndexOperandHashes =
+        getStableIndexOperandHashes(FuncRef);
+    Writer.write<uint32_t>(IndexOperandHashes.size());
+    for (auto &IndexOperandHash : IndexOperandHashes) {
+      Writer.write<uint32_t>(IndexOperandHash.first.first);
+      Writer.write<uint32_t>(IndexOperandHash.first.second);
+      Writer.write<stable_hash>(IndexOperandHash.second);
+    }
+  }
+}
+
+void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr) {
+  // Assert that Ptr is 4-byte aligned
+  assert(((uintptr_t)Ptr % 4) == 0);
+  // Read Names.
+  auto NumNames =
+      endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+  // Early exit if there is no name.
+  if (NumNames == 0)
+    return;
+  for (unsigned I = 0; I < NumNames; ++I) {
+    std::string Name(reinterpret_cast<const char *>(Ptr));
+    Ptr += Name.size() + 1;
+    FunctionMap->getIdOrCreateForName(Name);
+  }
+  // Align Ptr to 4 bytes.
+  Ptr = reinterpret_cast<const uint8_t *>(alignAddr(Ptr, Align(4)));
+
+  // Read StableFunctionEntries.
+  auto NumFuncs =
+      endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+  for (unsigned I = 0; I < NumFuncs; ++I) {
+    auto Hash =
+        endian::readNext<stable_hash, endianness::little, unaligned>(Ptr);
+    auto FunctionNameId =
+        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+    assert(FunctionMap->getNameForId(FunctionNameId) &&
+           "FunctionNameId out of range");
+    auto ModuleNameId =
+        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+    assert(FunctionMap->getNameForId(ModuleNameId) &&
+           "ModuleNameId out of range");
+    auto InstCount =
+        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+
+    // Read IndexOperandHashes to build IndexOperandHashMap
+    auto NumIndexOperandHashes =
+        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+    auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
+    for (unsigned J = 0; J < NumIndexOperandHashes; ++J) {
+      auto InstIndex =
+          endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+      auto OpndIndex =
+          endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+      auto OpndHash =
+          endian::readNext<stable_hash, endianness::little, unaligned>(Ptr);
+      assert(InstIndex < InstCount && "InstIndex out of range");
+
+      auto Indices = std::make_pair(InstIndex, OpndIndex);
+      (*IndexOperandHashMap)[Indices] = OpndHash;
+    }
+
+    // Insert a new StableFunctionEntry into the map.
+    auto FuncEntry = std::make_unique<StableFunctionEntry>(
+        Hash, FunctionNameId, ModuleNameId, InstCount,
+        std::move(IndexOperandHashMap));
+
+    FunctionMap->insert(std::move(FuncEntry));
+  }
+}
+
+void StableFunctionMapRecord::serializeYAML(yaml::Output &YOS) const {
+  auto FuncEntries = getStableFunctionEntries(*FunctionMap);
+  SmallVector<StableFunction> Functions;
+  for (const auto *FuncEntry : FuncEntries) {
+    auto IndexOperandHashes = getStableIndexOperandHashes(FuncEntry);
+    Functions.emplace_back(
+        FuncEntry->Hash, *FunctionMap->getNameForId(FuncEntry->FunctionNameId),
+        *FunctionMap->getNameForId(FuncEntry->ModuleNameId),
+        FuncEntry->InstCount, std::move(IndexOperandHashes));
+  }
+
+  YOS << Functions;
+}
+
+void StableFunctionMapRecord::deserializeYAML(yaml::Input &YIS) {
+  std::vector<StableFunction> Funcs;
+  YIS >> Funcs;
+  for (auto &Func : Funcs)
+    FunctionMap->insert(Func);
+  YIS.nextDocument();
+}
diff --git a/llvm/unittests/CGData/CMakeLists.txt b/llvm/unittests/CGData/CMakeLists.txt
index 792b323130b474..0bdb9e1f08c702 100644
--- a/llvm/unittests/CGData/CMakeLists.txt
+++ b/llvm/unittests/CGData/CMakeLists.txt
@@ -9,6 +9,8 @@ set(LLVM_LINK_COMPONENTS
 add_llvm_unittest(CGDataTests
   OutlinedHashTreeRecordTest.cpp
   OutlinedHashTreeTest.cpp
+  StableFunctionMapRecordTest.cpp
+  StableFunctionMapTest.cpp
   )
 
 target_link_libraries(CGDataTests PRIVATE LLVMTestingSupport)
diff --git a/llvm/unittests/CGData/StableFunctionMapRecordTest.cpp b/llvm/unittests/CGData/StableFunctionMapRecordTest.cpp
new file mode 100644
index 00000000000000..f5c9afe449da35
--- /dev/null
+++ b/llvm/unittests/CGData/StableFunctionMapRecordTest.cpp
@@ -0,0 +1,127 @@
+//===- StableFunctionMapRecordTest.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CGData/StableFunctionMapRecord.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+TEST(StableFunctionMapRecordTest, Print) {
+  StableFunctionMapRecord MapRecord;
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}}};
+  MapRecord.FunctionMap->insert(Func1);
+
+  const char *ExpectedMapStr = R"(---
+- Hash:            1
+  FunctionName:    Func1
+  ModuleName:      Mod1
+  InstCount:       2
+  IndexOperandHashes:
+    - InstIndex:       0
+      OpndIndex:       1
+      OpndHash:        3
+...
+)";
+  std::string MapDump;
+  raw_string_ostream OS(MapDump);
+  MapRecord.print(OS);
+  EXPECT_EQ(ExpectedMapStr, MapDump);
+}
+
+TEST(StableFunctionMapRecordTest, Stable) {
+  StableFunction Func1{1, "Func2", "Mod1", 1, {}};
+  StableFunction Func2{1, "Func3", "Mod1", 1, {}};
+  StableFunction Func3{1, "Func1", "Mod2", 1, {}};
+  StableFunction Func4{2, "Func4", "Mod3", 1, {}};
+
+  StableFunctionMapRecord MapRecord1;
+  MapRecord1.FunctionMap->insert(Func1);
+  MapRecord1.FunctionMap->insert(Func2);
+  MapRecord1.FunctionMap->insert(Func3);
+  MapRecord1.FunctionMap->insert(Func4);
+
+  StableFunctionMapRecord MapRecord2;
+  MapRecord2.FunctionMap->insert(Func4);
+  MapRecord2.FunctionMap->insert(Func3);
+  MapRecord2.FunctionMap->insert(Func2);
+  MapRecord2.FunctionMap->insert(Func1);
+
+  // Output is sorted by hash (1 < 2), module name (Mod1 < Mod2), and function
+  // name (Func2 < Func3).
+  std::string MapDump1;
+  raw_string_ostream OS1(MapDump1);
+  MapRecord1.print(OS1);
+  std::string MapDump2;
+  raw_string_ostream OS2(MapDump2);
+  MapRecord2.print(OS2);
+  EXPECT_EQ(MapDump1, MapDump2);
+}
+
+TEST(StableFunctionMapRecordTest, Serialize) {
+  StableFunctionMapRecord MapRecord1;
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}, {{1, 2}, 4}}};
+  StableFunction Func2{2, "Func2", "Mod1", 3, {{{0, 1}, 2}}};
+  StableFunction Func3{2, "Func3", "Mod1", 3, {{{0, 1}, 3}}};
+  MapRecord1.FunctionMap->insert(Func1);
+  MapRecord1.FunctionMap->insert(Func2);
+  MapRecord1.FunctionMap->insert(Func3);
+
+  // Serialize and deserialize the map.
+  SmallVector<char> Out;
+  raw_svector_ostream OS(Out);
+  MapRecord1.serialize(OS);
+
+  StableFunctionMapRecord MapRecord2;
+  const uint8_t *Data = reinterpret_cast<const uint8_t *>(Out.data());
+  MapRecord2.deserialize(Data);
+
+  // Two maps should be identical.
+  std::string MapDump1;
+  raw_string_ostream OS1(MapDump1);
+  MapRecord1.print(OS1);
+  std::string MapDump2;
+  raw_string_ostream OS2(MapDump2);
+  MapRecord2.print(OS2);
+
+  EXPECT_EQ(MapDump1, MapDump2);
+}
+
+TEST(StableFunctionMapRecordTest, SerializeYAML) {
+  StableFunctionMapRecord MapRecord1;
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}, {{1, 2}, 4}}};
+  StableFunction Func2{2, "Func2", "Mod1", 3, {{{0, 1}, 2}}};
+  StableFunction Func3{2, "Func3", "Mod1", 3, {{{0, 1}, 3}}};
+  MapRecord1.FunctionMap->insert(Func1);
+  MapRecord1.FunctionMap->insert(Func2);
+  MapRecord1.FunctionMap->insert(Func3);
+
+  // Serialize and deserialize the map in a YAML format.
+  std::string Out;
+  raw_string_ostream OS(Out);
+  yaml::Output YOS(OS);
+  MapRecord1.serializeYAML(YOS);
+
+  StableFunctionMapRecord MapRecord2;
+  yaml::Input YIS(StringRef(Out.data(), Out.size()));
+  MapRecord2.deserializeYAML(YIS);
+
+  // Two maps should be identical.
+  std::string MapDump1;
+  raw_string_ostream OS1(MapDump1);
+  MapRecord1.print(OS1);
+  std::string MapDump2;
+  raw_string_ostream OS2(MapDump2);
+  MapRecord2.print(OS2);
+
+  EXPECT_EQ(MapDump1, MapDump2);
+}
+
+} // end namespace
diff --git a/llvm/unittests/CGData/StableFunctionMapTest.cpp b/llvm/unittests/CGData/StableFunctionMapTest.cpp
new file mode 100644
index 00000000000000..839b997cdacac2
--- /dev/null
+++ b/llvm/unittests/CGData/StableFunctionMapTest.cpp
@@ -0,0 +1,153 @@
+//===- StableFunctionMapTest.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CGData/StableFunctionMap.h"
+#include "gmock/gmock-matchers.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+using testing::Contains;
+using testing::Key;
+using testing::Not;
+using testing::Pair;
+using testing::SizeIs;
+
+TEST(StableFunctionMap, Name) {
+  StableFunctionMap Map;
+  EXPECT_TRUE(Map.empty());
+  EXPECT_TRUE(Map.getNames().empty());
+  unsigned ID1 = Map.getIdOrCreateForName("Func1");
+  unsigned ID2 = Map.getIdOrCreateForName("Func2");
+  unsigned ID3 = Map.getIdOrCreateForName("Func1");
+
+  EXPECT_THAT(Map.getNames(), SizeIs(2));
+  // The different names should return different IDs.
+  EXPECT_NE(ID1, ID2);
+  // The same name should return the same ID.
+  EXPECT_EQ(ID1, ID3);
+  // The IDs should be valid.
+  EXPECT_EQ(*Map.getNameForId(ID1), "Func1");
+  EXPECT_EQ(*Map.getNameForId(ID2), "Func2");
+}
+
+TEST(StableFunctionMap, Insert) {
+  StableFunctionMap Map;
+
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}}};
+  StableFunction Func2{1, "Func2", "Mod1", 2, {{{0, 1}, 2}}};
+  Map.insert(Func1);
+  Map.insert(Func2);
+  // We only have a unique hash, 1
+  EXPECT_THAT(Map, SizeIs(1));
+  // We have two functions with the same hash which are potentially mergeable.
+  EXPECT_EQ(Map.size(StableFunctionMap::SizeType::TotalFunctionCount), 2u);
+  EXPECT_EQ(Map.size(StableFunctionMap::SizeType::MergeableFunctionCount), 2u);
+}
+
+TEST(StableFunctionMap, InsertEntry) {
+  StableFunctionMap Map;
+
+  unsigned ID1 = Map.getIdOrCreateForName("Func1");
+  unsigned ID2 = Map.getIdOrCreateForName("Mod1");
+  unsigned ID3 = Map.getIdOrCreateForName("Func2");
+
+  // Create a function entry and insert it into the map.
+  auto IndexOperandHashMap1 = std::make_unique<IndexOperandHashMapType>();
+  IndexOperandHashMap1->try_emplace({1, 1}, 3);
+  auto FuncEntry1 = std::make_unique<StableFunctionEntry>(
+      1, ID1, ID2, 2, std::move(IndexOperandHashMap1));
+  Map.insert(std::move(FuncEntry1));
+
+  // Create another function entry and insert it into the map.
+  auto IndexOperandHashMap2 = std::make_unique<IndexOperandHashMapType>();
+  IndexOperandHashMap2->try_emplace({0, 1}, 2);
+  auto FuncEntry2 = std::make_unique<StableFunctionEntry>(
+      1, ID3, ID2, 2, std::move(IndexOperandHashMap2));
+  Map.insert(std::move(FuncEntry2));
+
+  // We only have a unique hash, 1
+  EXPECT_THAT(Map, SizeIs(1));
+  // We have two functions with the same hash which are potentially mergeable.
+  EXPECT_EQ(Map.size(StableFunctionMap::SizeType::TotalFunctionCount), 2u);
+}
+
+TEST(StableFunctionMap, Merge) {
+  StableFunctionMap Map1;
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}}};
+  StableFunction Func2{1, "Func2", "Mod1", 2, {{{0, 1}, 2}}};
+  StableFunction Func3{2, "Func3", "Mod1", 2, {{{1, 1}, 2}}};
+  Map1.insert(Func1);
+  Map1.insert(Func2);
+  Map1.insert(Func3);
+
+  StableFunctionMap Map2;
+  StableFunction Func4{1, "Func4", "Mod2", 2, {{{0, 1}, 4}}};
+  StableFunction Func5{2, "Func5", "Mod2", 2, {{{1, 1}, 5}}};
+  StableFunction Func6{3, "Func6", "Mod2", 2, {{{1, 1}, 6}}};
+  Map2.insert(Func4);
+  Map2.insert(Func5);
+  Map2.insert(Func6);
+
+  // Merge two maps.
+  Map1.merge(Map2);
+
+  // We only have two unique hashes, 1, 2 and 3
+  EXPECT_THAT(Map1, SizeIs(3));
+  // We have total 6 functions.
+  EXPECT_EQ(Map1.size(StableFunctionMap::SizeType::TotalFunctionCount), 6u);
+  // We have 5 mergeable functions. Func6 only has a unique hash, 3.
+  EXPECT_EQ(Map1.size(StableFunctionMap::SizeType::MergeableFunctionCount), 5u);
+}
+
+TEST(StableFunctionMap, Finalize1) {
+  StableFunctionMap Map;
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}}};
+  StableFunction Func2{1, "Func2", "Mod2", 3, {{{0, 1}, 2}}};
+  Map.insert(Func1);
+  Map.insert(Func2);
+
+  // Instruction count is mis-matched, so they're not mergeable.
+  Map.finalize();
+  EXPECT_TRUE(Map.empty());
+}
+
+TEST(StableFunctionMap, Finalize2) {
+  StableFunctionMap Map;
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}}};
+  StableFunction Func2{1, "Func2", "Mod2", 2, {{{0, 1}, 2}, {{1, 1}, 1}}};
+  Map.insert(Func1);
+  Map.insert(Func2);
+
+  // Operand map size is mis-matched, so they're not mergeable.
+  Map.finalize();
+  EXPECT_TRUE(Map.empty());
+}
+
+TEST(StableFunctionMap, Finalize3) {
+  StableFunctionMap Map;
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}, {{1, 1}, 1}}};
+  StableFunction Func2{1, "Func2", "Mod2", 2, {{{0, 1}, 2}, {{1, 1}, 1}}};
+  Map.insert(Func1);
+  Map.insert(Func2);
+
+  // The same operand entry is removed, which is redundant.
+  Map.finalize();
+  auto &M = Map.getFunctionMap();
+  EXPECT_THAT(M, SizeIs(1));
+  auto &FuncEntries = M.begin()->second;
+  for (auto &FuncEntry : FuncEntries) {
+    EXPECT_THAT(*FuncEntry->IndexOperandHashMap, SizeIs(1));
+    ASSERT_THAT(*FuncEntry->IndexOperandHashMap,
+                Not(Contains(Key(Pair(1, 1)))));
+  }
+}
+
+} // end namespace

>From c7913f9fff736da4cc6a78a17e41dc539bc75e8a Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Mon, 9 Sep 2024 19:38:05 -0700
Subject: [PATCH 2/5] [CGData][llvm-cgdata] Support for stable function map

This introduces a new cgdata format for stable function maps.
The raw data is embedded in the __llvm_merge section during compile time.
This data can be read and merged using the llvm-cgdata tool, into an indexed cgdata file. Consequently, the tool is now capable of handling either outlined hash trees, stable function maps, or both, as they are orthogonal.
---
 lld/test/MachO/cgdata-generate.s              |  6 +-
 llvm/docs/CommandGuide/llvm-cgdata.rst        | 16 ++--
 llvm/include/llvm/CGData/CodeGenData.h        | 24 +++++-
 llvm/include/llvm/CGData/CodeGenData.inc      | 12 ++-
 llvm/include/llvm/CGData/CodeGenDataReader.h  | 29 ++++++-
 llvm/include/llvm/CGData/CodeGenDataWriter.h  | 17 +++-
 llvm/lib/CGData/CodeGenData.cpp               | 30 ++++---
 llvm/lib/CGData/CodeGenDataReader.cpp         | 63 +++++++++-----
 llvm/lib/CGData/CodeGenDataWriter.cpp         | 30 ++++++-
 llvm/test/tools/llvm-cgdata/empty.test        |  8 +-
 llvm/test/tools/llvm-cgdata/error.test        | 13 +--
 .../merge-combined-funcmap-hashtree.test      | 66 +++++++++++++++
 .../llvm-cgdata/merge-funcmap-archive.test    | 83 +++++++++++++++++++
 .../llvm-cgdata/merge-funcmap-concat.test     | 78 +++++++++++++++++
 .../llvm-cgdata/merge-funcmap-double.test     | 79 ++++++++++++++++++
 .../llvm-cgdata/merge-funcmap-single.test     | 36 ++++++++
 ...chive.test => merge-hashtree-archive.test} |  8 +-
 ...concat.test => merge-hashtree-concat.test} |  6 +-
 ...double.test => merge-hashtree-double.test} |  8 +-
 ...single.test => merge-hashtree-single.test} |  4 +-
 llvm/tools/llvm-cgdata/llvm-cgdata.cpp        | 48 ++++++++---
 21 files changed, 577 insertions(+), 87 deletions(-)
 create mode 100644 llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
 create mode 100644 llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
 create mode 100644 llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
 create mode 100644 llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
 create mode 100644 llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
 rename llvm/test/tools/llvm-cgdata/{merge-archive.test => merge-hashtree-archive.test} (91%)
 rename llvm/test/tools/llvm-cgdata/{merge-concat.test => merge-hashtree-concat.test} (93%)
 rename llvm/test/tools/llvm-cgdata/{merge-double.test => merge-hashtree-double.test} (90%)
 rename llvm/test/tools/llvm-cgdata/{merge-single.test => merge-hashtree-single.test} (92%)

diff --git a/lld/test/MachO/cgdata-generate.s b/lld/test/MachO/cgdata-generate.s
index 174df39d666c5d..f942ae07f64e0e 100644
--- a/lld/test/MachO/cgdata-generate.s
+++ b/lld/test/MachO/cgdata-generate.s
@@ -3,12 +3,12 @@
 
 # RUN: rm -rf %t; split-file %s %t
 
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
 # RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
-# RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt
+# RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt
 # RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-template.s > %t/merge-1.s
 # RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
-# RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt
+# RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt
 # RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-template.s > %t/merge-2.s
 
 # RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-1.s -o %t/merge-1.o
diff --git a/llvm/docs/CommandGuide/llvm-cgdata.rst b/llvm/docs/CommandGuide/llvm-cgdata.rst
index f592e1508844ee..0670decd087e39 100644
--- a/llvm/docs/CommandGuide/llvm-cgdata.rst
+++ b/llvm/docs/CommandGuide/llvm-cgdata.rst
@@ -11,15 +11,13 @@ SYNOPSIS
 DESCRIPTION
 -----------
 
-The :program:llvm-cgdata utility parses raw codegen data embedded
-in compiled binary files and merges them into a single .cgdata file.
-It can also inspect and manipulate .cgdata files.
-Currently, the tool supports saving and restoring outlined hash trees,
-enabling global function outlining across modules, allowing for more
-efficient function outlining in subsequent compilations.
-The design is extensible, allowing for the incorporation of additional
-codegen summaries and optimization techniques, such as global function
-merging, in the future.
+The :program:llvm-cgdata utility parses raw codegen data embedded in compiled
+binary files and merges them into a single .cgdata file. It can also inspect
+and manipulate .cgdata files. Currently, the tool supports saving and restoring
+outlined hash trees and stable function maps, allowing for more efficient
+function outlining and function merging across modules in subsequent
+compilations. The design is extensible, allowing for the incorporation of
+additional codegen summaries and optimization techniques.
 
 COMMANDS
 --------
diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h
index 53550beeae1f83..5d7c74725ccef1 100644
--- a/llvm/include/llvm/CGData/CodeGenData.h
+++ b/llvm/include/llvm/CGData/CodeGenData.h
@@ -19,6 +19,7 @@
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/CGData/OutlinedHashTree.h"
 #include "llvm/CGData/OutlinedHashTreeRecord.h"
+#include "llvm/CGData/StableFunctionMapRecord.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Caching.h"
@@ -41,7 +42,9 @@ enum class CGDataKind {
   Unknown = 0x0,
   // A function outlining info.
   FunctionOutlinedHashTree = 0x1,
-  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree)
+  // A function merging info.
+  StableFunctionMergingMap = 0x2,
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/StableFunctionMergingMap)
 };
 
 const std::error_category &cgdata_category();
@@ -108,6 +111,8 @@ enum CGDataMode {
 class CodeGenData {
   /// Global outlined hash tree that has oulined hash sequences across modules.
   std::unique_ptr<OutlinedHashTree> PublishedHashTree;
+  /// Global stable function map that has stable function info across modules.
+  std::unique_ptr<StableFunctionMap> PublishedStableFunctionMap;
 
   /// This flag is set when -fcodegen-data-generate is passed.
   /// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
@@ -131,6 +136,9 @@ class CodeGenData {
   bool hasOutlinedHashTree() {
     return PublishedHashTree && !PublishedHashTree->empty();
   }
+  bool hasStableFunctionMap() {
+    return PublishedStableFunctionMap && !PublishedStableFunctionMap->empty();
+  }
 
   /// Returns the outlined hash tree. This can be globally used in a read-only
   /// manner.
@@ -147,6 +155,12 @@ class CodeGenData {
     // Ensure we disable emitCGData as we do not want to read and write both.
     EmitCGData = false;
   }
+  void
+  publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
+    PublishedStableFunctionMap = std::move(FunctionMap);
+    // Ensure we disable emitCGData as we do not want to read and write both.
+    EmitCGData = false;
+  }
 };
 
 namespace cgdata {
@@ -166,6 +180,11 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
   CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
 }
 
+inline void
+publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
+  CodeGenData::getInstance().publishStableFunctionMap(std::move(FunctionMap));
+}
+
 struct StreamCacheData {
   /// Backing buffer for serialized data stream.
   SmallVector<SmallString<0>> Outputs;
@@ -249,6 +268,8 @@ enum CGDataVersion {
   // Version 1 is the first version. This version supports the outlined
   // hash tree.
   Version1 = 1,
+  // Version 2 supports the stable function merging map.
+  Version2 = 2,
   CurrentVersion = CG_DATA_INDEX_VERSION
 };
 const uint64_t Version = CGDataVersion::CurrentVersion;
@@ -258,6 +279,7 @@ struct Header {
   uint32_t Version;
   uint32_t DataKind;
   uint64_t OutlinedHashTreeOffset;
+  uint64_t StableFunctionMapOffset;
 
   // New fields should only be added at the end to ensure that the size
   // computation is correct. The methods below need to be updated to ensure that
diff --git a/llvm/include/llvm/CGData/CodeGenData.inc b/llvm/include/llvm/CGData/CodeGenData.inc
index 08ec14ea051a0c..e0ae7a51024d87 100644
--- a/llvm/include/llvm/CGData/CodeGenData.inc
+++ b/llvm/include/llvm/CGData/CodeGenData.inc
@@ -20,6 +20,8 @@
 #define CG_DATA_DEFINED
 CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON),
                    CG_DATA_OUTLINE_COFF, "__DATA,")
+CG_DATA_SECT_ENTRY(CG_merge, CG_DATA_QUOTE(CG_DATA_MERGE_COMMON),
+                   CG_DATA_MERGE_COFF, "__DATA,")
 
 #undef CG_DATA_SECT_ENTRY
 #endif
@@ -27,20 +29,24 @@ CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON),
 /* section name strings common to all targets other
    than WIN32 */
 #define CG_DATA_OUTLINE_COMMON __llvm_outline
+#define CG_DATA_MERGE_COMMON __llvm_merge
 /* Since cg data sections are not allocated, we don't need to
  * access them at runtime.
  */
 #define CG_DATA_OUTLINE_COFF ".loutline"
+#define CG_DATA_MERGE_COFF ".lmerge"
 
 #ifdef _WIN32
 /* Runtime section names and name strings.  */
-#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF
+#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_OUTLINE_COFF
+#define CG_DATA_MERGE_SECT_NAME CG_DATA_MERGE_COFF
 
 #else
 /* Runtime section names and name strings.  */
-#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
+#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
+#define CG_DATA_MERGE_SECT_NAME CG_DATA_QUOTE(CG_DATA_MERGE_COMMON)
 
 #endif
 
 /* Indexed codegen data format version (start from 1). */
-#define CG_DATA_INDEX_VERSION 1
+#define CG_DATA_INDEX_VERSION 2
diff --git a/llvm/include/llvm/CGData/CodeGenDataReader.h b/llvm/include/llvm/CGData/CodeGenDataReader.h
index 7e4882df2116e2..085dd6dd747c90 100644
--- a/llvm/include/llvm/CGData/CodeGenDataReader.h
+++ b/llvm/include/llvm/CGData/CodeGenDataReader.h
@@ -15,6 +15,7 @@
 
 #include "llvm/CGData/CodeGenData.h"
 #include "llvm/CGData/OutlinedHashTreeRecord.h"
+#include "llvm/CGData/StableFunctionMapRecord.h"
 #include "llvm/Support/LineIterator.h"
 #include "llvm/Support/VirtualFileSystem.h"
 
@@ -36,10 +37,15 @@ class CodeGenDataReader {
   virtual CGDataKind getDataKind() const = 0;
   /// Return true if the data has an outlined hash tree.
   virtual bool hasOutlinedHashTree() const = 0;
+  /// Return true if the data has a stable function map.
+  virtual bool hasStableFunctionMap() const = 0;
   /// Return the outlined hash tree that is released from the reader.
   std::unique_ptr<OutlinedHashTree> releaseOutlinedHashTree() {
     return std::move(HashTreeRecord.HashTree);
   }
+  std::unique_ptr<StableFunctionMap> releaseStableFunctionMap() {
+    return std::move(FunctionMapRecord.FunctionMap);
+  }
 
   /// Factory method to create an appropriately typed reader for the given
   /// codegen data file path and file system.
@@ -56,15 +62,21 @@ class CodeGenDataReader {
   /// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds.
   /// Optionally, \p CombinedHash can be used to compuate the combined hash of
   /// the merged data.
-  static Error mergeFromObjectFile(const object::ObjectFile *Obj,
-                                   OutlinedHashTreeRecord &GlobalOutlineRecord,
-                                   stable_hash *CombinedHash = nullptr);
+  static Error
+  mergeFromObjectFile(const object::ObjectFile *Obj,
+                      OutlinedHashTreeRecord &GlobalOutlineRecord,
+                      StableFunctionMapRecord &GlobalFunctionMapRecord,
+                      stable_hash *CombinedHash = nullptr);
 
 protected:
   /// The outlined hash tree that has been read. When it's released by
   /// releaseOutlinedHashTree(), it's no longer valid.
   OutlinedHashTreeRecord HashTreeRecord;
 
+  /// The stable function map that has been read. When it's released by
+  // releaseStableFunctionMap(), it's no longer valid.
+  StableFunctionMapRecord FunctionMapRecord;
+
   /// Set the current error and return same.
   Error error(cgdata_error Err, const std::string &ErrMsg = "") {
     LastError = Err;
@@ -115,6 +127,11 @@ class IndexedCodeGenDataReader : public CodeGenDataReader {
     return Header.DataKind &
            static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
   }
+  /// Return true if the header indicates the data has a stable function map.
+  bool hasStableFunctionMap() const override {
+    return Header.DataKind &
+           static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
+  }
 };
 
 /// This format is a simple text format that's suitable for test data.
@@ -150,6 +167,12 @@ class TextCodeGenDataReader : public CodeGenDataReader {
     return static_cast<uint32_t>(DataKind) &
            static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
   }
+  /// Return true if the header indicates the data has a stable function map.
+  /// This does not mean that the data is still available.
+  bool hasStableFunctionMap() const override {
+    return static_cast<uint32_t>(DataKind) &
+           static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
+  }
 };
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/CGData/CodeGenDataWriter.h b/llvm/include/llvm/CGData/CodeGenDataWriter.h
index 5cb8377b1d07e5..1c4247608999a7 100644
--- a/llvm/include/llvm/CGData/CodeGenDataWriter.h
+++ b/llvm/include/llvm/CGData/CodeGenDataWriter.h
@@ -15,6 +15,7 @@
 
 #include "llvm/CGData/CodeGenData.h"
 #include "llvm/CGData/OutlinedHashTreeRecord.h"
+#include "llvm/CGData/StableFunctionMapRecord.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/Error.h"
 
@@ -57,6 +58,9 @@ class CodeGenDataWriter {
   /// The outlined hash tree to be written.
   OutlinedHashTreeRecord HashTreeRecord;
 
+  /// The stable function map to be written.
+  StableFunctionMapRecord FunctionMapRecord;
+
   /// A bit mask describing the kind of the codegen data.
   CGDataKind DataKind = CGDataKind::Unknown;
 
@@ -64,9 +68,12 @@ class CodeGenDataWriter {
   CodeGenDataWriter() = default;
   ~CodeGenDataWriter() = default;
 
-  /// Add the outlined hash tree record. The input Record is released.
+  /// Add the outlined hash tree record. The input hash tree is released.
   void addRecord(OutlinedHashTreeRecord &Record);
 
+  /// Add the stable function map record. The input function map is released.
+  void addRecord(StableFunctionMapRecord &Record);
+
   /// Write the codegen data to \c OS
   Error write(raw_fd_ostream &OS);
 
@@ -81,11 +88,19 @@ class CodeGenDataWriter {
     return static_cast<uint32_t>(DataKind) &
            static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
   }
+  /// Return true if the header indicates the data has a stable function map.
+  bool hasStableFunctionMap() const {
+    return static_cast<uint32_t>(DataKind) &
+           static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
+  }
 
 private:
   /// The offset of the outlined hash tree in the file.
   uint64_t OutlinedHashTreeOffset;
 
+  /// The offset of the stable function map in the file.
+  uint64_t StableFunctionMapOffset;
+
   /// Write the codegen data header to \c COS
   Error writeHeader(CGDataOStream &COS);
 
diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp
index 2a3a74c8bc37af..88dcdfd1f931a2 100644
--- a/llvm/lib/CGData/CodeGenData.cpp
+++ b/llvm/lib/CGData/CodeGenData.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Bitcode/BitcodeWriter.h"
 #include "llvm/CGData/CodeGenDataReader.h"
 #include "llvm/CGData/OutlinedHashTreeRecord.h"
+#include "llvm/CGData/StableFunctionMapRecord.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Caching.h"
 #include "llvm/Support/CommandLine.h"
@@ -163,6 +164,8 @@ CodeGenData &CodeGenData::getInstance() {
       auto Reader = ReaderOrErr->get();
       if (Reader->hasOutlinedHashTree())
         Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree());
+      if (Reader->hasStableFunctionMap())
+        Instance->publishStableFunctionMap(Reader->releaseStableFunctionMap());
     }
   });
   return *Instance;
@@ -185,18 +188,14 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
     return make_error<CGDataError>(cgdata_error::unsupported_version);
   H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
 
-  switch (H.Version) {
-    // When a new field is added to the header add a case statement here to
-    // compute the size as offset of the new field + size of the new field. This
-    // relies on the field being added to the end of the list.
-    static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version1,
-                  "Please update the size computation below if a new field has "
-                  "been added to the header, if not add a case statement to "
-                  "fall through to the latest version.");
-  case 1ull:
-    H.OutlinedHashTreeOffset =
+  static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version2,
+                "Please update the offset computation below if a new field has "
+                "been added to the header.");
+  H.OutlinedHashTreeOffset =
+      endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
+  if (H.Version >= 2)
+    H.StableFunctionMapOffset =
         endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
-  }
 
   return H;
 }
@@ -257,6 +256,7 @@ std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
 
 Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
   OutlinedHashTreeRecord GlobalOutlineRecord;
+  StableFunctionMapRecord GlobalStableFunctionMapRecord;
   stable_hash CombinedHash = 0;
   for (auto File : ObjFiles) {
     if (File.empty())
@@ -270,12 +270,18 @@ Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
 
     std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get();
     if (auto E = CodeGenDataReader::mergeFromObjectFile(
-            Obj.get(), GlobalOutlineRecord, &CombinedHash))
+            Obj.get(), GlobalOutlineRecord, GlobalStableFunctionMapRecord,
+            &CombinedHash))
       return E;
   }
 
+  GlobalStableFunctionMapRecord.finalize();
+
   if (!GlobalOutlineRecord.empty())
     cgdata::publishOutlinedHashTree(std::move(GlobalOutlineRecord.HashTree));
+  if (!GlobalStableFunctionMapRecord.empty())
+    cgdata::publishStableFunctionMap(
+        std::move(GlobalStableFunctionMapRecord.FunctionMap));
 
   return CombinedHash;
 }
diff --git a/llvm/lib/CGData/CodeGenDataReader.cpp b/llvm/lib/CGData/CodeGenDataReader.cpp
index 2f2481ea60f822..ebeb4ae36f99f3 100644
--- a/llvm/lib/CGData/CodeGenDataReader.cpp
+++ b/llvm/lib/CGData/CodeGenDataReader.cpp
@@ -32,10 +32,40 @@ setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
 
 Error CodeGenDataReader::mergeFromObjectFile(
     const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
+    StableFunctionMapRecord &GlobalFunctionMapRecord,
     stable_hash *CombinedHash) {
   Triple TT = Obj->makeTriple();
   auto CGOutLineName =
       getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
+  auto CGMergeName =
+      getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false);
+
+  auto processSectionContents = [&](const StringRef &Name,
+                                    const StringRef &Contents) {
+    if (Name != CGOutLineName && Name != CGMergeName)
+      return;
+    if (CombinedHash)
+      *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents));
+    auto *Data = reinterpret_cast<const unsigned char *>(Contents.data());
+    auto *EndData = Data + Contents.size();
+    // In case dealing with an executable that has concatenated cgdata,
+    // we want to merge them into a single cgdata.
+    // Although it's not a typical workflow, we support this scenario
+    // by looping over all data in the sections.
+    if (Name == CGOutLineName) {
+      while (Data != EndData) {
+        OutlinedHashTreeRecord LocalOutlineRecord;
+        LocalOutlineRecord.deserialize(Data);
+        GlobalOutlineRecord.merge(LocalOutlineRecord);
+      }
+    } else if (Name == CGMergeName) {
+      while (Data != EndData) {
+        StableFunctionMapRecord LocalFunctionMapRecord;
+        LocalFunctionMapRecord.deserialize(Data);
+        GlobalFunctionMapRecord.merge(LocalFunctionMapRecord);
+      }
+    }
+  };
 
   for (auto &Section : Obj->sections()) {
     Expected<StringRef> NameOrErr = Section.getName();
@@ -44,23 +74,7 @@ Error CodeGenDataReader::mergeFromObjectFile(
     Expected<StringRef> ContentsOrErr = Section.getContents();
     if (!ContentsOrErr)
       return ContentsOrErr.takeError();
-    auto *Data = reinterpret_cast<const unsigned char *>(ContentsOrErr->data());
-    auto *EndData = Data + ContentsOrErr->size();
-
-    if (*NameOrErr == CGOutLineName) {
-      if (CombinedHash)
-        *CombinedHash =
-            stable_hash_combine(*CombinedHash, xxh3_64bits(*ContentsOrErr));
-      // In case dealing with an executable that has concatenated cgdata,
-      // we want to merge them into a single cgdata.
-      // Although it's not a typical workflow, we support this scenario.
-      while (Data != EndData) {
-        OutlinedHashTreeRecord LocalOutlineRecord;
-        LocalOutlineRecord.deserialize(Data);
-        GlobalOutlineRecord.merge(LocalOutlineRecord);
-      }
-    }
-    // TODO: Add support for other cgdata sections.
+    processSectionContents(*NameOrErr, *ContentsOrErr);
   }
 
   return Error::success();
@@ -69,7 +83,8 @@ Error CodeGenDataReader::mergeFromObjectFile(
 Error IndexedCodeGenDataReader::read() {
   using namespace support;
 
-  // The smallest header with the version 1 is 24 bytes
+  // The smallest header with the version 1 is 24 bytes.
+  // Do not update this value even with the new version of the header.
   const unsigned MinHeaderSize = 24;
   if (DataBuffer->getBufferSize() < MinHeaderSize)
     return error(cgdata_error::bad_header);
@@ -87,6 +102,12 @@ Error IndexedCodeGenDataReader::read() {
       return error(cgdata_error::eof);
     HashTreeRecord.deserialize(Ptr);
   }
+  if (hasStableFunctionMap()) {
+    const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
+    if (Ptr >= End)
+      return error(cgdata_error::eof);
+    FunctionMapRecord.deserialize(Ptr);
+  }
 
   return success();
 }
@@ -152,6 +173,8 @@ Error TextCodeGenDataReader::read() {
     StringRef Str = Line->drop_front().rtrim();
     if (Str.equals_insensitive("outlined_hash_tree"))
       DataKind |= CGDataKind::FunctionOutlinedHashTree;
+    else if (Str.equals_insensitive("stable_function_map"))
+      DataKind |= CGDataKind::StableFunctionMergingMap;
     else
       return error(cgdata_error::bad_header);
   }
@@ -170,8 +193,8 @@ Error TextCodeGenDataReader::read() {
   yaml::Input YOS(StringRef(Pos, Size));
   if (hasOutlinedHashTree())
     HashTreeRecord.deserializeYAML(YOS);
-
-  // TODO: Add more yaml cgdata in order
+  if (hasStableFunctionMap())
+    FunctionMapRecord.deserializeYAML(YOS);
 
   return Error::success();
 }
diff --git a/llvm/lib/CGData/CodeGenDataWriter.cpp b/llvm/lib/CGData/CodeGenDataWriter.cpp
index 5f638be0fefe74..3a392036198a97 100644
--- a/llvm/lib/CGData/CodeGenDataWriter.cpp
+++ b/llvm/lib/CGData/CodeGenDataWriter.cpp
@@ -52,6 +52,13 @@ void CodeGenDataWriter::addRecord(OutlinedHashTreeRecord &Record) {
   DataKind |= CGDataKind::FunctionOutlinedHashTree;
 }
 
+void CodeGenDataWriter::addRecord(StableFunctionMapRecord &Record) {
+  assert(Record.FunctionMap && "empty function map in the record");
+  FunctionMapRecord.FunctionMap = std::move(Record.FunctionMap);
+
+  DataKind |= CGDataKind::StableFunctionMergingMap;
+}
+
 Error CodeGenDataWriter::write(raw_fd_ostream &OS) {
   CGDataOStream COS(OS);
   return writeImpl(COS);
@@ -68,8 +75,11 @@ Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) {
   if (static_cast<bool>(DataKind & CGDataKind::FunctionOutlinedHashTree))
     Header.DataKind |=
         static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
-
+  if (static_cast<bool>(DataKind & CGDataKind::StableFunctionMergingMap))
+    Header.DataKind |=
+        static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
   Header.OutlinedHashTreeOffset = 0;
+  Header.StableFunctionMapOffset = 0;
 
   // Only write up to the CGDataKind. We need to remember the offset of the
   // remaining fields to allow back-patching later.
@@ -83,6 +93,12 @@ Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) {
   // Reserve the space for OutlinedHashTreeOffset field.
   COS.write(0);
 
+  // Save the location of Header.StableFunctionMapOffset field in \c COS.
+  StableFunctionMapOffset = COS.tell();
+
+  // Reserve the space for StableFunctionMapOffset field.
+  COS.write(0);
+
   return Error::success();
 }
 
@@ -93,10 +109,14 @@ Error CodeGenDataWriter::writeImpl(CGDataOStream &COS) {
   uint64_t OutlinedHashTreeFieldStart = COS.tell();
   if (hasOutlinedHashTree())
     HashTreeRecord.serialize(COS.OS);
+  uint64_t StableFunctionMapFieldStart = COS.tell();
+  if (hasStableFunctionMap())
+    FunctionMapRecord.serialize(COS.OS);
 
   // Back patch the offsets.
   CGDataPatchItem PatchItems[] = {
-      {OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1}};
+      {OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1},
+      {StableFunctionMapOffset, &StableFunctionMapFieldStart, 1}};
   COS.patch(PatchItems);
 
   return Error::success();
@@ -106,6 +126,9 @@ Error CodeGenDataWriter::writeHeaderText(raw_fd_ostream &OS) {
   if (hasOutlinedHashTree())
     OS << "# Outlined stable hash tree\n:outlined_hash_tree\n";
 
+  if (hasStableFunctionMap())
+    OS << "# Stable function map\n:stable_function_map\n";
+
   // TODO: Add more data types in this header
 
   return Error::success();
@@ -119,6 +142,9 @@ Error CodeGenDataWriter::writeText(raw_fd_ostream &OS) {
   if (hasOutlinedHashTree())
     HashTreeRecord.serializeYAML(YOS);
 
+  if (hasStableFunctionMap())
+    FunctionMapRecord.serializeYAML(YOS);
+
   // TODO: Write more yaml cgdata in order
 
   return Error::success();
diff --git a/llvm/test/tools/llvm-cgdata/empty.test b/llvm/test/tools/llvm-cgdata/empty.test
index 70d5ea4b800630..bea78d512a6db7 100644
--- a/llvm/test/tools/llvm-cgdata/empty.test
+++ b/llvm/test/tools/llvm-cgdata/empty.test
@@ -16,7 +16,7 @@ RUN: llvm-cgdata --show %t_emptyheader.cgdata | count 0
 
 # The version number appears when asked, as it's in the header
 RUN: llvm-cgdata --show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION
-VERSION: Version: 1
+VERSION: Version: 2
 
 # When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header.
 RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
@@ -27,9 +27,11 @@ RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
 #   uint32_t Version;
 #   uint32_t DataKind;
 #   uint64_t OutlinedHashTreeOffset;
+#   uint64_t StableFunctionMapOffset;
 # }
 RUN: printf '\xffcgdata\x81' > %t_header.cgdata
-RUN: printf '\x01\x00\x00\x00' >> %t_header.cgdata
+RUN: printf '\x02\x00\x00\x00' >> %t_header.cgdata
 RUN: printf '\x00\x00\x00\x00' >> %t_header.cgdata
-RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
+RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
+RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
 RUN: diff %t_header.cgdata %t_emptyheader.cgdata
diff --git a/llvm/test/tools/llvm-cgdata/error.test b/llvm/test/tools/llvm-cgdata/error.test
index c992174505c1ad..2caa3aef403950 100644
--- a/llvm/test/tools/llvm-cgdata/error.test
+++ b/llvm/test/tools/llvm-cgdata/error.test
@@ -6,6 +6,7 @@
 #   uint32_t Version;
 #   uint32_t DataKind;
 #   uint64_t OutlinedHashTreeOffset;
+#   uint64_t StableFunctionMapOffset;
 # }
 RUN: touch %t_empty.cgdata
 RUN: not llvm-cgdata --show %t_empty.cgdata 2>&1 | FileCheck %s --check-prefix=EMPTY
@@ -21,18 +22,20 @@ RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata
 RUN: not llvm-cgdata --show %t_corrupt.cgdata 2>&1 | FileCheck %s  --check-prefix=CORRUPT
 CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt)
 
-# The current version 1 while the header says 2.
+# The current version 2 while the header says 3.
 RUN: printf '\xffcgdata\x81' > %t_version.cgdata
-RUN: printf '\x02\x00\x00\x00' >> %t_version.cgdata
+RUN: printf '\x03\x00\x00\x00' >> %t_version.cgdata
 RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata
-RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
+RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
+RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
 RUN: not llvm-cgdata --show %t_version.cgdata 2>&1 | FileCheck %s  --check-prefix=BAD_VERSION
 BAD_VERSION: {{.}}cgdata: unsupported codegen data version
 
 # Header says an outlined hash tree, but the file ends after the header.
 RUN: printf '\xffcgdata\x81' > %t_eof.cgdata
+RUN: printf '\x02\x00\x00\x00' >> %t_eof.cgdata
 RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata
-RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata
-RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata
+RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata
+RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata
 RUN: not llvm-cgdata --show %t_eof.cgdata 2>&1 | FileCheck %s  --check-prefix=EOF
 EOF: {{.}}cgdata: end of File
diff --git a/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
new file mode 100644
index 00000000000000..b9bf067d3771c5
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
@@ -0,0 +1,66 @@
+# REQUIRES: shell, aarch64-registered-target
+# UNSUPPORTED: system-windows
+
+# Test merge a single object file having both __llvm_outline and __llvm_merge into a cgdata.
+# Effectively, this test combines merge-hashtree.test and merge-funcmap.test.
+
+RUN: split-file %s %t
+
+# Synthesize raw hashtree bytes without the header (32 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-hashtree.cgtext -o %t/raw-hashtree.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-hashtree.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-hashtree-bytes.txt
+
+# Synthesize raw funcmap bytes without the header (32 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-funcmap.cgtext -o %t/raw-funcmap.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-funcmap.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-funcmap-bytes.txt
+
+# Synthesize a bitcode file by creating two sections for the hash tree and the function map, respectively.
+RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-hashtree-bytes.txt)/g" %t/merge-both-template.ll > %t/merge-both-hashtree-template.ll
+RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-funcmap-bytes.txt)/g" %t/merge-both-hashtree-template.ll > %t/merge-both-hashtree-funcmap.ll
+
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-both-hashtree-funcmap.ll -o %t/merge-both-hashtree-funcmap.o
+
+# Merge an object file having cgdata (__llvm_outline and __llvm_merge)
+RUN: llvm-cgdata -m %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap.cgdata
+RUN: llvm-cgdata -s %t/merge-both-hashtree-funcmap.cgdata | FileCheck %s
+
+CHECK: Outlined hash tree:
+CHECK-NEXT:  Total Node Count: 3
+CHECK-NEXT:  Terminal Node Count: 1
+CHECK-NEXT:  Depth: 2
+CHECK-NEXT: Stable function map:
+CHECK-NEXT:  Unique hash Count: 1
+CHECK-NEXT:  Total function Count: 1
+CHECK-NEXT:  Mergeable function Count: 0
+
+;--- raw-hashtree.cgtext
+:outlined_hash_tree
+0:
+  Hash:            0x0
+  Terminals:       0
+  SuccessorIds:    [ 1 ]
+1:
+  Hash:            0x1
+  Terminals:       0
+  SuccessorIds:    [ 2 ]
+2:
+  Hash:            0x2
+  Terminals:       4
+  SuccessorIds:    [  ]
+...
+
+;--- raw-funcmap.cgtext
+:stable_function_map
+- Hash:            1
+  FunctionName:    Func1
+  ModuleName:      Mod1
+  InstCount:       2
+  IndexOperandHashes:
+    - InstIndex:       0
+      OpndIndex:       1
+      OpndHash:        3
+...
+
+;--- merge-both-template.ll
+ at .data1 = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
+ at .data2 = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
new file mode 100644
index 00000000000000..f643c8d92073e3
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
@@ -0,0 +1,83 @@
+# REQUIRES: shell, aarch64-registered-target
+# UNSUPPORTED: system-windows
+
+# Merge an archive that has two object files having cgdata (__llvm_merge)
+
+RUN: split-file %s %t
+
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
+RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o
+
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
+RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
+
+# Make an archive from two object files
+RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o
+
+# Merge the archive into the codegen data file.
+RUN: llvm-cgdata --merge %t/merge-archive.a -o %t/merge-archive.cgdata
+RUN: llvm-cgdata --show %t/merge-archive.cgdata | FileCheck %s
+
+RUN: llvm-cgdata --show %t/merge-archive.cgdata| FileCheck %s
+CHECK: Stable function map:
+CHECK-NEXT:  Unique hash Count: 1
+CHECK-NEXT:  Total function Count: 2
+CHECK-NEXT:  Mergeable function Count: 2
+
+RUN: llvm-cgdata --convert %t/merge-archive.cgdata| FileCheck %s --check-prefix=MAP
+MAP: # Stable function map
+MAP-NEXT: :stable_function_map
+MAP-NEXT: ---
+MAP-NEXT: - Hash:            1
+MAP-NEXT:   FunctionName:    Func1
+MAP-NEXT:   ModuleName:      Mod1
+MAP-NEXT:   InstCount:       2
+MAP-NEXT:   IndexOperandHashes:
+MAP-NEXT:     - InstIndex:       0
+MAP-NEXT:       OpndIndex:       1
+MAP-NEXT:       OpndHash:        3
+MAP-NEXT: - Hash:            1
+MAP-NEXT:   FunctionName:    Func2
+MAP-NEXT:   ModuleName:      Mod1
+MAP-NEXT:   InstCount:       2
+MAP-NEXT:   IndexOperandHashes:
+MAP-NEXT:     - InstIndex:       0
+MAP-NEXT:       OpndIndex:       1
+MAP-NEXT:       OpndHash:        4
+MAP-NEXT: ...
+
+;--- raw-1.cgtext
+:stable_function_map
+- Hash:            1
+  FunctionName:    Func2
+  ModuleName:      Mod1
+  InstCount:       2
+  IndexOperandHashes:
+    - InstIndex:       0
+      OpndIndex:       1
+      OpndHash:        4
+...
+
+;--- merge-1-template.ll
+ at .data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+
+;--- raw-2.cgtext
+:stable_function_map
+- Hash:            1
+  FunctionName:    Func1
+  ModuleName:      Mod1
+  InstCount:       2
+  IndexOperandHashes:
+    - InstIndex:       0
+      OpndIndex:       1
+      OpndHash:        3
+...
+
+;--- merge-2-template.ll
+ at .data = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
new file mode 100644
index 00000000000000..c8acf1f3916e5a
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
@@ -0,0 +1,78 @@
+# REQUIRES: shell, aarch64-registered-target
+# UNSUPPORTED: system-windows
+
+# Merge a binary file (e.g., a linked executable) having concatenated cgdata (__llvm_merge)
+
+RUN: split-file %s %t
+
+# Synthesize two sets of raw cgdata without the header (32 byte) from the indexed cgdata.
+# Concatenate them in merge-concat.ll
+RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
+RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-concat-template.ll > %t/merge-concat-template-2.ll
+RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
+RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat-template-2.ll > %t/merge-concat.ll
+
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o
+RUN: llvm-cgdata --merge %t/merge-concat.o -o %t/merge-concat.cgdata
+RUN: llvm-cgdata --show %t/merge-concat.cgdata | FileCheck %s
+
+CHECK: Stable function map:
+CHECK-NEXT:  Unique hash Count: 1
+CHECK-NEXT:  Total function Count: 2
+CHECK-NEXT:  Mergeable function Count: 2
+
+RUN: llvm-cgdata --convert %t/merge-concat.cgdata| FileCheck %s --check-prefix=MAP
+MAP: # Stable function map
+MAP-NEXT: :stable_function_map
+MAP-NEXT: ---
+MAP-NEXT: - Hash:            1
+MAP-NEXT:   FunctionName:    Func1
+MAP-NEXT:   ModuleName:      Mod1
+MAP-NEXT:   InstCount:       2
+MAP-NEXT:   IndexOperandHashes:
+MAP-NEXT:     - InstIndex:       0
+MAP-NEXT:       OpndIndex:       1
+MAP-NEXT:       OpndHash:        3
+MAP-NEXT: - Hash:            1
+MAP-NEXT:   FunctionName:    Func2
+MAP-NEXT:   ModuleName:      Mod1
+MAP-NEXT:   InstCount:       2
+MAP-NEXT:   IndexOperandHashes:
+MAP-NEXT:     - InstIndex:       0
+MAP-NEXT:       OpndIndex:       1
+MAP-NEXT:       OpndHash:        4
+MAP-NEXT: ...
+
+;--- raw-1.cgtext
+:stable_function_map
+- Hash:            1
+  FunctionName:    Func2
+  ModuleName:      Mod1
+  InstCount:       2
+  IndexOperandHashes:
+    - InstIndex:       0
+      OpndIndex:       1
+      OpndHash:        4
+...
+
+;--- raw-2.cgtext
+:stable_function_map
+- Hash:            1
+  FunctionName:    Func1
+  ModuleName:      Mod1
+  InstCount:       2
+  IndexOperandHashes:
+    - InstIndex:       0
+      OpndIndex:       1
+      OpndHash:        3
+...
+
+;--- merge-concat-template.ll
+
+; In an linked executable (as opposed to an object file), cgdata in __llvm_merge might be concatenated.
+; Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated.
+; In other words, the following two trees are encoded back-to-back in a binary format.
+ at .data1 = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+ at .data2 = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
new file mode 100644
index 00000000000000..3ae67f062f820f
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
@@ -0,0 +1,79 @@
+# REQUIRES: shell, aarch64-registered-target
+# UNSUPPORTED: system-windows
+
+# Merge two object files having cgdata (__llvm_merge)
+
+RUN: split-file %s %t
+
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
+RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o
+
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
+RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
+
+# Merge two object files into the codegen data file.
+RUN: llvm-cgdata --merge %t/merge-1.o %t/merge-2.o -o %t/merge.cgdata
+
+RUN: llvm-cgdata --show %t/merge.cgdata | FileCheck %s
+CHECK: Stable function map:
+CHECK-NEXT:  Unique hash Count: 1
+CHECK-NEXT:  Total function Count: 2
+CHECK-NEXT:  Mergeable function Count: 2
+
+RUN: llvm-cgdata --convert %t/merge.cgdata | FileCheck %s --check-prefix=MAP
+MAP: # Stable function map
+MAP-NEXT: :stable_function_map
+MAP-NEXT: ---
+MAP-NEXT: - Hash:            1
+MAP-NEXT:   FunctionName:    Func1
+MAP-NEXT:   ModuleName:      Mod1
+MAP-NEXT:   InstCount:       2
+MAP-NEXT:   IndexOperandHashes:
+MAP-NEXT:     - InstIndex:       0
+MAP-NEXT:       OpndIndex:       1
+MAP-NEXT:       OpndHash:        3
+MAP-NEXT: - Hash:            1
+MAP-NEXT:   FunctionName:    Func2
+MAP-NEXT:   ModuleName:      Mod1
+MAP-NEXT:   InstCount:       2
+MAP-NEXT:   IndexOperandHashes:
+MAP-NEXT:     - InstIndex:       0
+MAP-NEXT:       OpndIndex:       1
+MAP-NEXT:       OpndHash:        4
+MAP-NEXT: ...
+
+;--- raw-1.cgtext
+:stable_function_map
+- Hash:            1
+  FunctionName:    Func2
+  ModuleName:      Mod1
+  InstCount:       2
+  IndexOperandHashes:
+    - InstIndex:       0
+      OpndIndex:       1
+      OpndHash:        4
+...
+
+;--- merge-1-template.ll
+ at .data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+
+;--- raw-2.cgtext
+:stable_function_map
+- Hash:            1
+  FunctionName:    Func1
+  ModuleName:      Mod1
+  InstCount:       2
+  IndexOperandHashes:
+    - InstIndex:       0
+      OpndIndex:       1
+      OpndHash:        3
+...
+
+;--- merge-2-template.ll
+ at .data = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
new file mode 100644
index 00000000000000..6a4e635f638657
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
@@ -0,0 +1,36 @@
+# REQUIRES: shell, aarch64-registered-target
+# UNSUPPORTED: system-windows
+
+# Test merge a single object file into a cgdata
+
+RUN: split-file %s %t
+
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-single.cgtext -o %t/raw-single.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-single.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-single-bytes.txt
+
+RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-single-bytes.txt)/g" %t/merge-single-template.ll > %t/merge-single.ll
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merge-single.o
+
+# Merge an object file having cgdata (__llvm_merge)
+RUN: llvm-cgdata -m %t/merge-single.o -o %t/merge-single.cgdata
+RUN: llvm-cgdata -s %t/merge-single.cgdata | FileCheck %s
+CHECK: Stable function map:
+CHECK-NEXT:  Unique hash Count: 1
+CHECK-NEXT:  Total function Count: 1
+CHECK-NEXT:  Mergeable function Count: 0
+
+;--- raw-single.cgtext
+:stable_function_map
+- Hash:            1
+  FunctionName:    Func1
+  ModuleName:      Mod1
+  InstCount:       2
+  IndexOperandHashes:
+    - InstIndex:       0
+      OpndIndex:       1
+      OpndHash:        3
+...
+
+;--- merge-single-template.ll
+ at .data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-archive.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-archive.test
similarity index 91%
rename from llvm/test/tools/llvm-cgdata/merge-archive.test
rename to llvm/test/tools/llvm-cgdata/merge-hashtree-archive.test
index 03eb9106b54562..ee6345247c5be6 100644
--- a/llvm/test/tools/llvm-cgdata/merge-archive.test
+++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-archive.test
@@ -5,15 +5,15 @@
 
 RUN: split-file %s %t
 
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
 RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
+RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
 RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll
 RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o
 
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
 RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
+RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
 RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll
 RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
 
diff --git a/llvm/test/tools/llvm-cgdata/merge-concat.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-concat.test
similarity index 93%
rename from llvm/test/tools/llvm-cgdata/merge-concat.test
rename to llvm/test/tools/llvm-cgdata/merge-hashtree-concat.test
index ac0e7a6e29e878..5a3ece05a3f990 100644
--- a/llvm/test/tools/llvm-cgdata/merge-concat.test
+++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-concat.test
@@ -5,13 +5,13 @@
 
 RUN: split-file %s %t
 
-# Synthesize two sets of raw cgdata without the header (24 byte) from the indexed cgdata.
+# Synthesize two sets of raw cgdata without the header (32 byte) from the indexed cgdata.
 # Concatenate them in merge-concat.ll
 RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
+RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
 RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-concat-template.ll > %t/merge-concat-template-2.ll
 RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
+RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
 RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat-template-2.ll > %t/merge-concat.ll
 
 RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o
diff --git a/llvm/test/tools/llvm-cgdata/merge-double.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-double.test
similarity index 90%
rename from llvm/test/tools/llvm-cgdata/merge-double.test
rename to llvm/test/tools/llvm-cgdata/merge-hashtree-double.test
index 1ae8064291019e..044a8649cf4adf 100644
--- a/llvm/test/tools/llvm-cgdata/merge-double.test
+++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-double.test
@@ -5,15 +5,15 @@
 
 RUN: split-file %s %t
 
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
 RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
+RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
 RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll
 RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o
 
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
 RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
+RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
 RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll
 RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
 
diff --git a/llvm/test/tools/llvm-cgdata/merge-single.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-single.test
similarity index 92%
rename from llvm/test/tools/llvm-cgdata/merge-single.test
rename to llvm/test/tools/llvm-cgdata/merge-hashtree-single.test
index 47e3cb3f4f50fb..829c63f0f17a2c 100644
--- a/llvm/test/tools/llvm-cgdata/merge-single.test
+++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-single.test
@@ -11,9 +11,9 @@ RUN: llvm-cgdata --merge %t/merge-empty.o --output %t/merge-empty.cgdata
 # No summary appear with the header only cgdata.
 RUN: llvm-cgdata --show %t/merge-empty.cgdata | count 0
 
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
 RUN: llvm-cgdata --convert --format binary %t/raw-single.cgtext -o %t/raw-single.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-single.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-single-bytes.txt
+RUN: od -t x1 -j 32 -An %t/raw-single.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-single-bytes.txt
 
 RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-single-bytes.txt)/g" %t/merge-single-template.ll > %t/merge-single.ll
 RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merge-single.o
diff --git a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
index 483f4662631284..0931cad4bcb7ed 100644
--- a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
+++ b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
@@ -80,8 +80,6 @@ static CGDataAction Action;
 static std::optional<CGDataFormat> OutputFormat;
 static std::vector<std::string> InputFilenames;
 
-// TODO: Add a doc, https://llvm.org/docs/CommandGuide/llvm-cgdata.html
-
 static void exitWithError(Twine Message, std::string Whence = "",
                           std::string Hint = "") {
   WithColor::error();
@@ -128,6 +126,10 @@ static int convert_main(int argc, const char *argv[]) {
     OutlinedHashTreeRecord Record(Reader->releaseOutlinedHashTree());
     Writer.addRecord(Record);
   }
+  if (Reader->hasStableFunctionMap()) {
+    StableFunctionMapRecord Record(Reader->releaseStableFunctionMap());
+    Writer.addRecord(Record);
+  }
 
   if (OutputFormat == CGDataFormat::Text) {
     if (Error E = Writer.writeText(OS))
@@ -141,10 +143,12 @@ static int convert_main(int argc, const char *argv[]) {
 }
 
 static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
-                         OutlinedHashTreeRecord &GlobalOutlineRecord);
+                         OutlinedHashTreeRecord &GlobalOutlineRecord,
+                         StableFunctionMapRecord &GlobalFunctionMapRecord);
 
 static bool handleArchive(StringRef Filename, Archive &Arch,
-                          OutlinedHashTreeRecord &GlobalOutlineRecord) {
+                          OutlinedHashTreeRecord &GlobalOutlineRecord,
+                          StableFunctionMapRecord &GlobalFunctionMapRecord) {
   bool Result = true;
   Error Err = Error::success();
   for (const auto &Child : Arch.children(Err)) {
@@ -155,7 +159,8 @@ static bool handleArchive(StringRef Filename, Archive &Arch,
     if (Error E = NameOrErr.takeError())
       exitWithError(std::move(E), Filename);
     std::string Name = (Filename + "(" + NameOrErr.get() + ")").str();
-    Result &= handleBuffer(Name, BuffOrErr.get(), GlobalOutlineRecord);
+    Result &= handleBuffer(Name, BuffOrErr.get(), GlobalOutlineRecord,
+                           GlobalFunctionMapRecord);
   }
   if (Err)
     exitWithError(std::move(Err), Filename);
@@ -163,7 +168,8 @@ static bool handleArchive(StringRef Filename, Archive &Arch,
 }
 
 static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
-                         OutlinedHashTreeRecord &GlobalOutlineRecord) {
+                         OutlinedHashTreeRecord &GlobalOutlineRecord,
+                         StableFunctionMapRecord &GlobalFunctionMapRecord) {
   Expected<std::unique_ptr<object::Binary>> BinOrErr =
       object::createBinary(Buffer);
   if (Error E = BinOrErr.takeError())
@@ -171,11 +177,12 @@ static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
 
   bool Result = true;
   if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
-    if (Error E =
-            CodeGenDataReader::mergeFromObjectFile(Obj, GlobalOutlineRecord))
+    if (Error E = CodeGenDataReader::mergeFromObjectFile(
+            Obj, GlobalOutlineRecord, GlobalFunctionMapRecord))
       exitWithError(std::move(E), Filename);
   } else if (auto *Arch = dyn_cast<Archive>(BinOrErr->get())) {
-    Result &= handleArchive(Filename, *Arch, GlobalOutlineRecord);
+    Result &= handleArchive(Filename, *Arch, GlobalOutlineRecord,
+                            GlobalFunctionMapRecord);
   } else {
     // TODO: Support for the MachO universal binary format.
     errs() << "Error: unsupported binary file: " << Filename << "\n";
@@ -186,26 +193,34 @@ static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
 }
 
 static bool handleFile(StringRef Filename,
-                       OutlinedHashTreeRecord &GlobalOutlineRecord) {
+                       OutlinedHashTreeRecord &GlobalOutlineRecord,
+                       StableFunctionMapRecord &GlobalFunctionMapRecord) {
   ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
       MemoryBuffer::getFileOrSTDIN(Filename);
   if (std::error_code EC = BuffOrErr.getError())
     exitWithErrorCode(EC, Filename);
-  return handleBuffer(Filename, *BuffOrErr.get(), GlobalOutlineRecord);
+  return handleBuffer(Filename, *BuffOrErr.get(), GlobalOutlineRecord,
+                      GlobalFunctionMapRecord);
 }
 
 static int merge_main(int argc, const char *argv[]) {
   bool Result = true;
   OutlinedHashTreeRecord GlobalOutlineRecord;
+  StableFunctionMapRecord GlobalFunctionMapRecord;
   for (auto &Filename : InputFilenames)
-    Result &= handleFile(Filename, GlobalOutlineRecord);
+    Result &=
+        handleFile(Filename, GlobalOutlineRecord, GlobalFunctionMapRecord);
 
   if (!Result)
     exitWithError("failed to merge codegen data files.");
 
+  GlobalFunctionMapRecord.finalize();
+
   CodeGenDataWriter Writer;
   if (!GlobalOutlineRecord.empty())
     Writer.addRecord(GlobalOutlineRecord);
+  if (!GlobalFunctionMapRecord.empty())
+    Writer.addRecord(GlobalFunctionMapRecord);
 
   std::error_code EC;
   raw_fd_ostream OS(OutputFilename, EC,
@@ -249,6 +264,15 @@ static int show_main(int argc, const char *argv[]) {
        << "\n";
     OS << "  Depth: " << Tree->depth() << "\n";
   }
+  if (Reader->hasStableFunctionMap()) {
+    auto Map = Reader->releaseStableFunctionMap();
+    OS << "Stable function map:\n";
+    OS << "  Unique hash Count: " << Map->size() << "\n";
+    OS << "  Total function Count: "
+       << Map->size(StableFunctionMap::TotalFunctionCount) << "\n";
+    OS << "  Mergeable function Count: "
+       << Map->size(StableFunctionMap::MergeableFunctionCount) << "\n";
+  }
 
   return 0;
 }

>From 5425db1621336e8a91d4165275e17d5a2e7bed00 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Tue, 29 Oct 2024 00:27:53 -0700
Subject: [PATCH 3/5] Address comments from ellishg

---
 llvm/lib/CGData/CodeGenDataReader.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CGData/CodeGenDataReader.cpp b/llvm/lib/CGData/CodeGenDataReader.cpp
index ebeb4ae36f99f3..39513d422c2c98 100644
--- a/llvm/lib/CGData/CodeGenDataReader.cpp
+++ b/llvm/lib/CGData/CodeGenDataReader.cpp
@@ -35,14 +35,14 @@ Error CodeGenDataReader::mergeFromObjectFile(
     StableFunctionMapRecord &GlobalFunctionMapRecord,
     stable_hash *CombinedHash) {
   Triple TT = Obj->makeTriple();
-  auto CGOutLineName =
+  auto CGOutlineName =
       getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
   auto CGMergeName =
       getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false);
 
   auto processSectionContents = [&](const StringRef &Name,
                                     const StringRef &Contents) {
-    if (Name != CGOutLineName && Name != CGMergeName)
+    if (Name != CGOutlineName && Name != CGMergeName)
       return;
     if (CombinedHash)
       *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents));
@@ -52,7 +52,7 @@ Error CodeGenDataReader::mergeFromObjectFile(
     // we want to merge them into a single cgdata.
     // Although it's not a typical workflow, we support this scenario
     // by looping over all data in the sections.
-    if (Name == CGOutLineName) {
+    if (Name == CGOutlineName) {
       while (Data != EndData) {
         OutlinedHashTreeRecord LocalOutlineRecord;
         LocalOutlineRecord.deserialize(Data);

>From 06a25007d8e9ff1f970e4d343ec0918059f46198 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Fri, 30 Aug 2024 00:09:09 -0700
Subject: [PATCH 4/5] [CGData] Global Merge Functions

---
 llvm/include/llvm/CGData/CodeGenData.h        |  11 +
 llvm/include/llvm/InitializePasses.h          |   1 +
 llvm/include/llvm/LinkAllPasses.h             |   1 +
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |   1 +
 llvm/include/llvm/Transforms/IPO.h            |   2 +
 .../Transforms/IPO/GlobalMergeFunctions.h     |  77 ++
 llvm/lib/CodeGen/TargetPassConfig.cpp         |   3 +
 llvm/lib/LTO/LTO.cpp                          |   1 +
 llvm/lib/Transforms/IPO/CMakeLists.txt        |   2 +
 .../Transforms/IPO/GlobalMergeFunctions.cpp   | 743 ++++++++++++++++++
 .../ThinLTO/AArch64/cgdata-merge-local.ll     |  62 ++
 .../test/ThinLTO/AArch64/cgdata-merge-read.ll |  82 ++
 .../AArch64/cgdata-merge-two-rounds.ll        |  68 ++
 .../ThinLTO/AArch64/cgdata-merge-write.ll     |  97 +++
 llvm/tools/llvm-lto2/CMakeLists.txt           |   1 +
 llvm/tools/llvm-lto2/llvm-lto2.cpp            |   6 +
 16 files changed, 1158 insertions(+)
 create mode 100644 llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
 create mode 100644 llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
 create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll
 create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll
 create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll
 create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll

diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h
index 5d7c74725ccef1..da0e412f2a0e03 100644
--- a/llvm/include/llvm/CGData/CodeGenData.h
+++ b/llvm/include/llvm/CGData/CodeGenData.h
@@ -145,6 +145,9 @@ class CodeGenData {
   const OutlinedHashTree *getOutlinedHashTree() {
     return PublishedHashTree.get();
   }
+  const StableFunctionMap *getStableFunctionMap() {
+    return PublishedStableFunctionMap.get();
+  }
 
   /// Returns true if we should write codegen data.
   bool emitCGData() { return EmitCGData; }
@@ -169,10 +172,18 @@ inline bool hasOutlinedHashTree() {
   return CodeGenData::getInstance().hasOutlinedHashTree();
 }
 
+inline bool hasStableFunctionMap() {
+  return CodeGenData::getInstance().hasStableFunctionMap();
+}
+
 inline const OutlinedHashTree *getOutlinedHashTree() {
   return CodeGenData::getInstance().getOutlinedHashTree();
 }
 
+inline const StableFunctionMap *getStableFunctionMap() {
+  return CodeGenData::getInstance().getStableFunctionMap();
+}
+
 inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); }
 
 inline void
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 26f5d63553c5a8..b11d66f4b9d024 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -123,6 +123,7 @@ void initializeGCEmptyBasicBlocksPass(PassRegistry &);
 void initializeGCMachineCodeAnalysisPass(PassRegistry &);
 void initializeGCModuleInfoPass(PassRegistry &);
 void initializeGVNLegacyPassPass(PassRegistry &);
+void initializeGlobalMergeFuncPass(PassRegistry &);
 void initializeGlobalMergePass(PassRegistry &);
 void initializeGlobalsAAWrapperPassPass(PassRegistry &);
 void initializeHardwareLoopsLegacyPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 3516b47d29ef36..d1219d6ee2a13e 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -79,6 +79,7 @@ struct ForcePassLinking {
     (void)llvm::createDomOnlyViewerWrapperPassPass();
     (void)llvm::createDomViewerWrapperPassPass();
     (void)llvm::createAlwaysInlinerLegacyPass();
+    (void)llvm::createGlobalMergeFuncPass();
     (void)llvm::createGlobalsAAWrapperPass();
     (void)llvm::createInstSimplifyLegacyPass();
     (void)llvm::createInstructionCombiningPass();
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index ad80c661147d6f..c5b4811815179d 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -77,6 +77,7 @@
 #include "llvm/Target/CGPassBuilderOption.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/CFGuard.h"
+#include "llvm/Transforms/IPO/GlobalMergeFunctions.h"
 #include "llvm/Transforms/Scalar/ConstantHoisting.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
index ee0e35aa618325..86a8654f56997c 100644
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -55,6 +55,8 @@ enum class PassSummaryAction {
   Export, ///< Export information to summary.
 };
 
+Pass *createGlobalMergeFuncPass();
+
 } // End llvm namespace
 
 #endif
diff --git a/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
new file mode 100644
index 00000000000000..565be54f89e882
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
@@ -0,0 +1,77 @@
+//===------ GlobalMergeFunctions.h - Global merge functions -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file defines global merge functions pass and related data structure.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
+#define PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StableHashing.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CGData/StableFunctionMap.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include <map>
+#include <mutex>
+
+enum class HashFunctionMode {
+  Local,
+  BuildingHashFuncion,
+  UsingHashFunction,
+};
+
+namespace llvm {
+
+// A vector of locations (the pair of (instruction, operand) indices) reachable
+// from a parameter.
+using ParamLocs = SmallVector<IndexPair, 4>;
+// A vector of parameters
+using ParamLocsVecTy = SmallVector<ParamLocs, 8>;
+// A map of stable hash to a vector of stable functions
+
+/// GlobalMergeFunc finds functions which only differ by constants in
+/// certain instructions, e.g. resulting from specialized functions of layout
+/// compatible types.
+/// Unlike PikaMergeFunc that directly compares IRs, this uses stable function
+/// hash to find the merge candidate. Similar to the global outliner, we can run
+/// codegen twice to collect function merge candidate in the first round, and
+/// merge functions globally in the second round.
+class GlobalMergeFunc : public ModulePass {
+  HashFunctionMode MergerMode = HashFunctionMode::Local;
+
+  std::unique_ptr<StableFunctionMap> LocalFunctionMap;
+
+public:
+  static char ID;
+
+  GlobalMergeFunc();
+
+  StringRef getPassName() const override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  void initializeMergerMode(const Module &M);
+
+  bool runOnModule(Module &M) override;
+
+  /// Analyze module to create stable function into LocalFunctionMap.
+  void analyze(Module &M);
+
+  /// Emit LocalFunctionMap into __llvm_merge section.
+  void emitFunctionMap(Module &M);
+
+  /// Merge functions in the module using the global function map.
+  bool merge(Module &M, const StableFunctionMap *FunctionMap);
+};
+
+} // end namespace llvm
+#endif // PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 12225c9946e9fc..1501d5ad95ba42 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -141,6 +141,9 @@ static cl::opt<RunOutliner> EnableMachineOutliner(
                           "Disable all outlining"),
                // Sentinel value for unspecified option.
                clEnumValN(RunOutliner::AlwaysOutline, "", "")));
+cl::opt<bool> EnableGlobalMergeFunc(
+    "enable-global-merge-func", cl::Hidden,
+    cl::desc("Enable global merge functions that are based on hash function"));
 // Disable the pass to fix unwind information. Whether the pass is included in
 // the pipeline is controlled via the target options, this option serves as
 // manual override.
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 0f53c608512171..3f88c6c475d4d8 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -53,6 +53,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/GlobalMergeFunctions.h"
 #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index 15cb57399d2460..6a36ed64149f93 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -21,6 +21,7 @@ add_llvm_component_library(LLVMipo
   GlobalDCE.cpp
   GlobalOpt.cpp
   GlobalSplit.cpp
+  GlobalMergeFunctions.cpp
   HotColdSplitting.cpp
   IPO.cpp
   IROutliner.cpp
@@ -60,6 +61,7 @@ add_llvm_component_library(LLVMipo
   Analysis
   BitReader
   BitWriter
+  CGData
   Core
   FrontendOpenMP
   InstCombine
diff --git a/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
new file mode 100644
index 00000000000000..599de722814f21
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
@@ -0,0 +1,743 @@
+//===---- GlobalMergeFunctions.cpp - Global merge functions -------*- C++ -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO:  This implements a function merge using function hash while tracking
+// differences in Constants. This uses stable function hash to find potential
+// merge candidates. The first codegen round collects stable function hashes,
+// and determines the merge candidates that match the stable function hashes.
+// The set of parameters pointing to different Constants are also computed
+// during the stable function merge. The second codegen round uses this global
+// function info to optimistically create a merged function in each module
+// context to guarantee correct transformation. Similar to the global outliner,
+// the linker's deduplication (ICF) folds the identical merged functions to save
+// the final binary size.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/GlobalMergeFunctions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/CGData/CodeGenData.h"
+#include "llvm/CGData/StableFunctionMap.h"
+#include "llvm/CodeGen/MachineStableHash.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/StructuralHash.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#define DEBUG_TYPE "global-merge-func"
+
+using namespace llvm;
+using namespace llvm::support;
+
+static cl::opt<bool>
+    DisableGlobalMerging("disable-global-merging", cl::Hidden,
+                         cl::desc("Disable global merging only by ignoring "
+                                  "the codegen data generation or use. Local "
+                                  "merging is still enabled within a module."),
+                         cl::init(false));
+static cl::opt<unsigned> GlobalMergingMinInstrs(
+    "global-merging-min-instrs",
+    cl::desc("The minimum instruction count required when merging functions."),
+    cl::init(1), cl::Hidden);
+static cl::opt<unsigned> GlobalMergingMaxParams(
+    "global-merging-max-params",
+    cl::desc(
+        "The maximum number of parameters allowed when merging functions."),
+    cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden);
+static cl::opt<unsigned> GlobalMergingParamOverhead(
+    "global-merging-param-overhead",
+    cl::desc("The overhead cost associated with each parameter when merging "
+             "functions."),
+    cl::init(2), cl::Hidden);
+static cl::opt<unsigned>
+    GlobalMergingCallOverhead("global-merging-call-overhead",
+                              cl::desc("The overhead cost associated with each "
+                                       "function call when merging functions."),
+                              cl::init(1), cl::Hidden);
+static cl::opt<unsigned> GlobalMergingExtraThreshold(
+    "global-merging-extra-threshold",
+    cl::desc("An additional cost threshold that must be exceeded for merging "
+             "to be considered beneficial."),
+    cl::init(0), cl::Hidden);
+
+extern cl::opt<bool> EnableGlobalMergeFunc;
+
+STATISTIC(NumMismatchedFunctionHashGlobalMergeFunction,
+          "Number of mismatched function hash for global merge function");
+STATISTIC(NumMismatchedInstCountGlobalMergeFunction,
+          "Number of mismatched instruction count for global merge function");
+STATISTIC(NumMismatchedConstHashGlobalMergeFunction,
+          "Number of mismatched const hash for global merge function");
+STATISTIC(NumMismatchedModuleIdGlobalMergeFunction,
+          "Number of mismatched Module Id for global merge function");
+STATISTIC(NumGlobalMergeFunctions,
+          "Number of functions that are actually merged using function hash");
+STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
+STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed");
+STATISTIC(NumEligibleFunctions, "Number of functions that are eligible");
+
+/// Returns true if the \opIdx operand of \p CI is the callee operand.
+static bool isCalleeOperand(const CallBase *CI, unsigned OpIdx) {
+  return &CI->getCalledOperandUse() == &CI->getOperandUse(OpIdx);
+}
+
+static bool canParameterizeCallOperand(const CallBase *CI, unsigned OpIdx) {
+  if (CI->isInlineAsm())
+    return false;
+  Function *Callee = CI->getCalledOperand()
+                         ? dyn_cast_or_null<Function>(
+                               CI->getCalledOperand()->stripPointerCasts())
+                         : nullptr;
+  if (Callee) {
+    if (Callee->isIntrinsic())
+      return false;
+    // objc_msgSend stubs must be called, and can't have their address taken.
+    if (Callee->getName().starts_with("objc_msgSend$"))
+      return false;
+  }
+  if (isCalleeOperand(CI, OpIdx) &&
+      CI->getOperandBundle(LLVMContext::OB_ptrauth).has_value()) {
+    // The operand is the callee and it has already been signed. Ignore this
+    // because we cannot add another ptrauth bundle to the call instruction.
+    return false;
+  }
+  return true;
+}
+
+bool isEligibleInstrunctionForConstantSharing(const Instruction *I) {
+  switch (I->getOpcode()) {
+  case Instruction::Load:
+  case Instruction::Store:
+  case Instruction::Call:
+  case Instruction::Invoke:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool isEligibleOperandForConstantSharing(const Instruction *I, unsigned OpIdx) {
+  assert(OpIdx < I->getNumOperands() && "Invalid operand index");
+
+  if (!isEligibleInstrunctionForConstantSharing(I))
+    return false;
+
+  auto Opnd = I->getOperand(OpIdx);
+  if (!isa<Constant>(Opnd))
+    return false;
+
+  if (const auto *CI = dyn_cast<CallBase>(I))
+    return canParameterizeCallOperand(CI, OpIdx);
+
+  return true;
+}
+
+/// Returns true if function \p F is eligible for merging.
+bool isEligibleFunction(Function *F) {
+  if (F->isDeclaration())
+    return false;
+
+  if (F->hasFnAttribute(llvm::Attribute::NoMerge))
+    return false;
+
+  if (F->hasAvailableExternallyLinkage()) {
+    return false;
+  }
+
+  if (F->getFunctionType()->isVarArg()) {
+    return false;
+  }
+
+  if (F->getCallingConv() == CallingConv::SwiftTail)
+    return false;
+
+  // if function contains callsites with musttail, if we merge
+  // it, the merged function will have the musttail callsite, but
+  // the number of parameters can change, thus the parameter count
+  // of the callsite will mismatch with the function itself.
+  // if (IgnoreMusttailFunction) {
+  for (const BasicBlock &BB : *F) {
+    for (const Instruction &I : BB) {
+      const auto *CB = dyn_cast<CallBase>(&I);
+      if (CB && CB->isMustTailCall())
+        return false;
+    }
+  }
+
+  return true;
+}
+
+static bool
+isEligibleInstrunctionForConstantSharingLocal(const Instruction *I) {
+  switch (I->getOpcode()) {
+  case Instruction::Load:
+  case Instruction::Store:
+  case Instruction::Call:
+  case Instruction::Invoke:
+    return true;
+  default:
+    return false;
+  }
+}
+
+static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
+  assert(OpIdx < I->getNumOperands() && "Invalid operand index");
+
+  if (!isEligibleInstrunctionForConstantSharingLocal(I))
+    return false;
+
+  if (!isa<Constant>(I->getOperand(OpIdx)))
+    return false;
+
+  if (const auto *CI = dyn_cast<CallBase>(I))
+    return canParameterizeCallOperand(CI, OpIdx);
+
+  return true;
+}
+
+// copy from merge functions.cpp
+static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
+  Type *SrcTy = V->getType();
+  if (SrcTy->isStructTy()) {
+    assert(DestTy->isStructTy());
+    assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
+    Value *Result = PoisonValue::get(DestTy);
+    for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
+      Value *Element =
+          createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)),
+                     DestTy->getStructElementType(I));
+
+      Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I));
+    }
+    return Result;
+  }
+  assert(!DestTy->isStructTy());
+  if (auto *SrcAT = dyn_cast<ArrayType>(SrcTy)) {
+    auto *DestAT = dyn_cast<ArrayType>(DestTy);
+    assert(DestAT);
+    assert(SrcAT->getNumElements() == DestAT->getNumElements());
+    Value *Result = UndefValue::get(DestTy);
+    for (unsigned int I = 0, E = SrcAT->getNumElements(); I < E; ++I) {
+      Value *Element =
+          createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)),
+                     DestAT->getElementType());
+
+      Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I));
+    }
+    return Result;
+  }
+  assert(!DestTy->isArrayTy());
+  if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
+    return Builder.CreateIntToPtr(V, DestTy);
+  else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
+    return Builder.CreatePtrToInt(V, DestTy);
+  else
+    return Builder.CreateBitCast(V, DestTy);
+}
+
+void GlobalMergeFunc::analyze(Module &M) {
+  ++NumAnalyzedModues;
+  for (Function &Func : M) {
+    ++NumAnalyzedFunctions;
+    if (isEligibleFunction(&Func)) {
+      ++NumEligibleFunctions;
+
+      auto FI = llvm::StructuralHashWithDifferences(Func, ignoreOp);
+
+      // Convert the map to a vector for a serialization-friendly format.
+      IndexOperandHashVecType IndexOperandHashes;
+      for (auto &Pair : *FI.IndexOperandHashMap)
+        IndexOperandHashes.emplace_back(Pair);
+
+      StableFunction SF(FI.FunctionHash, get_stable_name(Func.getName()).str(),
+                        M.getModuleIdentifier(), FI.IndexInstruction->size(),
+                        std::move(IndexOperandHashes));
+
+      LocalFunctionMap->insert(SF);
+    }
+  }
+}
+
+/// Tuple to hold function info to process merging.
+struct FuncMergeInfo {
+  StableFunctionEntry *SF;
+  Function *F;
+  std::unique_ptr<IndexInstrMap> IndexInstruction;
+};
+
+// Given the func info, and the parameterized locations, create and return
+// a new merged function by replacing the original constants with the new
+// parameters.
+static Function *createMergedFunction(FuncMergeInfo &FI,
+                                      ArrayRef<Type *> ConstParamTypes,
+                                      const ParamLocsVecTy &ParamLocsVec) {
+  // Synthesize a new merged function name by appending ".Tgm" to the root
+  // function's name.
+  auto *MergedFunc = FI.F;
+  auto NewFunctionName = MergedFunc->getName().str() + ".Tgm";
+  auto *M = MergedFunc->getParent();
+  assert(!M->getFunction(NewFunctionName));
+
+  FunctionType *OrigTy = MergedFunc->getFunctionType();
+  // Get the original params' types.
+  SmallVector<Type *> ParamTypes(OrigTy->param_begin(), OrigTy->param_end());
+  // Append const parameter types that are passed in.
+  ParamTypes.append(ConstParamTypes.begin(), ConstParamTypes.end());
+  FunctionType *FuncType =
+      FunctionType::get(OrigTy->getReturnType(), ParamTypes, false);
+
+  // Declare a new function
+  Function *NewFunction =
+      Function::Create(FuncType, MergedFunc->getLinkage(), NewFunctionName);
+  if (auto *SP = MergedFunc->getSubprogram())
+    NewFunction->setSubprogram(SP);
+  NewFunction->copyAttributesFrom(MergedFunc);
+  NewFunction->setDLLStorageClass(GlobalValue::DefaultStorageClass);
+
+  NewFunction->setLinkage(GlobalValue::InternalLinkage);
+  NewFunction->addFnAttr(Attribute::NoInline);
+
+  // Add the new function before the root function.
+  M->getFunctionList().insert(MergedFunc->getIterator(), NewFunction);
+
+  // Move the body of MergedFunc into the NewFunction.
+  NewFunction->splice(NewFunction->begin(), MergedFunc);
+
+  // Update the original args by the new args.
+  auto NewArgIter = NewFunction->arg_begin();
+  for (Argument &OrigArg : MergedFunc->args()) {
+    Argument &NewArg = *NewArgIter++;
+    OrigArg.replaceAllUsesWith(&NewArg);
+  }
+
+  // Replace the original Constants by the new args.
+  unsigned NumOrigArgs = MergedFunc->arg_size();
+  for (unsigned ParamIdx = 0; ParamIdx < ParamLocsVec.size(); ++ParamIdx) {
+    Argument *NewArg = NewFunction->getArg(NumOrigArgs + ParamIdx);
+    for (auto [InstIndex, OpndIndex] : ParamLocsVec[ParamIdx]) {
+      auto *Inst = FI.IndexInstruction->lookup(InstIndex);
+      auto *OrigC = Inst->getOperand(OpndIndex);
+      if (OrigC->getType() != NewArg->getType()) {
+        IRBuilder<> Builder(Inst->getParent(), Inst->getIterator());
+        Inst->setOperand(OpndIndex,
+                         createCast(Builder, NewArg, OrigC->getType()));
+      } else
+        Inst->setOperand(OpndIndex, NewArg);
+    }
+  }
+
+  return NewFunction;
+}
+
+// Given the original function (Thunk) and the merged function (ToFunc), create
+// a thunk to the merged function.
+
+static void createThunk(FuncMergeInfo &FI, ArrayRef<Constant *> Params,
+                        Function *ToFunc) {
+  auto *Thunk = FI.F;
+
+  assert(Thunk->arg_size() + Params.size() ==
+         ToFunc->getFunctionType()->getNumParams());
+  Thunk->dropAllReferences();
+
+  BasicBlock *BB = BasicBlock::Create(Thunk->getContext(), "", Thunk);
+  IRBuilder<> Builder(BB);
+
+  SmallVector<Value *> Args;
+  unsigned ParamIdx = 0;
+  FunctionType *ToFuncTy = ToFunc->getFunctionType();
+
+  // Add arguments which are passed through Thunk.
+  for (Argument &AI : Thunk->args()) {
+    Args.push_back(createCast(Builder, &AI, ToFuncTy->getParamType(ParamIdx)));
+    ++ParamIdx;
+  }
+
+  // Add new arguments defined by Params.
+  for (auto *Param : Params) {
+    assert(ParamIdx < ToFuncTy->getNumParams());
+    // FIXME: do not support signing
+    Args.push_back(
+        createCast(Builder, Param, ToFuncTy->getParamType(ParamIdx)));
+    ++ParamIdx;
+  }
+
+  CallInst *CI = Builder.CreateCall(ToFunc, Args);
+  bool isSwiftTailCall = ToFunc->getCallingConv() == CallingConv::SwiftTail &&
+                         Thunk->getCallingConv() == CallingConv::SwiftTail;
+  CI->setTailCallKind(isSwiftTailCall ? llvm::CallInst::TCK_MustTail
+                                      : llvm::CallInst::TCK_Tail);
+  CI->setCallingConv(ToFunc->getCallingConv());
+  CI->setAttributes(ToFunc->getAttributes());
+  if (Thunk->getReturnType()->isVoidTy()) {
+    Builder.CreateRetVoid();
+  } else {
+    Builder.CreateRet(createCast(Builder, CI, Thunk->getReturnType()));
+  }
+}
+
+// Check if the old merged/optimized IndexOperandHashMap is compatible with
+// the current IndexOperandHashMap. An operand hash may not be stable across
+// different builds due to varying modules combined. To address this, we relax
+// the hash check condition by comparing Const hash patterns instead of absolute
+// hash values. For example, let's assume we have three Consts located at idx1,
+// idx3, and idx6, where their corresponding hashes are hash1, hash2, and hash1
+// in the old merged map below:
+//   Old (Merged): [(idx1, hash1), (idx3, hash2), (idx6, hash1)]
+//   Current: [(idx1, hash1'), (idx3, hash2'), (idx6, hash1')]
+// If the current function also has three Consts in the same locations,
+// with hash sequences hash1', hash2', and hash1' where the first and third
+// are the same as the old hash sequences, we consider them matched.
+static bool checkConstHashCompatible(
+    const DenseMap<IndexPair, stable_hash> &OldInstOpndIndexToConstHash,
+    const DenseMap<IndexPair, stable_hash> &CurrInstOpndIndexToConstHash) {
+
+  DenseMap<stable_hash, stable_hash> OldHashToCurrHash;
+  for (const auto &[Index, OldHash] : OldInstOpndIndexToConstHash) {
+    auto It = CurrInstOpndIndexToConstHash.find(Index);
+    if (It == CurrInstOpndIndexToConstHash.end())
+      return false;
+
+    auto CurrHash = It->second;
+    auto J = OldHashToCurrHash.find(OldHash);
+    if (J == OldHashToCurrHash.end())
+      OldHashToCurrHash.insert({OldHash, CurrHash});
+    else if (J->second != CurrHash)
+      return false;
+  }
+
+  return true;
+}
+
+// Validate the locations pointed by a param has the same hash and Constant.
+static bool checkConstLocationCompatible(const StableFunctionEntry &SF,
+                                         const IndexInstrMap &IndexInstruction,
+                                         const ParamLocsVecTy &ParamLocsVec) {
+  for (auto &ParamLocs : ParamLocsVec) {
+    std::optional<stable_hash> OldHash;
+    std::optional<Constant *> OldConst;
+    for (auto &Loc : ParamLocs) {
+      assert(SF.IndexOperandHashMap->count(Loc));
+      auto CurrHash = SF.IndexOperandHashMap.get()->at(Loc);
+      auto [InstIndex, OpndIndex] = Loc;
+      assert(InstIndex < IndexInstruction.size());
+      const auto *Inst = IndexInstruction.lookup(InstIndex);
+      auto *CurrConst = cast<Constant>(Inst->getOperand(OpndIndex));
+      if (!OldHash) {
+        OldHash = CurrHash;
+        OldConst = CurrConst;
+      } else if (CurrConst != *OldConst || CurrHash != *OldHash)
+        return false;
+    }
+  }
+  return true;
+}
+
+static ParamLocsVecTy
+computeParamInfo(const SmallVector<std::unique_ptr<StableFunctionEntry>> &SFS) {
+  std::map<std::vector<stable_hash>, ParamLocs> HashSeqToLocs;
+  auto &RSF = *SFS[0];
+  unsigned StableFunctionCount = SFS.size();
+
+  for (auto &[IndexPair, Hash] : *RSF.IndexOperandHashMap) {
+    // Const hash sequence across stable functions.
+    // We will allocate a parameter per unique hash squence.
+    // can't use SmallVector as key
+    std::vector<stable_hash> ConstHashSeq;
+    ConstHashSeq.push_back(Hash);
+    bool Identical = true;
+    for (unsigned J = 1; J < StableFunctionCount; ++J) {
+      auto &SF = SFS[J];
+      assert(SF->IndexOperandHashMap->count(IndexPair));
+      auto SHash = (*SF->IndexOperandHashMap)[IndexPair];
+      if (Hash != SHash)
+        Identical = false;
+      ConstHashSeq.push_back(SHash);
+    }
+
+    if (Identical)
+      continue;
+
+    // For each unique Const hash sequence (parameter), add the locations.
+    HashSeqToLocs[ConstHashSeq].push_back(IndexPair);
+  }
+
+  ParamLocsVecTy ParamLocsVec;
+  for (auto &[HashSeq, Locs] : HashSeqToLocs) {
+    ParamLocsVec.push_back(std::move(Locs));
+    std::sort(
+        ParamLocsVec.begin(), ParamLocsVec.end(),
+        [&](const ParamLocs &L, const ParamLocs &R) { return L[0] < R[0]; });
+  }
+  return ParamLocsVec;
+}
+
+static bool
+isProfitable(const SmallVector<std::unique_ptr<StableFunctionEntry>> &SFS,
+             const Function *F) {
+  // No interest if the number of candidates are less than 2.
+  unsigned StableFunctionCount = SFS.size();
+  if (StableFunctionCount < 2)
+    return false;
+
+  unsigned InstCount = SFS[0]->InstCount;
+  if (InstCount < GlobalMergingMinInstrs)
+    return false;
+
+  unsigned ParamCount = SFS[0]->IndexOperandHashMap->size();
+  unsigned TotalParamCount = ParamCount + F->getFunctionType()->getNumParams();
+  if (TotalParamCount > GlobalMergingMaxParams)
+    return false;
+
+  unsigned Benefit = InstCount * (StableFunctionCount - 1);
+  unsigned Cost =
+      (GlobalMergingParamOverhead * ParamCount + GlobalMergingCallOverhead) *
+          StableFunctionCount +
+      GlobalMergingExtraThreshold;
+
+  bool isProfitable = Benefit > Cost;
+  LLVM_DEBUG(
+      dbgs() << "isProfitable: Function = " << F->getName() << ", "
+             << "StableFunctionCount = " << StableFunctionCount
+             << ", InstCount = " << InstCount << ", ParamCount = " << ParamCount
+             << ", Benefit = " << Benefit << ", Cost = " << Cost
+             << ", Result = " << (isProfitable ? "true" : "false") << "\n");
+  return isProfitable;
+}
+
+bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
+  bool Changed = false;
+
+  // Build a map from stable function name to function.
+  StringMap<Function *> StableNameToFuncMap;
+  for (auto &F : M)
+    StableNameToFuncMap[get_stable_name(F.getName())] = &F;
+  // Track merged functions
+  DenseSet<Function *> MergedFunctions;
+
+  auto ModId = M.getModuleIdentifier();
+  for (auto &[Hash, SFS] : FunctionMap->getFunctionMap()) {
+    // Compute the parameter locations based on the unique hash sequences
+    // across the candidates.
+    auto ParamLocsVec = computeParamInfo(SFS);
+    LLVM_DEBUG({
+      dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash << " with Params "
+             << ParamLocsVec.size() << "\n";
+    });
+
+    Function *MergedFunc = nullptr;
+    std::string MergedModId;
+    SmallVector<FuncMergeInfo> FuncMergeInfos;
+    for (auto &SF : SFS) {
+      // Get the function from the stable name.
+      auto I = StableNameToFuncMap.find(
+          *FunctionMap->getNameForId(SF->FunctionNameId));
+      if (I == StableNameToFuncMap.end())
+        continue;
+      Function *F = I->second;
+      assert(F);
+      // Skip if the function has been merged before.
+      if (MergedFunctions.count(F))
+        continue;
+      // Consider the function if it is eligible for merging.
+      if (!isEligibleFunction(F))
+        continue;
+
+      auto FI = llvm::StructuralHashWithDifferences(*F, ignoreOp);
+      uint64_t FuncHash = FI.FunctionHash;
+      if (Hash != FuncHash) {
+        ++NumMismatchedFunctionHashGlobalMergeFunction;
+        continue;
+      }
+
+      if (SF->InstCount != FI.IndexInstruction->size()) {
+        ++NumMismatchedInstCountGlobalMergeFunction;
+        continue;
+      }
+      bool HasValidSharedConst = true;
+      for (auto &[Index, Hash] : *SF->IndexOperandHashMap) {
+        auto [InstIndex, OpndIndex] = Index;
+        assert(InstIndex < FI.IndexInstruction->size());
+        auto *Inst = FI.IndexInstruction->lookup(InstIndex);
+        if (!isEligibleOperandForConstantSharing(Inst, OpndIndex)) {
+          HasValidSharedConst = false;
+          break;
+        }
+      }
+      if (!HasValidSharedConst) {
+        ++NumMismatchedConstHashGlobalMergeFunction;
+        continue;
+      }
+      if (!checkConstHashCompatible(*SF->IndexOperandHashMap,
+                                    *FI.IndexOperandHashMap)) {
+        ++NumMismatchedConstHashGlobalMergeFunction;
+        continue;
+      }
+      if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction,
+                                        ParamLocsVec)) {
+        ++NumMismatchedConstHashGlobalMergeFunction;
+        continue;
+      }
+
+      if (!isProfitable(SFS, F))
+        break;
+
+      if (MergedFunc) {
+        // Check if the matched functions fall into the same (first) module.
+        // This module check is not strictly necessary as the functions can move
+        // around. We just want to avoid merging functions from different
+        // modules than the first one in the functon map, as they may not end up
+        // with not being ICFed.
+        if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) {
+          ++NumMismatchedModuleIdGlobalMergeFunction;
+          continue;
+        }
+      } else {
+        MergedFunc = F;
+        MergedModId = *FunctionMap->getNameForId(SF->ModuleNameId);
+      }
+
+      FuncMergeInfos.push_back({SF.get(), F, std::move(FI.IndexInstruction)});
+      MergedFunctions.insert(F);
+    }
+    unsigned FuncMergeInfoSize = FuncMergeInfos.size();
+    if (FuncMergeInfoSize == 0)
+      continue;
+
+    LLVM_DEBUG({
+      dbgs() << "[GlobalMergeFunc] Merging function count " << FuncMergeInfoSize
+             << " in  " << ModId << "\n";
+    });
+    for (auto &FMI : FuncMergeInfos) {
+      Changed = true;
+
+      // We've already validated all locations of constant operands pointed by
+      // the parameters. Just use the first one to bookkeep the original
+      // constants for each parameter
+      SmallVector<Constant *> Params;
+      SmallVector<Type *> ParamTypes;
+      for (auto &ParamLocs : ParamLocsVec) {
+        assert(!ParamLocs.empty());
+        auto &[InstIndex, OpndIndex] = ParamLocs[0];
+        auto *Inst = FMI.IndexInstruction->lookup(InstIndex);
+        auto *Opnd = cast<Constant>(Inst->getOperand(OpndIndex));
+        Params.push_back(Opnd);
+        ParamTypes.push_back(Opnd->getType());
+      }
+
+      // Create a merged function derived from the first function in the current
+      // module context.
+      Function *MergedFunc =
+          createMergedFunction(FMI, ParamTypes, ParamLocsVec);
+
+      LLVM_DEBUG({
+        dbgs() << "[GlobalMergeFunc] Merged function (hash:" << FMI.SF->Hash
+               << ") " << MergedFunc->getName() << " generated from "
+               << FMI.F->getName() << ":\n";
+        MergedFunc->dump();
+      });
+
+      // Create a thunk to the merged function.
+      createThunk(FMI, Params, MergedFunc);
+      LLVM_DEBUG({
+        dbgs() << "[GlobalMergeFunc] Thunk generated: \n";
+        FMI.F->dump();
+      });
+      ++NumGlobalMergeFunctions;
+    }
+  }
+
+  return Changed;
+}
+
+char GlobalMergeFunc::ID = 0;
+INITIALIZE_PASS_BEGIN(GlobalMergeFunc, "global-merge-func",
+                      "Global merge function pass", false, false)
+INITIALIZE_PASS_END(GlobalMergeFunc, "global-merge-func",
+                    "Global merge function pass", false, false)
+
+StringRef GlobalMergeFunc::getPassName() const {
+  return "Global Merge Functions";
+}
+
+void GlobalMergeFunc::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
+  AU.setPreservesAll();
+  ModulePass::getAnalysisUsage(AU);
+}
+
+GlobalMergeFunc::GlobalMergeFunc() : ModulePass(ID) {
+  initializeGlobalMergeFuncPass(*llvm::PassRegistry::getPassRegistry());
+}
+
+namespace llvm {
+Pass *createGlobalMergeFuncPass() { return new GlobalMergeFunc(); }
+} // namespace llvm
+
+void GlobalMergeFunc::initializeMergerMode(const Module &M) {
+  LocalFunctionMap = std::make_unique<StableFunctionMap>();
+
+  if (DisableGlobalMerging)
+    return;
+
+  if (auto *IndexWrapperPass =
+          getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>()) {
+    auto *TheIndex = IndexWrapperPass->getIndex();
+    // (Full)LTO module does not have functions added to the index.
+    // In this case, we run a local merger without using codegen data.
+    if (TheIndex && !TheIndex->hasExportedFunctions(M))
+      return;
+  }
+
+  if (cgdata::emitCGData())
+    MergerMode = HashFunctionMode::BuildingHashFuncion;
+  else if (cgdata::hasStableFunctionMap())
+    MergerMode = HashFunctionMode::UsingHashFunction;
+}
+
+void GlobalMergeFunc::emitFunctionMap(Module &M) {
+  LLVM_DEBUG({
+    dbgs() << "Emit function map. Size: " << LocalFunctionMap->size() << "\n";
+  });
+  SmallVector<char> Buf;
+  raw_svector_ostream OS(Buf);
+
+  StableFunctionMapRecord::serialize(OS, LocalFunctionMap.get());
+
+  std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
+      OS.str(), "in-memory stable function map", false);
+
+  Triple TT(M.getTargetTriple());
+  embedBufferInModule(M, *Buffer.get(),
+                      getCodeGenDataSectionName(CG_merge, TT.getObjectFormat()),
+                      Align(4));
+}
+
+bool GlobalMergeFunc::runOnModule(Module &M) {
+  initializeMergerMode(M);
+
+  const StableFunctionMap *FuncMap;
+  if (MergerMode == HashFunctionMode::UsingHashFunction) {
+    // Use the prior CG data to optimistically create global merge candidates.
+    FuncMap = cgdata::getStableFunctionMap();
+  } else {
+    analyze(M);
+    // Emit the local function map to the custom section, __llvm_merge before
+    // finalizing it.
+    if (MergerMode == HashFunctionMode::BuildingHashFuncion &&
+        !LocalFunctionMap->empty())
+      emitFunctionMap(M);
+    LocalFunctionMap->finalize();
+    FuncMap = LocalFunctionMap.get();
+  }
+
+  return merge(M, FuncMap);
+}
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll
new file mode 100644
index 00000000000000..4f5f5c0b5b26d9
--- /dev/null
+++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll
@@ -0,0 +1,62 @@
+; This test checks if two similar functions, f1 and f2, can be merged locally within a single module
+; while parameterizing a difference in their global variables, g1 and g2.
+; To achieve this, we create two instances of the global merging function, f1.Tgm and f2.Tgm,
+; which are tail-called from thunks g1 and g2 respectively.
+; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (IFC).
+
+; RUN: opt -module-summary -module-hash %s -o %t
+
+; RUN: llvm-lto2 run -enable-global-merge-func=false %t -o %tout-nomerge \
+; RUN:    -r %t,_f1,px \
+; RUN:    -r %t,_f2,px \
+; RUN:    -r %t,_g,l -r %t,_g1,l -r %t,_g2,l
+; RUN: llvm-nm %tout-nomerge.1 | FileCheck %s --check-prefix=NOMERGE
+; RUN: llvm-lto2 run -enable-global-merge-func=true %t -o %tout-merge \
+; RUN:    -r %t,_f1,px \
+; RUN:    -r %t,_f2,px \
+; RUN:    -r %t,_g,l -r %t,_g1,l -r %t,_g2,l
+; RUN: llvm-nm %tout-merge.1 | FileCheck %s --check-prefix=GLOBALMERGE
+; RUN: llvm-objdump -d %tout-merge.1 | FileCheck %s --check-prefix=THUNK
+
+; NOMERGE-NOT: _f1.Tgm
+; GLOBALMERGE: _f1.Tgm
+; GLOBALMERGE: _f2.Tgm
+
+; THUNK: <_f1>:
+; THUNK-NEXT: adrp x1,
+; THUNK-NEXT: ldr x1, [x1]
+; THUNK-NEXT: b
+
+; THUNK: <_f2>:
+; THUNK-NEXT: adrp x1,
+; THUNK-NEXT: ldr x1, [x1]
+; THUNK-NEXT: b
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f1(i32 %a) {
+entry:
+  %idxprom = sext i32 %a to i64
+  %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %1 = load volatile i32, i32* @g1, align 4
+  %mul = mul nsw i32 %1, %0
+  %add = add nsw i32 %mul, 1
+  ret i32 %add
+}
+
+define i32 @f2(i32 %a) {
+entry:
+  %idxprom = sext i32 %a to i64
+  %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %1 = load volatile i32, i32* @g2, align 4
+  %mul = mul nsw i32 %1, %0
+  %add = add nsw i32 %mul, 1
+  ret i32 %add
+}
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll
new file mode 100644
index 00000000000000..da756e7f15e68e
--- /dev/null
+++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll
@@ -0,0 +1,82 @@
+; This test demonstrates how similar functions are handled during global outlining.
+; Currently, we do not attempt to share an merged function for identical sequences.
+; Instead, each merging instance is created uniquely.
+
+; RUN: rm -rf %t; split-file %s %t
+
+; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc
+; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc
+
+; First, run with -codegen-data-generate=true to generate the cgdata in the object files.
+; Using llvm-cgdata, merge the cg data.
+; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=true %t-foo.bc %t-goo.bc -o %tout-write \
+; RUN:    -r %t-foo.bc,_f1,px \
+; RUN:    -r %t-goo.bc,_f2,px \
+; RUN:    -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
+; RUN:    -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
+; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-write.1 %tout-write.2
+
+; Now run with -codegen-data-use-path=%tout.cgdata to optimize the binary.
+; Each module has its own merging instance as it is matched against the merged cgdata.
+; RUN: llvm-lto2 run -enable-global-merge-func=true \
+; RUN:    -codegen-data-use-path=%tout.cgdata \
+; RUN:    %t-foo.bc %t-goo.bc -o %tout-read \
+; RUN:    -r %t-foo.bc,_f1,px \
+; RUN:    -r %t-goo.bc,_f2,px \
+; RUN:    -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
+; RUN:    -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
+; RUN: llvm-nm %tout-read.1 | FileCheck %s --check-prefix=READ1
+; RUN: llvm-nm %tout-read.2 | FileCheck %s --check-prefix=READ2
+; RUN: llvm-objdump -d %tout-read.1 | FileCheck %s --check-prefix=THUNK1
+; RUN: llvm-objdump -d %tout-read.2 | FileCheck %s --check-prefix=THUNK2
+
+; READ1: _f1.Tgm
+; READ2: _f2.Tgm
+
+; THUNK1: <_f1>:
+; THUNK1-NEXT: adrp x1,
+; THUNK1-NEXT: ldr x1, [x1]
+; THUNK1-NEXT: b
+
+; THUNK2: <_f2>:
+; THUNK2-NEXT: adrp x1,
+; THUNK2-NEXT: ldr x1, [x1]
+; THUNK2-NEXT: b
+
+;--- foo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f1(i32 %a) {
+entry:
+  %idxprom = sext i32 %a to i64
+  %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %1 = load volatile i32, i32* @g1, align 4
+  %mul = mul nsw i32 %1, %0
+  %add = add nsw i32 %mul, 1
+  ret i32 %add
+}
+
+;--- goo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f2(i32 %a) {
+entry:
+  %idxprom = sext i32 %a to i64
+  %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %1 = load volatile i32, i32* @g2, align 4
+  %mul = mul nsw i32 %1, %0
+  %add = add nsw i32 %mul, 1
+  ret i32 %add
+}
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll
new file mode 100644
index 00000000000000..06880e3d268189
--- /dev/null
+++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll
@@ -0,0 +1,68 @@
+; TODO: This test checks if the how similar functions are handled during global outlining
+; by repeating the codegen via -codegen-data-thinlto-two-rounds=true.
+
+; RUN: rm -rf %t; split-file %s %t
+
+; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc
+; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc
+
+; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-thinlto-two-rounds=true %t-foo.bc %t-goo.bc -o %tout \
+; RUN:    -r %t-foo.bc,_f1,px \
+; RUN:    -r %t-goo.bc,_f2,px \
+; RUN:    -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
+; RUN:    -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
+; RUN: llvm-nm %tout.1 | FileCheck %s --check-prefix=OUT1
+; RUN: llvm-nm %tout.2 | FileCheck %s --check-prefix=OUT2
+; RUN: llvm-objdump -d %tout.1 | FileCheck %s --check-prefix=THUNK1
+; RUN: llvm-objdump -d %tout.2 | FileCheck %s --check-prefix=THUNK2
+
+; OUT1: _f1.Tgm
+; OUT2: _f2.Tgm
+
+; THUNK1: <_f1>:
+; THUNK1-NEXT: adrp x1,
+; THUNK1-NEXT: ldr x1, [x1]
+; THUNK1-NEXT: b
+
+; THUNK2: <_f2>:
+; THUNK2-NEXT: adrp x1,
+; THUNK2-NEXT: ldr x1, [x1]
+; THUNK2-NEXT: b
+
+;--- foo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f1(i32 %a) {
+entry:
+  %idxprom = sext i32 %a to i64
+  %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %1 = load volatile i32, i32* @g1, align 4
+  %mul = mul nsw i32 %1, %0
+  %add = add nsw i32 %mul, 1
+  ret i32 %add
+}
+
+;--- goo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f2(i32 %a) {
+entry:
+  %idxprom = sext i32 %a to i64
+  %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %1 = load volatile i32, i32* @g2, align 4
+  %mul = mul nsw i32 %1, %0
+  %add = add nsw i32 %mul, 1
+  ret i32 %add
+}
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
new file mode 100644
index 00000000000000..a4022eb885b43f
--- /dev/null
+++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
@@ -0,0 +1,97 @@
+; This test verifies whether a stable function is encoded into the __llvm_merge section
+; when the -codegen-data-generate flag is used under -enable-global-merge-func=true.
+
+; RUN: rm -rf %t; split-file %s %t
+
+; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc
+; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc
+
+; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=false %t-foo.bc %t-goo.bc -o %tout-nowrite \
+; RUN:    -r %t-foo.bc,_f1,px \
+; RUN:    -r %t-goo.bc,_f2,px \
+; RUN:    -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
+; RUN:    -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
+; RUN: llvm-nm %tout-nowrite.1 | FileCheck %s --check-prefix=NOWRITE
+; RUN: llvm-nm %tout-nowrite.2 | FileCheck %s --check-prefix=NOWRITE
+
+; No merge instance is locally created as each module has a singltone function.
+; NOWRITE-NOT: _f1.Tgm
+; NOWRITE-NOT: _f2.Tgm
+
+; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=true %t-foo.bc %t-goo.bc -o %tout-nowrite \
+; RUN:    -r %t-foo.bc,_f1,px \
+; RUN:    -r %t-goo.bc,_f2,px \
+; RUN:    -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
+; RUN:    -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
+; RUN: llvm-nm %tout-nowrite.1 | FileCheck %s --check-prefix=WRITE
+; RUN: llvm-nm %tout-nowrite.2 | FileCheck %s --check-prefix=WRITE
+; RUN: llvm-objdump -h %tout-nowrite.1 | FileCheck %s --check-prefix=SECTNAME
+; RUN: llvm-objdump -h %tout-nowrite.2 | FileCheck %s --check-prefix=SECTNAME
+
+; On a write mode, no merging happens yet for each module.
+; We only create stable functions and publish them into __llvm_merge section for each object.
+; WRITE-NOT: _f1.Tgm
+; WRITE-NOT: _f2.Tgm
+; SECTNAME: __llvm_merge
+
+; Merge the cgdata using llvm-cgdata.
+; We now validate the content of the merged cgdata.
+; Two functions have the same hash with only one different constnat at a same location.
+; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-nowrite.1 %tout-nowrite.2
+; RUN: llvm-cgdata --convert %tout.cgdata   -o - | FileCheck %s
+
+; CHECK:      - Hash: [[#%d,HASH:]]
+; CHECK-NEXT:   FunctionName: f1
+; CHECK-NEXT:   ModuleName: {{.*}}
+; CHECK-NEXT:   InstCount: [[#%d,INSTCOUNT:]]
+; CHECK-NEXT:   IndexOperandHashes:
+; CHECK-NEXT:     - InstIndex: [[#%d,INSTINDEX:]]
+; CHECK-NEXT:       OpndIndex: [[#%d,OPNDINDEX:]]
+; CHECK-NEXT:       OpndHash: {{.*}}
+
+; CHECK:      - Hash: [[#%d,HASH]]
+; CHECK-NEXT:   FunctionName: f2
+; CHECK-NEXT:   ModuleName: {{.*}}
+; CHECK-NEXT:   InstCount: [[#%d,INSTCOUNT]]
+; CHECK-NEXT:   IndexOperandHashes:
+; CHECK-NEXT:     - InstIndex: [[#%d,INSTINDEX]]
+; CHECK-NEXT:       OpndIndex: [[#%d,OPNDINDEX]]
+; CHECK-NEXT:       OpndHash: {{.*}}
+
+;--- foo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f1(i32 %a) {
+entry:
+  %idxprom = sext i32 %a to i64
+  %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %1 = load volatile i32, i32* @g1, align 4
+  %mul = mul nsw i32 %1, %0
+  %add = add nsw i32 %mul, 1
+  ret i32 %add
+}
+
+;--- goo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f2(i32 %a) {
+entry:
+  %idxprom = sext i32 %a to i64
+  %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %1 = load volatile i32, i32* @g2, align 4
+  %mul = mul nsw i32 %1, %0
+  %add = add nsw i32 %mul, 1
+  ret i32 %add
+}
diff --git a/llvm/tools/llvm-lto2/CMakeLists.txt b/llvm/tools/llvm-lto2/CMakeLists.txt
index 3b4644d6e27715..6ddccc7f17442f 100644
--- a/llvm/tools/llvm-lto2/CMakeLists.txt
+++ b/llvm/tools/llvm-lto2/CMakeLists.txt
@@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS
   BitReader
   CodeGen
   Core
+  IPO
   Linker
   LTO
   MC
diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index d4f022ef021a44..0dc6623552a4bd 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Support/PluginLoader.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/Threading.h"
+#include "llvm/Transforms/IPO.h"
 #include <atomic>
 
 using namespace llvm;
@@ -195,6 +196,7 @@ static cl::opt<bool> TryUseNewDbgInfoFormat(
 extern cl::opt<bool> UseNewDbgInfoFormat;
 extern cl::opt<cl::boolOrDefault> LoadBitcodeIntoNewDbgInfoFormat;
 extern cl::opt<cl::boolOrDefault> PreserveInputDbgFormat;
+extern cl::opt<bool> EnableGlobalMergeFunc;
 
 static void check(Error E, std::string Msg) {
   if (!E)
@@ -374,6 +376,10 @@ static int run(int argc, char **argv) {
     if (DI.getSeverity() == DS_Error)
       HasErrors = true;
   };
+  Conf.PreCodeGenPassesHook = [](legacy::PassManager &pm) {
+    if (EnableGlobalMergeFunc)
+      pm.add(createGlobalMergeFuncPass());
+  };
 
   LTO::LTOKind LTOMode = LTO::LTOK_Default;
 

>From 319415409340ef4af8dc4d980c50a413190846c7 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Fri, 18 Oct 2024 10:12:21 -0700
Subject: [PATCH 5/5] Address comments from tschuett

---
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  1 -
 llvm/include/llvm/Transforms/IPO.h            |  3 ++
 .../Transforms/IPO/GlobalMergeFunctions.h     | 45 ++++++++--------
 .../Transforms/IPO/GlobalMergeFunctions.cpp   | 53 +++++++++----------
 4 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index c5b4811815179d..ad80c661147d6f 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -77,7 +77,6 @@
 #include "llvm/Target/CGPassBuilderOption.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/CFGuard.h"
-#include "llvm/Transforms/IPO/GlobalMergeFunctions.h"
 #include "llvm/Transforms/Scalar/ConstantHoisting.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
index 86a8654f56997c..28cf330ad20812 100644
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -55,6 +55,9 @@ enum class PassSummaryAction {
   Export, ///< Export information to summary.
 };
 
+/// createGlobalMergeFuncPass - This pass generates merged instances by
+/// parameterizing distinct constants across similar functions, utilizing stable
+/// function hash information.
 Pass *createGlobalMergeFuncPass();
 
 } // End llvm namespace
diff --git a/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
index 565be54f89e882..395aeda73b2d0f 100644
--- a/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
+++ b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
@@ -5,23 +5,29 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-///
-/// This file defines global merge functions pass and related data structure.
-///
+//
+// This pass defines the implementation of a function merging mechanism
+// that utilizes a stable function hash to track differences in constants and
+// identify potential merge candidates. The process involves two rounds:
+// 1. The first round collects stable function hashes and identifies merge
+//    candidates with matching hashes. It also computes the set of parameters
+//    that point to different constants during the stable function merge.
+// 2. The second round leverages this collected global function information to
+//    optimistically create a merged function in each module context, ensuring
+//    correct transformation.
+// Similar to the global outliner, this approach uses the linker's deduplication
+// (ICF) to fold identical merged functions, thereby reducing the final binary
+// size. The work is inspired by the concepts discussed in the following paper:
+// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575.
+//
 //===----------------------------------------------------------------------===//
 
-#ifndef PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
-#define PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
+#ifndef LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
+#define LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
 
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StableHashing.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/CGData/StableFunctionMap.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
-#include <map>
-#include <mutex>
 
 enum class HashFunctionMode {
   Local,
@@ -36,15 +42,10 @@ namespace llvm {
 using ParamLocs = SmallVector<IndexPair, 4>;
 // A vector of parameters
 using ParamLocsVecTy = SmallVector<ParamLocs, 8>;
-// A map of stable hash to a vector of stable functions
-
-/// GlobalMergeFunc finds functions which only differ by constants in
-/// certain instructions, e.g. resulting from specialized functions of layout
-/// compatible types.
-/// Unlike PikaMergeFunc that directly compares IRs, this uses stable function
-/// hash to find the merge candidate. Similar to the global outliner, we can run
-/// codegen twice to collect function merge candidate in the first round, and
-/// merge functions globally in the second round.
+
+/// GlobalMergeFunc is a ModulePass that implements a function merging mechanism
+/// using stable function hashes. It identifies and merges functions with
+/// matching hashes across modules to optimize binary size.
 class GlobalMergeFunc : public ModulePass {
   HashFunctionMode MergerMode = HashFunctionMode::Local;
 
@@ -69,9 +70,9 @@ class GlobalMergeFunc : public ModulePass {
   /// Emit LocalFunctionMap into __llvm_merge section.
   void emitFunctionMap(Module &M);
 
-  /// Merge functions in the module using the global function map.
+  /// Merge functions in the module using the given function map.
   bool merge(Module &M, const StableFunctionMap *FunctionMap);
 };
 
 } // end namespace llvm
-#endif // PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
+#endif // LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
diff --git a/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
index 599de722814f21..cec4d4caf63607 100644
--- a/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
@@ -6,16 +6,19 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// TODO:  This implements a function merge using function hash while tracking
-// differences in Constants. This uses stable function hash to find potential
-// merge candidates. The first codegen round collects stable function hashes,
-// and determines the merge candidates that match the stable function hashes.
-// The set of parameters pointing to different Constants are also computed
-// during the stable function merge. The second codegen round uses this global
-// function info to optimistically create a merged function in each module
-// context to guarantee correct transformation. Similar to the global outliner,
-// the linker's deduplication (ICF) folds the identical merged functions to save
-// the final binary size.
+// This pass defines the implementation of a function merging mechanism
+// that utilizes a stable function hash to track differences in constants and
+// create potential merge candidates. The process involves two rounds:
+// 1. The first round collects stable function hashes and identifies merge
+//    candidates with matching hashes. It also computes the set of parameters
+//    that point to different constants during the stable function merge.
+// 2. The second round leverages this collected global function information to
+//    optimistically create a merged function in each module context, ensuring
+//    correct transformation.
+// Similar to the global outliner, this approach uses the linker's deduplication
+// (ICF) to fold identical merged functions, thereby reducing the final binary
+// size. The work is inspired by the concepts discussed in the following paper:
+// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575.
 //
 //===----------------------------------------------------------------------===//
 
@@ -23,9 +26,6 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
 #include "llvm/CGData/CodeGenData.h"
-#include "llvm/CGData/StableFunctionMap.h"
-#include "llvm/CodeGen/MachineStableHash.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/StructuralHash.h"
 #include "llvm/InitializePasses.h"
@@ -84,7 +84,7 @@ STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
 STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed");
 STATISTIC(NumEligibleFunctions, "Number of functions that are eligible");
 
-/// Returns true if the \opIdx operand of \p CI is the callee operand.
+/// Returns true if the \OpIdx operand of \p CI is the callee operand.
 static bool isCalleeOperand(const CallBase *CI, unsigned OpIdx) {
   return &CI->getCalledOperandUse() == &CI->getOperandUse(OpIdx);
 }
@@ -148,22 +148,19 @@ bool isEligibleFunction(Function *F) {
   if (F->hasFnAttribute(llvm::Attribute::NoMerge))
     return false;
 
-  if (F->hasAvailableExternallyLinkage()) {
+  if (F->hasAvailableExternallyLinkage())
     return false;
-  }
 
-  if (F->getFunctionType()->isVarArg()) {
+  if (F->getFunctionType()->isVarArg())
     return false;
-  }
 
   if (F->getCallingConv() == CallingConv::SwiftTail)
     return false;
 
-  // if function contains callsites with musttail, if we merge
+  // If function contains callsites with musttail, if we merge
   // it, the merged function will have the musttail callsite, but
   // the number of parameters can change, thus the parameter count
   // of the callsite will mismatch with the function itself.
-  // if (IgnoreMusttailFunction) {
   for (const BasicBlock &BB : *F) {
     for (const Instruction &I : BB) {
       const auto *CB = dyn_cast<CallBase>(&I);
@@ -203,7 +200,6 @@ static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
   return true;
 }
 
-// copy from merge functions.cpp
 static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
   Type *SrcTy = V->getType();
   if (SrcTy->isStructTy()) {
@@ -252,7 +248,8 @@ void GlobalMergeFunc::analyze(Module &M) {
 
       auto FI = llvm::StructuralHashWithDifferences(Func, ignoreOp);
 
-      // Convert the map to a vector for a serialization-friendly format.
+      // Convert the operand map to a vector for a serialization-friendly
+      // format.
       IndexOperandHashVecType IndexOperandHashes;
       for (auto &Pair : *FI.IndexOperandHashMap)
         IndexOperandHashes.emplace_back(Pair);
@@ -595,7 +592,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
         // This module check is not strictly necessary as the functions can move
         // around. We just want to avoid merging functions from different
         // modules than the first one in the functon map, as they may not end up
-        // with not being ICFed.
+        // with not being ICFed by the linker.
         if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) {
           ++NumMismatchedModuleIdGlobalMergeFunction;
           continue;
@@ -616,12 +613,12 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
       dbgs() << "[GlobalMergeFunc] Merging function count " << FuncMergeInfoSize
              << " in  " << ModId << "\n";
     });
+
     for (auto &FMI : FuncMergeInfos) {
       Changed = true;
 
       // We've already validated all locations of constant operands pointed by
-      // the parameters. Just use the first one to bookkeep the original
-      // constants for each parameter
+      // the parameters. Populate parameters pointing to the original constants.
       SmallVector<Constant *> Params;
       SmallVector<Type *> ParamTypes;
       for (auto &ParamLocs : ParamLocsVec) {
@@ -633,8 +630,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
         ParamTypes.push_back(Opnd->getType());
       }
 
-      // Create a merged function derived from the first function in the current
-      // module context.
+      // Create a merged function derived from the current function.
       Function *MergedFunc =
           createMergedFunction(FMI, ParamTypes, ParamLocsVec);
 
@@ -645,7 +641,8 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
         MergedFunc->dump();
       });
 
-      // Create a thunk to the merged function.
+      // Transform the current function into a thunk that calls the merged
+      // function.
       createThunk(FMI, Params, MergedFunc);
       LLVM_DEBUG({
         dbgs() << "[GlobalMergeFunc] Thunk generated: \n";