[llvm-branch-commits] [lld] [llvm] [CGData] Global Merge Functions (PR #112671)
Kyungwoo Lee via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Nov 4 00:48:08 PST 2024
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/112671
>From 8e10ed3b27b0f0098782171bb38387e86536be5f Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Sat, 7 Sep 2024 22:48:17 -0700
Subject: [PATCH 1/5] [CGData] Stable Function Map
These define the main data structures to represent stable functions and group
similar functions in a function map.
Serialization is supported in a binary or yaml form.
---
llvm/include/llvm/CGData/StableFunctionMap.h | 136 ++++++++++++
.../llvm/CGData/StableFunctionMapRecord.h | 71 ++++++
llvm/lib/CGData/CMakeLists.txt | 2 +
llvm/lib/CGData/StableFunctionMap.cpp | 170 +++++++++++++++
llvm/lib/CGData/StableFunctionMapRecord.cpp | 203 ++++++++++++++++++
llvm/unittests/CGData/CMakeLists.txt | 2 +
.../CGData/StableFunctionMapRecordTest.cpp | 127 +++++++++++
.../CGData/StableFunctionMapTest.cpp | 153 +++++++++++++
8 files changed, 864 insertions(+)
create mode 100644 llvm/include/llvm/CGData/StableFunctionMap.h
create mode 100644 llvm/include/llvm/CGData/StableFunctionMapRecord.h
create mode 100644 llvm/lib/CGData/StableFunctionMap.cpp
create mode 100644 llvm/lib/CGData/StableFunctionMapRecord.cpp
create mode 100644 llvm/unittests/CGData/StableFunctionMapRecordTest.cpp
create mode 100644 llvm/unittests/CGData/StableFunctionMapTest.cpp
diff --git a/llvm/include/llvm/CGData/StableFunctionMap.h b/llvm/include/llvm/CGData/StableFunctionMap.h
new file mode 100644
index 00000000000000..ed217e381610da
--- /dev/null
+++ b/llvm/include/llvm/CGData/StableFunctionMap.h
@@ -0,0 +1,136 @@
+//===- StableFunctionMap.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This defines the StableFunctionMap class, to track similar functions.
+// It provides a mechanism to map stable hashes of functions to their
+// corresponding metadata. It includes structures for storing function details
+// and methods for managing and querying these mappings.
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_CGDATA_STABLEFUNCTIONMAP_H
+#define LLVM_CGDATA_STABLEFUNCTIONMAP_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/StructuralHash.h"
+
+namespace llvm {
+
+using IndexPairHash = std::pair<IndexPair, stable_hash>;
+using IndexOperandHashVecType = SmallVector<IndexPairHash>;
+
+/// A stable function is a function identified by a stable hash, together with
+/// the locations of the operands that were ignored while hashing and the
+/// hashes of those operands.
+struct StableFunction {
+  /// The combined stable hash of the function.
+  stable_hash Hash = 0;
+  /// The name of the function.
+  std::string FunctionName;
+  /// The name of the module the function is in.
+  std::string ModuleName;
+  /// The number of instructions.
+  unsigned InstCount = 0;
+  /// A vector of pairs of IndexPair and operand hash which was skipped.
+  IndexOperandHashVecType IndexOperandHashes;
+
+  /// Build a stable function from its parts. The names are taken by value and
+  /// moved into the members, so an rvalue argument is never copied and an
+  /// lvalue argument is copied exactly once.
+  StableFunction(stable_hash Hash, std::string FunctionName,
+                 std::string ModuleName, unsigned InstCount,
+                 IndexOperandHashVecType &&IndexOperandHashes)
+      : Hash(Hash), FunctionName(std::move(FunctionName)),
+        ModuleName(std::move(ModuleName)), InstCount(InstCount),
+        IndexOperandHashes(std::move(IndexOperandHashes)) {}
+  /// Default construction yields a zero hash, a zero instruction count, empty
+  /// names and no operand hashes.
+  StableFunction() = default;
+};
+
+/// A compact counterpart of StableFunction meant for fast look-up: names are
+/// referred to by ID and the skipped operand hashes are kept in a map.
+struct StableFunctionEntry {
+  /// Combined stable hash of the function.
+  stable_hash Hash;
+  /// ID standing for the function name.
+  unsigned FunctionNameId;
+  /// ID standing for the module name.
+  unsigned ModuleNameId;
+  /// Number of instructions.
+  unsigned InstCount;
+  /// Map from an IndexPair to the stable_hash that was skipped there.
+  std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap;
+
+  /// Takes ownership of \p IndexOperandHashMap; the remaining fields are
+  /// copied from the arguments.
+  StableFunctionEntry(
+      stable_hash Hash, unsigned FunctionNameId, unsigned ModuleNameId,
+      unsigned InstCount,
+      std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap)
+      : Hash{Hash}, FunctionNameId{FunctionNameId},
+        ModuleNameId{ModuleNameId}, InstCount{InstCount},
+        IndexOperandHashMap{std::move(IndexOperandHashMap)} {}
+};
+
+using HashFuncsMapType =
+ DenseMap<stable_hash, SmallVector<std::unique_ptr<StableFunctionEntry>>>;
+
+/// Groups StableFunctionEntry objects by their stable hash and owns the name
+/// table that the entries refer to by ID.
+class StableFunctionMap {
+  /// A map from a stable_hash to a vector of functions with that hash.
+  HashFuncsMapType HashToFuncs;
+  /// A vector of strings to hold names.
+  SmallVector<std::string> IdToName;
+  /// A map from StringRef (name) to an ID.
+  StringMap<unsigned> NameToId;
+  /// True if the function map is finalized with minimal content.
+  bool Finalized = false;
+
+public:
+  /// Get the HashToFuncs map for serialization.
+  const HashFuncsMapType &getFunctionMap() const { return HashToFuncs; }
+
+  /// Get the IdToName vector for serialization. The vector is returned by
+  /// reference so that callers do not copy every name; the reference refers
+  /// to this map's own storage.
+  const SmallVector<std::string> &getNames() const { return IdToName; }
+
+  /// Get an existing ID associated with the given name or create a new ID if it
+  /// doesn't exist.
+  unsigned getIdOrCreateForName(StringRef Name);
+
+  /// Get the name associated with a given ID, or std::nullopt if no name has
+  /// been assigned to \p Id.
+  std::optional<std::string> getNameForId(unsigned Id) const;
+
+  /// Insert a `StableFunction` object into the function map. This method
+  /// handles the uniquing of string names and creates a `StableFunctionEntry`
+  /// for insertion.
+  void insert(const StableFunction &Func);
+
+  /// Insert a `StableFunctionEntry` into the function map directly. This
+  /// method assumes that string names have already been uniqued and the
+  /// `StableFunctionEntry` is ready for insertion.
+  void insert(std::unique_ptr<StableFunctionEntry> FuncEntry) {
+    assert(!Finalized && "Cannot insert after finalization");
+    HashToFuncs[FuncEntry->Hash].emplace_back(std::move(FuncEntry));
+  }
+
+  /// Merge a \p OtherMap into this function map.
+  void merge(const StableFunctionMap &OtherMap);
+
+  /// \returns true if there is no stable function entry.
+  bool empty() const { return size() == 0; }
+
+  enum SizeType {
+    UniqueHashCount,        // The number of unique hashes in HashToFuncs.
+    TotalFunctionCount,     // The number of total functions in HashToFuncs.
+    MergeableFunctionCount, // The number of functions that can be merged based
+                            // on their hash.
+  };
+
+  /// \returns the size of StableFunctionMap.
+  /// \p Type is the type of size to return.
+  size_t size(SizeType Type = UniqueHashCount) const;
+
+  /// Finalize the stable function map by trimming content.
+  void finalize();
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/CGData/StableFunctionMapRecord.h b/llvm/include/llvm/CGData/StableFunctionMapRecord.h
new file mode 100644
index 00000000000000..0517f2c20d72ff
--- /dev/null
+++ b/llvm/include/llvm/CGData/StableFunctionMapRecord.h
@@ -0,0 +1,71 @@
+//===- StableFunctionMapRecord.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This defines the StableFunctionMapRecord structure, which provides
+// functionality for managing and serializing a StableFunctionMap. It includes
+// methods for serialization to and from raw and YAML streams, as well as
+// utilities for merging and finalizing function maps.
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
+#define LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
+
+#include "llvm/CGData/StableFunctionMap.h"
+#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// Owns a StableFunctionMap and provides its binary and YAML (de)serialization.
+struct StableFunctionMapRecord {
+  std::unique_ptr<StableFunctionMap> FunctionMap;
+
+  /// Start with a freshly created, empty function map.
+  StableFunctionMapRecord()
+      : FunctionMap(std::make_unique<StableFunctionMap>()) {}
+
+  /// Take ownership of an existing function map.
+  StableFunctionMapRecord(std::unique_ptr<StableFunctionMap> FunctionMap)
+      : FunctionMap(std::move(FunctionMap)) {}
+
+  /// A static helper function to serialize the stable function map without
+  /// owning the stable function map.
+  static void serialize(raw_ostream &OS, const StableFunctionMap *FunctionMap);
+
+  /// Serialize the owned stable function map to a raw_ostream.
+  void serialize(raw_ostream &OS) const;
+
+  /// Deserialize the stable function map from the raw bytes at \p Ptr.
+  /// \p Ptr is taken by reference and advanced past the bytes that were read.
+  void deserialize(const unsigned char *&Ptr);
+
+  /// Serialize the stable function map to a YAML stream.
+  void serializeYAML(yaml::Output &YOS) const;
+
+  /// Deserialize the stable function map from a YAML stream.
+  void deserializeYAML(yaml::Input &YIS);
+
+  /// Finalize the stable function map by trimming content.
+  void finalize() { FunctionMap->finalize(); }
+
+  /// Merge the stable function map of \p Other into this one.
+  void merge(const StableFunctionMapRecord &Other) {
+    FunctionMap->merge(*Other.FunctionMap);
+  }
+
+  /// \returns true if the stable function map is empty.
+  bool empty() const { return FunctionMap->empty(); }
+
+  /// Print the stable function map in a YAML format.
+  void print(raw_ostream &OS = llvm::errs()) const {
+    yaml::Output YamlOut(OS);
+    serializeYAML(YamlOut);
+  }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/CGData/CMakeLists.txt b/llvm/lib/CGData/CMakeLists.txt
index 157b0dfb7f9fcf..003173139f36c5 100644
--- a/llvm/lib/CGData/CMakeLists.txt
+++ b/llvm/lib/CGData/CMakeLists.txt
@@ -4,6 +4,8 @@ add_llvm_component_library(LLVMCGData
CodeGenDataWriter.cpp
OutlinedHashTree.cpp
OutlinedHashTreeRecord.cpp
+ StableFunctionMap.cpp
+ StableFunctionMapRecord.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/CGData
diff --git a/llvm/lib/CGData/StableFunctionMap.cpp b/llvm/lib/CGData/StableFunctionMap.cpp
new file mode 100644
index 00000000000000..638f92df68e028
--- /dev/null
+++ b/llvm/lib/CGData/StableFunctionMap.cpp
@@ -0,0 +1,170 @@
+//===-- StableFunctionMap.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the functionality for the StableFunctionMap class, which
+// manages the mapping of stable function hashes to their metadata. It includes
+// methods for inserting, merging, and finalizing function entries, as well as
+// utilities for handling function names and IDs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CGData/StableFunctionMap.h"
+
+#define DEBUG_TYPE "stable-function-map"
+
+using namespace llvm;
+
+/// Return the ID already assigned to \p Name, or assign and return a new one.
+unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) {
+  auto Known = NameToId.find(Name);
+  if (Known != NameToId.end())
+    return Known->second;
+
+  // A new name takes the next index of IdToName as its ID.
+  unsigned NewId = IdToName.size();
+  assert(NewId == NameToId.size() && "ID collision");
+  IdToName.emplace_back(Name.str());
+  NameToId[IdToName.back()] = NewId;
+  return NewId;
+}
+
+/// Return a copy of the name stored for \p Id, or std::nullopt when \p Id is
+/// not a valid index into IdToName.
+std::optional<std::string> StableFunctionMap::getNameForId(unsigned Id) const {
+  if (Id < IdToName.size())
+    return IdToName[Id];
+  return std::nullopt;
+}
+
+/// Convert \p Func into a StableFunctionEntry and insert it: both names are
+/// replaced by IDs and the operand-hash vector becomes a map.
+void StableFunctionMap::insert(const StableFunction &Func) {
+  assert(!Finalized && "Cannot insert after finalization");
+  // The function name is registered before the module name; the order decides
+  // which IDs newly seen names receive.
+  const unsigned NameId = getIdOrCreateForName(Func.FunctionName);
+  const unsigned ModuleId = getIdOrCreateForName(Func.ModuleName);
+
+  auto OperandHashes = std::make_unique<IndexOperandHashMapType>();
+  for (const auto &IndexAndHash : Func.IndexOperandHashes)
+    (*OperandHashes)[IndexAndHash.first] = IndexAndHash.second;
+
+  insert(std::make_unique<StableFunctionEntry>(
+      Func.Hash, NameId, ModuleId, Func.InstCount, std::move(OperandHashes)));
+}
+
+/// Append a copy of every entry of \p OtherMap to this map.
+void StableFunctionMap::merge(const StableFunctionMap &OtherMap) {
+  assert(!Finalized && "Cannot merge after finalization");
+  for (auto &HashAndFuncs : OtherMap.HashToFuncs) {
+    auto &Dest = HashToFuncs[HashAndFuncs.first];
+    for (auto &Src : HashAndFuncs.second) {
+      // IDs are looked up by name in OtherMap and re-registered in this map,
+      // function name first.
+      const unsigned NameId =
+          getIdOrCreateForName(*OtherMap.getNameForId(Src->FunctionNameId));
+      const unsigned ModuleId =
+          getIdOrCreateForName(*OtherMap.getNameForId(Src->ModuleNameId));
+      // The operand-hash map is cloned so that both maps own their data.
+      Dest.emplace_back(std::make_unique<StableFunctionEntry>(
+          Src->Hash, NameId, ModuleId, Src->InstCount,
+          std::make_unique<IndexOperandHashMapType>(
+              *Src->IndexOperandHashMap)));
+    }
+  }
+}
+
+/// Return the size of the map measured according to \p Type.
+size_t StableFunctionMap::size(SizeType Type) const {
+  if (Type == UniqueHashCount)
+    return HashToFuncs.size();
+
+  // Any value other than the three enumerators yields 0.
+  if (Type != TotalFunctionCount && Type != MergeableFunctionCount)
+    return 0;
+
+  // For MergeableFunctionCount only hashes shared by at least two functions
+  // contribute; for TotalFunctionCount every hash does.
+  const size_t MinGroupSize = Type == MergeableFunctionCount ? 2 : 0;
+  size_t Count = 0;
+  for (auto &HashAndFuncs : HashToFuncs)
+    if (HashAndFuncs.second.size() >= MinGroupSize)
+      Count += HashAndFuncs.second.size();
+  return Count;
+}
+
+using ParamLocs = SmallVector<IndexPair>;
+/// Erase, from every function in \p SFS, each IndexPair whose operand hash in
+/// the first function equals the hash stored for that pair in all the others.
+static void removeIdenticalIndexPair(
+    SmallVector<std::unique_ptr<StableFunctionEntry>> &SFS) {
+  const auto &Root = SFS[0];
+  const unsigned NumFuncs = SFS.size();
+
+  // True when every function after the first stores RootHash for Pair.
+  auto IsSharedByAll = [&](const IndexPair &Pair, stable_hash RootHash) {
+    for (unsigned J = 1; J < NumFuncs; ++J) {
+      auto &Other = SFS[J];
+      assert(Other->IndexOperandHashMap->count(Pair));
+      if ((*Other->IndexOperandHashMap)[Pair] != RootHash)
+        return false;
+    }
+    return true;
+  };
+
+  // No need to parameterize the pairs whose hashes are identical across
+  // stable functions. They are collected first so that no map is modified
+  // while the first function's map is being walked.
+  SmallVector<IndexPair> Redundant;
+  for (auto &PairAndHash : *Root->IndexOperandHashMap)
+    if (IsSharedByAll(PairAndHash.first, PairAndHash.second))
+      Redundant.emplace_back(PairAndHash.first);
+
+  for (auto &Pair : Redundant)
+    for (auto &Func : SFS)
+      Func->IndexOperandHashMap->erase(Pair);
+}
+
+/// Mark the map as finalized and trim it. A hash whose functions do not agree
+/// on instruction count and skipped operand locations is erased; for the
+/// remaining hashes the operand entries that are identical across all
+/// functions are removed.
+void StableFunctionMap::finalize() {
+  Finalized = true;
+
+  // Orders entries by the name of the module they belong to.
+  auto ByModuleName = [&](const std::unique_ptr<StableFunctionEntry> &L,
+                          const std::unique_ptr<StableFunctionEntry> &R) {
+    return *getNameForId(L->ModuleNameId) < *getNameForId(R->ModuleNameId);
+  };
+
+  for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It) {
+    auto &Group = It->second;
+
+    // Group stable functions by ModuleIdentifier.
+    std::stable_sort(Group.begin(), Group.end(), ByModuleName);
+
+    // The first function after sorting serves as the reference.
+    auto &Root = Group[0];
+    const unsigned GroupSize = Group.size();
+
+    bool Mergeable = true;
+    for (unsigned I = 1; I < GroupSize; ++I) {
+      auto &Candidate = Group[I];
+      assert(Root->Hash == Candidate->Hash);
+      // A differing instruction count or a differing number of skipped
+      // operands rules the whole group out.
+      if (Root->InstCount != Candidate->InstCount ||
+          Root->IndexOperandHashMap->size() !=
+              Candidate->IndexOperandHashMap->size()) {
+        Mergeable = false;
+        break;
+      }
+      // Every skipped operand location of the reference must also be present
+      // in the candidate.
+      for (auto &LocAndHash : *Root->IndexOperandHashMap) {
+        if (Candidate->IndexOperandHashMap->count(LocAndHash.first) == 0) {
+          Mergeable = false;
+          break;
+        }
+      }
+    }
+
+    if (Mergeable) {
+      // Trim the index pair that has the same operand hash across
+      // stable functions.
+      removeIdenticalIndexPair(Group);
+    } else {
+      // NOTE(review): this erases while iterating and then increments the
+      // erased iterator - confirm against DenseMap's invalidation rules.
+      HashToFuncs.erase(It);
+    }
+  }
+}
diff --git a/llvm/lib/CGData/StableFunctionMapRecord.cpp b/llvm/lib/CGData/StableFunctionMapRecord.cpp
new file mode 100644
index 00000000000000..05c96f873947ee
--- /dev/null
+++ b/llvm/lib/CGData/StableFunctionMapRecord.cpp
@@ -0,0 +1,203 @@
+//===-- StableFunctionMapRecord.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the functionality for the StableFunctionMapRecord class,
+// including methods for serialization and deserialization of stable function
+// maps to and from raw and YAML streams. It also includes utilities for
+// managing function entries and their metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CGData/StableFunctionMapRecord.h"
+#include "llvm/Support/EndianStream.h"
+
+#define DEBUG_TYPE "stable-function-map-record"
+
+using namespace llvm;
+using namespace llvm::support;
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(IndexPairHash)
+LLVM_YAML_IS_SEQUENCE_VECTOR(StableFunction)
+
+namespace llvm {
+namespace yaml {
+
+/// YAML mapping of one (IndexPair, operand hash) element: the two indices of
+/// the pair and the hash appear as three sibling keys.
+template <> struct MappingTraits<IndexPairHash> {
+  static void mapping(IO &YamlIO, IndexPairHash &Entry) {
+    IndexPair &Location = Entry.first;
+    YamlIO.mapRequired("InstIndex", Location.first);
+    YamlIO.mapRequired("OpndIndex", Location.second);
+    YamlIO.mapRequired("OpndHash", Entry.second);
+  }
+};
+
+/// YAML mapping of a StableFunction; every field is required.
+template <> struct MappingTraits<StableFunction> {
+  static void mapping(IO &YamlIO, StableFunction &Function) {
+    YamlIO.mapRequired("Hash", Function.Hash);
+    YamlIO.mapRequired("FunctionName", Function.FunctionName);
+    YamlIO.mapRequired("ModuleName", Function.ModuleName);
+    YamlIO.mapRequired("InstCount", Function.InstCount);
+    YamlIO.mapRequired("IndexOperandHashes", Function.IndexOperandHashes);
+  }
+};
+
+} // namespace yaml
+} // namespace llvm
+
+// Collect a pointer to every StableFunctionEntry of SFM, ordered by hash,
+// then module name, then function name.
+static SmallVector<const StableFunctionEntry *>
+getStableFunctionEntries(const StableFunctionMap &SFM) {
+  SmallVector<const StableFunctionEntry *> Sorted;
+  for (const auto &HashAndFuncs : SFM.getFunctionMap())
+    for (const auto &Owned : HashAndFuncs.second)
+      Sorted.emplace_back(Owned.get());
+
+  // Sort key of one entry; the names are resolved through SFM's name table.
+  auto KeyOf = [&SFM](const StableFunctionEntry *Entry) {
+    return std::tuple(Entry->Hash, SFM.getNameForId(Entry->ModuleNameId),
+                      SFM.getNameForId(Entry->FunctionNameId));
+  };
+  std::stable_sort(
+      Sorted.begin(), Sorted.end(),
+      [&KeyOf](const StableFunctionEntry *A, const StableFunctionEntry *B) {
+        return KeyOf(A) < KeyOf(B);
+      });
+  return Sorted;
+}
+
+// Copy the IndexOperandHashMap of FuncEntry into a vector ordered by
+// IndexPair.
+static IndexOperandHashVecType
+getStableIndexOperandHashes(const StableFunctionEntry *FuncEntry) {
+  IndexOperandHashVecType Ordered;
+  for (auto &LocAndHash : *FuncEntry->IndexOperandHashMap)
+    Ordered.emplace_back(LocAndHash.first, LocAndHash.second);
+
+  auto ByLocation = [](auto &A, auto &B) { return A.first < B.first; };
+  std::sort(Ordered.begin(), Ordered.end(), ByLocation);
+  return Ordered;
+}
+
+// Serialize the owned map by delegating to the static overload.
+void StableFunctionMapRecord::serialize(raw_ostream &OS) const {
+  StableFunctionMapRecord::serialize(OS, FunctionMap.get());
+}
+
+// Write FunctionMap to OS in little-endian binary form: the name table first,
+// then the function entries in sorted order.
+void StableFunctionMapRecord::serialize(raw_ostream &OS,
+                                        const StableFunctionMap *FunctionMap) {
+  support::endian::Writer Writer(OS, endianness::little);
+
+  // Name table: a 32-bit count followed by NUL-terminated strings.
+  const auto &Names = FunctionMap->getNames();
+  Writer.write<uint32_t>(Names.size());
+  uint32_t BytesWritten = 4;
+  for (const auto &Name : Names) {
+    Writer.OS << Name << '\0';
+    BytesWritten += Name.size() + 1;
+  }
+  // Pad the name table with NULs up to a 4-byte boundary.
+  uint32_t PadBytes = offsetToAlignment(BytesWritten, Align(4));
+  while (PadBytes-- > 0)
+    Writer.OS << '\0';
+
+  // Function entries: a 32-bit count followed by each entry, in the order
+  // given by getStableFunctionEntries.
+  const auto Entries = getStableFunctionEntries(*FunctionMap);
+  Writer.write<uint32_t>(Entries.size());
+
+  for (const StableFunctionEntry *Entry : Entries) {
+    Writer.write<stable_hash>(Entry->Hash);
+    Writer.write<uint32_t>(Entry->FunctionNameId);
+    Writer.write<uint32_t>(Entry->ModuleNameId);
+    Writer.write<uint32_t>(Entry->InstCount);
+
+    // Operand hashes are emitted sorted by location: count, then triples of
+    // instruction index, operand index and hash.
+    const IndexOperandHashVecType OperandHashes =
+        getStableIndexOperandHashes(Entry);
+    Writer.write<uint32_t>(OperandHashes.size());
+    for (const auto &LocAndHash : OperandHashes) {
+      Writer.write<uint32_t>(LocAndHash.first.first);
+      Writer.write<uint32_t>(LocAndHash.first.second);
+      Writer.write<stable_hash>(LocAndHash.second);
+    }
+  }
+}
+
+// Read a name table and function entries from the bytes at Ptr into
+// FunctionMap, advancing Ptr past everything that was consumed.
+void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr) {
+  // Assert that Ptr is 4-byte aligned
+  assert(((uintptr_t)Ptr % 4) == 0);
+
+  // Little-endian readers that advance Ptr.
+  auto ReadU32 = [&Ptr] {
+    return endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+  };
+  auto ReadHash = [&Ptr] {
+    return endian::readNext<stable_hash, endianness::little, unaligned>(Ptr);
+  };
+
+  // Read Names.
+  const uint32_t NameCount = ReadU32();
+  // Early exit if there is no name.
+  // NOTE(review): the function count that serialize() writes after the name
+  // table is left unread on this path - confirm callers expect that.
+  if (NameCount == 0)
+    return;
+  for (unsigned I = 0; I < NameCount; ++I) {
+    // Each name is NUL-terminated; skip the terminator as well.
+    StringRef Name(reinterpret_cast<const char *>(Ptr));
+    Ptr += Name.size() + 1;
+    FunctionMap->getIdOrCreateForName(Name);
+  }
+  // Align Ptr to 4 bytes.
+  Ptr = reinterpret_cast<const uint8_t *>(alignAddr(Ptr, Align(4)));
+
+  // Read StableFunctionEntries.
+  const uint32_t FuncCount = ReadU32();
+  for (unsigned I = 0; I < FuncCount; ++I) {
+    const stable_hash Hash = ReadHash();
+    const uint32_t FunctionNameId = ReadU32();
+    assert(FunctionMap->getNameForId(FunctionNameId) &&
+           "FunctionNameId out of range");
+    const uint32_t ModuleNameId = ReadU32();
+    assert(FunctionMap->getNameForId(ModuleNameId) &&
+           "ModuleNameId out of range");
+    const uint32_t InstCount = ReadU32();
+
+    // Read IndexOperandHashes to build IndexOperandHashMap
+    const uint32_t OperandHashCount = ReadU32();
+    auto OperandHashes = std::make_unique<IndexOperandHashMapType>();
+    for (unsigned J = 0; J < OperandHashCount; ++J) {
+      const uint32_t InstIndex = ReadU32();
+      const uint32_t OpndIndex = ReadU32();
+      const stable_hash OpndHash = ReadHash();
+      assert(InstIndex < InstCount && "InstIndex out of range");
+
+      (*OperandHashes)[std::make_pair(InstIndex, OpndIndex)] = OpndHash;
+    }
+
+    // Insert a new StableFunctionEntry into the map.
+    FunctionMap->insert(std::make_unique<StableFunctionEntry>(
+        Hash, FunctionNameId, ModuleNameId, InstCount,
+        std::move(OperandHashes)));
+  }
+}
+
+// Emit the map to YOS as a YAML sequence of StableFunction objects, in the
+// order given by getStableFunctionEntries.
+void StableFunctionMapRecord::serializeYAML(yaml::Output &YOS) const {
+  SmallVector<StableFunction> Functions;
+  for (const StableFunctionEntry *Entry :
+       getStableFunctionEntries(*FunctionMap)) {
+    // Turn the name IDs back into strings for the textual form.
+    std::string FuncName = *FunctionMap->getNameForId(Entry->FunctionNameId);
+    std::string ModName = *FunctionMap->getNameForId(Entry->ModuleNameId);
+    Functions.emplace_back(Entry->Hash, std::move(FuncName),
+                           std::move(ModName), Entry->InstCount,
+                           getStableIndexOperandHashes(Entry));
+  }
+
+  YOS << Functions;
+}
+
+// Parse a sequence of StableFunction objects from YIS, insert each of them
+// into FunctionMap, then advance YIS to the next document.
+void StableFunctionMapRecord::deserializeYAML(yaml::Input &YIS) {
+  std::vector<StableFunction> Parsed;
+  YIS >> Parsed;
+  for (const StableFunction &Function : Parsed)
+    FunctionMap->insert(Function);
+  YIS.nextDocument();
+}
diff --git a/llvm/unittests/CGData/CMakeLists.txt b/llvm/unittests/CGData/CMakeLists.txt
index 792b323130b474..0bdb9e1f08c702 100644
--- a/llvm/unittests/CGData/CMakeLists.txt
+++ b/llvm/unittests/CGData/CMakeLists.txt
@@ -9,6 +9,8 @@ set(LLVM_LINK_COMPONENTS
add_llvm_unittest(CGDataTests
OutlinedHashTreeRecordTest.cpp
OutlinedHashTreeTest.cpp
+ StableFunctionMapRecordTest.cpp
+ StableFunctionMapTest.cpp
)
target_link_libraries(CGDataTests PRIVATE LLVMTestingSupport)
diff --git a/llvm/unittests/CGData/StableFunctionMapRecordTest.cpp b/llvm/unittests/CGData/StableFunctionMapRecordTest.cpp
new file mode 100644
index 00000000000000..f5c9afe449da35
--- /dev/null
+++ b/llvm/unittests/CGData/StableFunctionMapRecordTest.cpp
@@ -0,0 +1,127 @@
+//===- StableFunctionMapRecordTest.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CGData/StableFunctionMapRecord.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+// print() renders a record holding a single function as one YAML document.
+TEST(StableFunctionMapRecordTest, Print) {
+  StableFunctionMapRecord Record;
+  StableFunction OnlyFunc{1, "Func1", "Mod1", 2, {{{0, 1}, 3}}};
+  Record.FunctionMap->insert(OnlyFunc);
+
+  const char *Expected = R"(---
+- Hash: 1
+ FunctionName: Func1
+ ModuleName: Mod1
+ InstCount: 2
+ IndexOperandHashes:
+ - InstIndex: 0
+ OpndIndex: 1
+ OpndHash: 3
+...
+)";
+  std::string Actual;
+  raw_string_ostream OS(Actual);
+  Record.print(OS);
+  EXPECT_EQ(Expected, Actual);
+}
+
+// The printed form must not depend on the order in which functions were
+// inserted.
+TEST(StableFunctionMapRecordTest, Stable) {
+  StableFunction FuncA{1, "Func2", "Mod1", 1, {}};
+  StableFunction FuncB{1, "Func3", "Mod1", 1, {}};
+  StableFunction FuncC{1, "Func1", "Mod2", 1, {}};
+  StableFunction FuncD{2, "Func4", "Mod3", 1, {}};
+
+  // Same functions, inserted front to back ...
+  StableFunctionMapRecord Forward;
+  Forward.FunctionMap->insert(FuncA);
+  Forward.FunctionMap->insert(FuncB);
+  Forward.FunctionMap->insert(FuncC);
+  Forward.FunctionMap->insert(FuncD);
+
+  // ... and back to front.
+  StableFunctionMapRecord Backward;
+  Backward.FunctionMap->insert(FuncD);
+  Backward.FunctionMap->insert(FuncC);
+  Backward.FunctionMap->insert(FuncB);
+  Backward.FunctionMap->insert(FuncA);
+
+  // Output is sorted by hash (1 < 2), module name (Mod1 < Mod2), and function
+  // name (Func2 < Func3).
+  std::string ForwardDump;
+  raw_string_ostream ForwardOS(ForwardDump);
+  Forward.print(ForwardOS);
+  std::string BackwardDump;
+  raw_string_ostream BackwardOS(BackwardDump);
+  Backward.print(BackwardOS);
+  EXPECT_EQ(ForwardDump, BackwardDump);
+}
+
+// A record written in binary form and read back prints identically.
+TEST(StableFunctionMapRecordTest, Serialize) {
+  StableFunctionMapRecord Written;
+  StableFunction FuncA{1, "Func1", "Mod1", 2, {{{0, 1}, 3}, {{1, 2}, 4}}};
+  StableFunction FuncB{2, "Func2", "Mod1", 3, {{{0, 1}, 2}}};
+  StableFunction FuncC{2, "Func3", "Mod1", 3, {{{0, 1}, 3}}};
+  Written.FunctionMap->insert(FuncA);
+  Written.FunctionMap->insert(FuncB);
+  Written.FunctionMap->insert(FuncC);
+
+  // Serialize and deserialize the map.
+  SmallVector<char> Buffer;
+  raw_svector_ostream BufferOS(Buffer);
+  Written.serialize(BufferOS);
+
+  StableFunctionMapRecord ReadBack;
+  const uint8_t *Cursor = reinterpret_cast<const uint8_t *>(Buffer.data());
+  ReadBack.deserialize(Cursor);
+
+  // Two maps should be identical.
+  std::string WrittenDump;
+  raw_string_ostream WrittenOS(WrittenDump);
+  Written.print(WrittenOS);
+  std::string ReadBackDump;
+  raw_string_ostream ReadBackOS(ReadBackDump);
+  ReadBack.print(ReadBackOS);
+
+  EXPECT_EQ(WrittenDump, ReadBackDump);
+}
+
+// A record written as YAML and parsed back prints identically.
+TEST(StableFunctionMapRecordTest, SerializeYAML) {
+  StableFunctionMapRecord Written;
+  StableFunction FuncA{1, "Func1", "Mod1", 2, {{{0, 1}, 3}, {{1, 2}, 4}}};
+  StableFunction FuncB{2, "Func2", "Mod1", 3, {{{0, 1}, 2}}};
+  StableFunction FuncC{2, "Func3", "Mod1", 3, {{{0, 1}, 3}}};
+  Written.FunctionMap->insert(FuncA);
+  Written.FunctionMap->insert(FuncB);
+  Written.FunctionMap->insert(FuncC);
+
+  // Serialize and deserialize the map in a YAML format.
+  std::string Text;
+  raw_string_ostream TextOS(Text);
+  yaml::Output YamlOut(TextOS);
+  Written.serializeYAML(YamlOut);
+
+  StableFunctionMapRecord ReadBack;
+  yaml::Input YamlIn(StringRef(Text.data(), Text.size()));
+  ReadBack.deserializeYAML(YamlIn);
+
+  // Two maps should be identical.
+  std::string WrittenDump;
+  raw_string_ostream WrittenOS(WrittenDump);
+  Written.print(WrittenOS);
+  std::string ReadBackDump;
+  raw_string_ostream ReadBackOS(ReadBackDump);
+  ReadBack.print(ReadBackOS);
+
+  EXPECT_EQ(WrittenDump, ReadBackDump);
+}
+
+} // end namespace
diff --git a/llvm/unittests/CGData/StableFunctionMapTest.cpp b/llvm/unittests/CGData/StableFunctionMapTest.cpp
new file mode 100644
index 00000000000000..839b997cdacac2
--- /dev/null
+++ b/llvm/unittests/CGData/StableFunctionMapTest.cpp
@@ -0,0 +1,153 @@
+//===- StableFunctionMapTest.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CGData/StableFunctionMap.h"
+#include "gmock/gmock-matchers.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+using testing::Contains;
+using testing::Key;
+using testing::Not;
+using testing::Pair;
+using testing::SizeIs;
+
+// Name IDs are created on first use and reused for repeated names.
+TEST(StableFunctionMap, Name) {
+  StableFunctionMap Map;
+  EXPECT_TRUE(Map.empty());
+  EXPECT_TRUE(Map.getNames().empty());
+  const unsigned FirstId = Map.getIdOrCreateForName("Func1");
+  const unsigned SecondId = Map.getIdOrCreateForName("Func2");
+  const unsigned RepeatId = Map.getIdOrCreateForName("Func1");
+
+  // Only two distinct names were registered.
+  EXPECT_THAT(Map.getNames(), SizeIs(2));
+  // The different names should return different IDs.
+  EXPECT_NE(FirstId, SecondId);
+  // The same name should return the same ID.
+  EXPECT_EQ(FirstId, RepeatId);
+  // The IDs should be valid.
+  EXPECT_EQ(*Map.getNameForId(FirstId), "Func1");
+  EXPECT_EQ(*Map.getNameForId(SecondId), "Func2");
+}
+
+// Two functions with the same hash land under one hash key.
+TEST(StableFunctionMap, Insert) {
+  StableFunctionMap Map;
+
+  StableFunction First{1, "Func1", "Mod1", 2, {{{0, 1}, 3}}};
+  StableFunction Second{1, "Func2", "Mod1", 2, {{{0, 1}, 2}}};
+  Map.insert(First);
+  Map.insert(Second);
+  // We only have a unique hash, 1
+  EXPECT_THAT(Map, SizeIs(1));
+  // We have two functions with the same hash which are potentially mergeable.
+  EXPECT_EQ(Map.size(StableFunctionMap::SizeType::TotalFunctionCount), 2u);
+  EXPECT_EQ(Map.size(StableFunctionMap::SizeType::MergeableFunctionCount), 2u);
+}
+
+// Entries whose names were registered by hand can be inserted directly.
+TEST(StableFunctionMap, InsertEntry) {
+  StableFunctionMap Map;
+
+  const unsigned Func1Id = Map.getIdOrCreateForName("Func1");
+  const unsigned Mod1Id = Map.getIdOrCreateForName("Mod1");
+  const unsigned Func2Id = Map.getIdOrCreateForName("Func2");
+
+  // Create a function entry and insert it into the map.
+  auto FirstOperands = std::make_unique<IndexOperandHashMapType>();
+  FirstOperands->try_emplace({1, 1}, 3);
+  auto FirstEntry = std::make_unique<StableFunctionEntry>(
+      1, Func1Id, Mod1Id, 2, std::move(FirstOperands));
+  Map.insert(std::move(FirstEntry));
+
+  // Create another function entry and insert it into the map.
+  auto SecondOperands = std::make_unique<IndexOperandHashMapType>();
+  SecondOperands->try_emplace({0, 1}, 2);
+  auto SecondEntry = std::make_unique<StableFunctionEntry>(
+      1, Func2Id, Mod1Id, 2, std::move(SecondOperands));
+  Map.insert(std::move(SecondEntry));
+
+  // We only have a unique hash, 1
+  EXPECT_THAT(Map, SizeIs(1));
+  // We have two functions with the same hash which are potentially mergeable.
+  EXPECT_EQ(Map.size(StableFunctionMap::SizeType::TotalFunctionCount), 2u);
+}
+
+// Merging appends the functions of the other map to this one.
+TEST(StableFunctionMap, Merge) {
+  StableFunctionMap Target;
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}}};
+  StableFunction Func2{1, "Func2", "Mod1", 2, {{{0, 1}, 2}}};
+  StableFunction Func3{2, "Func3", "Mod1", 2, {{{1, 1}, 2}}};
+  Target.insert(Func1);
+  Target.insert(Func2);
+  Target.insert(Func3);
+
+  StableFunctionMap Source;
+  StableFunction Func4{1, "Func4", "Mod2", 2, {{{0, 1}, 4}}};
+  StableFunction Func5{2, "Func5", "Mod2", 2, {{{1, 1}, 5}}};
+  StableFunction Func6{3, "Func6", "Mod2", 2, {{{1, 1}, 6}}};
+  Source.insert(Func4);
+  Source.insert(Func5);
+  Source.insert(Func6);
+
+  // Merge two maps.
+  Target.merge(Source);
+
+  // We have three unique hashes: 1, 2 and 3.
+  EXPECT_THAT(Target, SizeIs(3));
+  // We have total 6 functions.
+  EXPECT_EQ(Target.size(StableFunctionMap::SizeType::TotalFunctionCount), 6u);
+  // We have 5 mergeable functions. Func6 only has a unique hash, 3.
+  EXPECT_EQ(Target.size(StableFunctionMap::SizeType::MergeableFunctionCount),
+            5u);
+}
+
+// finalize() drops a hash whose functions differ in instruction count.
+TEST(StableFunctionMap, Finalize1) {
+  StableFunctionMap Map;
+  StableFunction TwoInsts{1, "Func1", "Mod1", 2, {{{0, 1}, 3}}};
+  StableFunction ThreeInsts{1, "Func2", "Mod2", 3, {{{0, 1}, 2}}};
+  Map.insert(TwoInsts);
+  Map.insert(ThreeInsts);
+
+  // Instruction count is mis-matched, so they're not mergeable.
+  Map.finalize();
+  EXPECT_TRUE(Map.empty());
+}
+
+// finalize() drops a hash whose functions differ in the number of operands.
+TEST(StableFunctionMap, Finalize2) {
+  StableFunctionMap Map;
+  StableFunction OneOperand{1, "Func1", "Mod1", 2, {{{0, 1}, 3}}};
+  StableFunction TwoOperands{1, "Func2", "Mod2", 2, {{{0, 1}, 2}, {{1, 1}, 1}}};
+  Map.insert(OneOperand);
+  Map.insert(TwoOperands);
+
+  // Operand map size is mis-matched, so they're not mergeable.
+  Map.finalize();
+  EXPECT_TRUE(Map.empty());
+}
+
+// finalize() removes the operand entry whose hash is the same in every
+// function and keeps the one that differs.
+TEST(StableFunctionMap, Finalize3) {
+  StableFunctionMap Map;
+  StableFunction First{1, "Func1", "Mod1", 2, {{{0, 1}, 3}, {{1, 1}, 1}}};
+  StableFunction Second{1, "Func2", "Mod2", 2, {{{0, 1}, 2}, {{1, 1}, 1}}};
+  Map.insert(First);
+  Map.insert(Second);
+
+  // The same operand entry is removed, which is redundant.
+  Map.finalize();
+  auto &HashToFuncs = Map.getFunctionMap();
+  EXPECT_THAT(HashToFuncs, SizeIs(1));
+  auto &Entries = HashToFuncs.begin()->second;
+  for (auto &Entry : Entries) {
+    EXPECT_THAT(*Entry->IndexOperandHashMap, SizeIs(1));
+    ASSERT_THAT(*Entry->IndexOperandHashMap, Not(Contains(Key(Pair(1, 1)))));
+  }
+}
+
+} // end namespace
>From c7913f9fff736da4cc6a78a17e41dc539bc75e8a Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Mon, 9 Sep 2024 19:38:05 -0700
Subject: [PATCH 2/5] [CGData][llvm-cgdata] Support for stable function map
This introduces a new cgdata format for stable function maps.
The raw data is embedded in the __llvm_merge section during compile time.
This data can be read and merged using the llvm-cgdata tool, into an indexed cgdata file. Consequently, the tool is now capable of handling either outlined hash trees, stable function maps, or both, as they are orthogonal.
---
lld/test/MachO/cgdata-generate.s | 6 +-
llvm/docs/CommandGuide/llvm-cgdata.rst | 16 ++--
llvm/include/llvm/CGData/CodeGenData.h | 24 +++++-
llvm/include/llvm/CGData/CodeGenData.inc | 12 ++-
llvm/include/llvm/CGData/CodeGenDataReader.h | 29 ++++++-
llvm/include/llvm/CGData/CodeGenDataWriter.h | 17 +++-
llvm/lib/CGData/CodeGenData.cpp | 30 ++++---
llvm/lib/CGData/CodeGenDataReader.cpp | 63 +++++++++-----
llvm/lib/CGData/CodeGenDataWriter.cpp | 30 ++++++-
llvm/test/tools/llvm-cgdata/empty.test | 8 +-
llvm/test/tools/llvm-cgdata/error.test | 13 +--
.../merge-combined-funcmap-hashtree.test | 66 +++++++++++++++
.../llvm-cgdata/merge-funcmap-archive.test | 83 +++++++++++++++++++
.../llvm-cgdata/merge-funcmap-concat.test | 78 +++++++++++++++++
.../llvm-cgdata/merge-funcmap-double.test | 79 ++++++++++++++++++
.../llvm-cgdata/merge-funcmap-single.test | 36 ++++++++
...chive.test => merge-hashtree-archive.test} | 8 +-
...concat.test => merge-hashtree-concat.test} | 6 +-
...double.test => merge-hashtree-double.test} | 8 +-
...single.test => merge-hashtree-single.test} | 4 +-
llvm/tools/llvm-cgdata/llvm-cgdata.cpp | 48 ++++++++---
21 files changed, 577 insertions(+), 87 deletions(-)
create mode 100644 llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
create mode 100644 llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
create mode 100644 llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
create mode 100644 llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
create mode 100644 llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
rename llvm/test/tools/llvm-cgdata/{merge-archive.test => merge-hashtree-archive.test} (91%)
rename llvm/test/tools/llvm-cgdata/{merge-concat.test => merge-hashtree-concat.test} (93%)
rename llvm/test/tools/llvm-cgdata/{merge-double.test => merge-hashtree-double.test} (90%)
rename llvm/test/tools/llvm-cgdata/{merge-single.test => merge-hashtree-single.test} (92%)
diff --git a/lld/test/MachO/cgdata-generate.s b/lld/test/MachO/cgdata-generate.s
index 174df39d666c5d..f942ae07f64e0e 100644
--- a/lld/test/MachO/cgdata-generate.s
+++ b/lld/test/MachO/cgdata-generate.s
@@ -3,12 +3,12 @@
# RUN: rm -rf %t; split-file %s %t
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
# RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
-# RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt
+# RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt
# RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-template.s > %t/merge-1.s
# RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
-# RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt
+# RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt
# RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-template.s > %t/merge-2.s
# RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-1.s -o %t/merge-1.o
diff --git a/llvm/docs/CommandGuide/llvm-cgdata.rst b/llvm/docs/CommandGuide/llvm-cgdata.rst
index f592e1508844ee..0670decd087e39 100644
--- a/llvm/docs/CommandGuide/llvm-cgdata.rst
+++ b/llvm/docs/CommandGuide/llvm-cgdata.rst
@@ -11,15 +11,13 @@ SYNOPSIS
DESCRIPTION
-----------
-The :program:llvm-cgdata utility parses raw codegen data embedded
-in compiled binary files and merges them into a single .cgdata file.
-It can also inspect and manipulate .cgdata files.
-Currently, the tool supports saving and restoring outlined hash trees,
-enabling global function outlining across modules, allowing for more
-efficient function outlining in subsequent compilations.
-The design is extensible, allowing for the incorporation of additional
-codegen summaries and optimization techniques, such as global function
-merging, in the future.
+The :program:`llvm-cgdata` utility parses raw codegen data embedded in compiled
+binary files and merges them into a single .cgdata file. It can also inspect
+and manipulate .cgdata files. Currently, the tool supports saving and restoring
+outlined hash trees and stable function maps, allowing for more efficient
+function outlining and function merging across modules in subsequent
+compilations. The design is extensible, allowing for the incorporation of
+additional codegen summaries and optimization techniques.
COMMANDS
--------
diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h
index 53550beeae1f83..5d7c74725ccef1 100644
--- a/llvm/include/llvm/CGData/CodeGenData.h
+++ b/llvm/include/llvm/CGData/CodeGenData.h
@@ -19,6 +19,7 @@
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CGData/OutlinedHashTree.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
+#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Caching.h"
@@ -41,7 +42,9 @@ enum class CGDataKind {
Unknown = 0x0,
// A function outlining info.
FunctionOutlinedHashTree = 0x1,
- LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree)
+ // A function merging info.
+ StableFunctionMergingMap = 0x2,
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/StableFunctionMergingMap)
};
const std::error_category &cgdata_category();
@@ -108,6 +111,8 @@ enum CGDataMode {
class CodeGenData {
/// Global outlined hash tree that has oulined hash sequences across modules.
std::unique_ptr<OutlinedHashTree> PublishedHashTree;
+ /// Global stable function map that has stable function info across modules.
+ std::unique_ptr<StableFunctionMap> PublishedStableFunctionMap;
/// This flag is set when -fcodegen-data-generate is passed.
/// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
@@ -131,6 +136,9 @@ class CodeGenData {
bool hasOutlinedHashTree() {
return PublishedHashTree && !PublishedHashTree->empty();
}
+ bool hasStableFunctionMap() {
+ return PublishedStableFunctionMap && !PublishedStableFunctionMap->empty();
+ }
/// Returns the outlined hash tree. This can be globally used in a read-only
/// manner.
@@ -147,6 +155,12 @@ class CodeGenData {
// Ensure we disable emitCGData as we do not want to read and write both.
EmitCGData = false;
}
+ void
+ publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
+ PublishedStableFunctionMap = std::move(FunctionMap);
+ // Ensure we disable emitCGData as we do not want to read and write both.
+ EmitCGData = false;
+ }
};
namespace cgdata {
@@ -166,6 +180,11 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
}
+inline void
+publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
+ CodeGenData::getInstance().publishStableFunctionMap(std::move(FunctionMap));
+}
+
struct StreamCacheData {
/// Backing buffer for serialized data stream.
SmallVector<SmallString<0>> Outputs;
@@ -249,6 +268,8 @@ enum CGDataVersion {
// Version 1 is the first version. This version supports the outlined
// hash tree.
Version1 = 1,
+ // Version 2 supports the stable function merging map.
+ Version2 = 2,
CurrentVersion = CG_DATA_INDEX_VERSION
};
const uint64_t Version = CGDataVersion::CurrentVersion;
@@ -258,6 +279,7 @@ struct Header {
uint32_t Version;
uint32_t DataKind;
uint64_t OutlinedHashTreeOffset;
+ uint64_t StableFunctionMapOffset;
// New fields should only be added at the end to ensure that the size
// computation is correct. The methods below need to be updated to ensure that
diff --git a/llvm/include/llvm/CGData/CodeGenData.inc b/llvm/include/llvm/CGData/CodeGenData.inc
index 08ec14ea051a0c..e0ae7a51024d87 100644
--- a/llvm/include/llvm/CGData/CodeGenData.inc
+++ b/llvm/include/llvm/CGData/CodeGenData.inc
@@ -20,6 +20,8 @@
#define CG_DATA_DEFINED
CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON),
CG_DATA_OUTLINE_COFF, "__DATA,")
+CG_DATA_SECT_ENTRY(CG_merge, CG_DATA_QUOTE(CG_DATA_MERGE_COMMON),
+ CG_DATA_MERGE_COFF, "__DATA,")
#undef CG_DATA_SECT_ENTRY
#endif
@@ -27,20 +29,24 @@ CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON),
/* section name strings common to all targets other
than WIN32 */
#define CG_DATA_OUTLINE_COMMON __llvm_outline
+#define CG_DATA_MERGE_COMMON __llvm_merge
/* Since cg data sections are not allocated, we don't need to
* access them at runtime.
*/
#define CG_DATA_OUTLINE_COFF ".loutline"
+#define CG_DATA_MERGE_COFF ".lmerge"
#ifdef _WIN32
/* Runtime section names and name strings. */
-#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF
+#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_OUTLINE_COFF
+#define CG_DATA_MERGE_SECT_NAME CG_DATA_MERGE_COFF
#else
/* Runtime section names and name strings. */
-#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
+#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
+#define CG_DATA_MERGE_SECT_NAME CG_DATA_QUOTE(CG_DATA_MERGE_COMMON)
#endif
/* Indexed codegen data format version (start from 1). */
-#define CG_DATA_INDEX_VERSION 1
+#define CG_DATA_INDEX_VERSION 2
diff --git a/llvm/include/llvm/CGData/CodeGenDataReader.h b/llvm/include/llvm/CGData/CodeGenDataReader.h
index 7e4882df2116e2..085dd6dd747c90 100644
--- a/llvm/include/llvm/CGData/CodeGenDataReader.h
+++ b/llvm/include/llvm/CGData/CodeGenDataReader.h
@@ -15,6 +15,7 @@
#include "llvm/CGData/CodeGenData.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
+#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/VirtualFileSystem.h"
@@ -36,10 +37,15 @@ class CodeGenDataReader {
virtual CGDataKind getDataKind() const = 0;
/// Return true if the data has an outlined hash tree.
virtual bool hasOutlinedHashTree() const = 0;
+ /// Return true if the data has a stable function map.
+ virtual bool hasStableFunctionMap() const = 0;
/// Return the outlined hash tree that is released from the reader.
std::unique_ptr<OutlinedHashTree> releaseOutlinedHashTree() {
return std::move(HashTreeRecord.HashTree);
}
+ std::unique_ptr<StableFunctionMap> releaseStableFunctionMap() {
+ return std::move(FunctionMapRecord.FunctionMap);
+ }
/// Factory method to create an appropriately typed reader for the given
/// codegen data file path and file system.
@@ -56,15 +62,21 @@ class CodeGenDataReader {
/// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds.
/// Optionally, \p CombinedHash can be used to compuate the combined hash of
/// the merged data.
- static Error mergeFromObjectFile(const object::ObjectFile *Obj,
- OutlinedHashTreeRecord &GlobalOutlineRecord,
- stable_hash *CombinedHash = nullptr);
+ static Error
+ mergeFromObjectFile(const object::ObjectFile *Obj,
+ OutlinedHashTreeRecord &GlobalOutlineRecord,
+ StableFunctionMapRecord &GlobalFunctionMapRecord,
+ stable_hash *CombinedHash = nullptr);
protected:
/// The outlined hash tree that has been read. When it's released by
/// releaseOutlinedHashTree(), it's no longer valid.
OutlinedHashTreeRecord HashTreeRecord;
+ /// The stable function map that has been read. When it's released by
+ /// releaseStableFunctionMap(), it's no longer valid.
+ StableFunctionMapRecord FunctionMapRecord;
+
/// Set the current error and return same.
Error error(cgdata_error Err, const std::string &ErrMsg = "") {
LastError = Err;
@@ -115,6 +127,11 @@ class IndexedCodeGenDataReader : public CodeGenDataReader {
return Header.DataKind &
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
}
+ /// Return true if the header indicates the data has a stable function map.
+ bool hasStableFunctionMap() const override {
+ return Header.DataKind &
+ static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
+ }
};
/// This format is a simple text format that's suitable for test data.
@@ -150,6 +167,12 @@ class TextCodeGenDataReader : public CodeGenDataReader {
return static_cast<uint32_t>(DataKind) &
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
}
+ /// Return true if the header indicates the data has a stable function map.
+ /// This does not mean that the data is still available.
+ bool hasStableFunctionMap() const override {
+ return static_cast<uint32_t>(DataKind) &
+ static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
+ }
};
} // end namespace llvm
diff --git a/llvm/include/llvm/CGData/CodeGenDataWriter.h b/llvm/include/llvm/CGData/CodeGenDataWriter.h
index 5cb8377b1d07e5..1c4247608999a7 100644
--- a/llvm/include/llvm/CGData/CodeGenDataWriter.h
+++ b/llvm/include/llvm/CGData/CodeGenDataWriter.h
@@ -15,6 +15,7 @@
#include "llvm/CGData/CodeGenData.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
+#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"
@@ -57,6 +58,9 @@ class CodeGenDataWriter {
/// The outlined hash tree to be written.
OutlinedHashTreeRecord HashTreeRecord;
+ /// The stable function map to be written.
+ StableFunctionMapRecord FunctionMapRecord;
+
/// A bit mask describing the kind of the codegen data.
CGDataKind DataKind = CGDataKind::Unknown;
@@ -64,9 +68,12 @@ class CodeGenDataWriter {
CodeGenDataWriter() = default;
~CodeGenDataWriter() = default;
- /// Add the outlined hash tree record. The input Record is released.
+ /// Add the outlined hash tree record. The input hash tree is released.
void addRecord(OutlinedHashTreeRecord &Record);
+ /// Add the stable function map record. The input function map is released.
+ void addRecord(StableFunctionMapRecord &Record);
+
/// Write the codegen data to \c OS
Error write(raw_fd_ostream &OS);
@@ -81,11 +88,19 @@ class CodeGenDataWriter {
return static_cast<uint32_t>(DataKind) &
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
}
+ /// Return true if the header indicates the data has a stable function map.
+ bool hasStableFunctionMap() const {
+ return static_cast<uint32_t>(DataKind) &
+ static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
+ }
private:
/// The offset of the outlined hash tree in the file.
uint64_t OutlinedHashTreeOffset;
+ /// The offset of the stable function map in the file.
+ uint64_t StableFunctionMapOffset;
+
/// Write the codegen data header to \c COS
Error writeHeader(CGDataOStream &COS);
diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp
index 2a3a74c8bc37af..88dcdfd1f931a2 100644
--- a/llvm/lib/CGData/CodeGenData.cpp
+++ b/llvm/lib/CGData/CodeGenData.cpp
@@ -14,6 +14,7 @@
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/CGData/CodeGenDataReader.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
+#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Caching.h"
#include "llvm/Support/CommandLine.h"
@@ -163,6 +164,8 @@ CodeGenData &CodeGenData::getInstance() {
auto Reader = ReaderOrErr->get();
if (Reader->hasOutlinedHashTree())
Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree());
+ if (Reader->hasStableFunctionMap())
+ Instance->publishStableFunctionMap(Reader->releaseStableFunctionMap());
}
});
return *Instance;
@@ -185,18 +188,14 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
return make_error<CGDataError>(cgdata_error::unsupported_version);
H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
- switch (H.Version) {
- // When a new field is added to the header add a case statement here to
- // compute the size as offset of the new field + size of the new field. This
- // relies on the field being added to the end of the list.
- static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version1,
- "Please update the size computation below if a new field has "
- "been added to the header, if not add a case statement to "
- "fall through to the latest version.");
- case 1ull:
- H.OutlinedHashTreeOffset =
+ static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version2,
+ "Please update the offset computation below if a new field has "
+ "been added to the header.");
+ H.OutlinedHashTreeOffset =
+ endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
+ if (H.Version >= 2)
+ H.StableFunctionMapOffset =
endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
- }
return H;
}
@@ -257,6 +256,7 @@ std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
OutlinedHashTreeRecord GlobalOutlineRecord;
+ StableFunctionMapRecord GlobalStableFunctionMapRecord;
stable_hash CombinedHash = 0;
for (auto File : ObjFiles) {
if (File.empty())
@@ -270,12 +270,18 @@ Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get();
if (auto E = CodeGenDataReader::mergeFromObjectFile(
- Obj.get(), GlobalOutlineRecord, &CombinedHash))
+ Obj.get(), GlobalOutlineRecord, GlobalStableFunctionMapRecord,
+ &CombinedHash))
return E;
}
+ GlobalStableFunctionMapRecord.finalize();
+
if (!GlobalOutlineRecord.empty())
cgdata::publishOutlinedHashTree(std::move(GlobalOutlineRecord.HashTree));
+ if (!GlobalStableFunctionMapRecord.empty())
+ cgdata::publishStableFunctionMap(
+ std::move(GlobalStableFunctionMapRecord.FunctionMap));
return CombinedHash;
}
diff --git a/llvm/lib/CGData/CodeGenDataReader.cpp b/llvm/lib/CGData/CodeGenDataReader.cpp
index 2f2481ea60f822..ebeb4ae36f99f3 100644
--- a/llvm/lib/CGData/CodeGenDataReader.cpp
+++ b/llvm/lib/CGData/CodeGenDataReader.cpp
@@ -32,10 +32,40 @@ setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
Error CodeGenDataReader::mergeFromObjectFile(
const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
+ StableFunctionMapRecord &GlobalFunctionMapRecord,
stable_hash *CombinedHash) {
Triple TT = Obj->makeTriple();
auto CGOutLineName =
getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
+ auto CGMergeName =
+ getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false);
+
+ auto processSectionContents = [&](const StringRef &Name,
+ const StringRef &Contents) {
+ if (Name != CGOutLineName && Name != CGMergeName)
+ return;
+ if (CombinedHash)
+ *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents));
+ auto *Data = reinterpret_cast<const unsigned char *>(Contents.data());
+ auto *EndData = Data + Contents.size();
+ // When dealing with an executable that has concatenated cgdata,
+ // we want to merge them into a single cgdata.
+ // Although it's not a typical workflow, we support this scenario
+ // by looping over all data in the sections.
+ if (Name == CGOutLineName) {
+ while (Data != EndData) {
+ OutlinedHashTreeRecord LocalOutlineRecord;
+ LocalOutlineRecord.deserialize(Data);
+ GlobalOutlineRecord.merge(LocalOutlineRecord);
+ }
+ } else if (Name == CGMergeName) {
+ while (Data != EndData) {
+ StableFunctionMapRecord LocalFunctionMapRecord;
+ LocalFunctionMapRecord.deserialize(Data);
+ GlobalFunctionMapRecord.merge(LocalFunctionMapRecord);
+ }
+ }
+ };
for (auto &Section : Obj->sections()) {
Expected<StringRef> NameOrErr = Section.getName();
@@ -44,23 +74,7 @@ Error CodeGenDataReader::mergeFromObjectFile(
Expected<StringRef> ContentsOrErr = Section.getContents();
if (!ContentsOrErr)
return ContentsOrErr.takeError();
- auto *Data = reinterpret_cast<const unsigned char *>(ContentsOrErr->data());
- auto *EndData = Data + ContentsOrErr->size();
-
- if (*NameOrErr == CGOutLineName) {
- if (CombinedHash)
- *CombinedHash =
- stable_hash_combine(*CombinedHash, xxh3_64bits(*ContentsOrErr));
- // In case dealing with an executable that has concatenated cgdata,
- // we want to merge them into a single cgdata.
- // Although it's not a typical workflow, we support this scenario.
- while (Data != EndData) {
- OutlinedHashTreeRecord LocalOutlineRecord;
- LocalOutlineRecord.deserialize(Data);
- GlobalOutlineRecord.merge(LocalOutlineRecord);
- }
- }
- // TODO: Add support for other cgdata sections.
+ processSectionContents(*NameOrErr, *ContentsOrErr);
}
return Error::success();
@@ -69,7 +83,8 @@ Error CodeGenDataReader::mergeFromObjectFile(
Error IndexedCodeGenDataReader::read() {
using namespace support;
- // The smallest header with the version 1 is 24 bytes
+ // The smallest header with the version 1 is 24 bytes.
+ // Do not update this value even with the new version of the header.
const unsigned MinHeaderSize = 24;
if (DataBuffer->getBufferSize() < MinHeaderSize)
return error(cgdata_error::bad_header);
@@ -87,6 +102,12 @@ Error IndexedCodeGenDataReader::read() {
return error(cgdata_error::eof);
HashTreeRecord.deserialize(Ptr);
}
+ if (hasStableFunctionMap()) {
+ const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
+ if (Ptr >= End)
+ return error(cgdata_error::eof);
+ FunctionMapRecord.deserialize(Ptr);
+ }
return success();
}
@@ -152,6 +173,8 @@ Error TextCodeGenDataReader::read() {
StringRef Str = Line->drop_front().rtrim();
if (Str.equals_insensitive("outlined_hash_tree"))
DataKind |= CGDataKind::FunctionOutlinedHashTree;
+ else if (Str.equals_insensitive("stable_function_map"))
+ DataKind |= CGDataKind::StableFunctionMergingMap;
else
return error(cgdata_error::bad_header);
}
@@ -170,8 +193,8 @@ Error TextCodeGenDataReader::read() {
yaml::Input YOS(StringRef(Pos, Size));
if (hasOutlinedHashTree())
HashTreeRecord.deserializeYAML(YOS);
-
- // TODO: Add more yaml cgdata in order
+ if (hasStableFunctionMap())
+ FunctionMapRecord.deserializeYAML(YOS);
return Error::success();
}
diff --git a/llvm/lib/CGData/CodeGenDataWriter.cpp b/llvm/lib/CGData/CodeGenDataWriter.cpp
index 5f638be0fefe74..3a392036198a97 100644
--- a/llvm/lib/CGData/CodeGenDataWriter.cpp
+++ b/llvm/lib/CGData/CodeGenDataWriter.cpp
@@ -52,6 +52,13 @@ void CodeGenDataWriter::addRecord(OutlinedHashTreeRecord &Record) {
DataKind |= CGDataKind::FunctionOutlinedHashTree;
}
+void CodeGenDataWriter::addRecord(StableFunctionMapRecord &Record) {
+ assert(Record.FunctionMap && "empty function map in the record");
+ FunctionMapRecord.FunctionMap = std::move(Record.FunctionMap);
+
+ DataKind |= CGDataKind::StableFunctionMergingMap;
+}
+
Error CodeGenDataWriter::write(raw_fd_ostream &OS) {
CGDataOStream COS(OS);
return writeImpl(COS);
@@ -68,8 +75,11 @@ Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) {
if (static_cast<bool>(DataKind & CGDataKind::FunctionOutlinedHashTree))
Header.DataKind |=
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
-
+ if (static_cast<bool>(DataKind & CGDataKind::StableFunctionMergingMap))
+ Header.DataKind |=
+ static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
Header.OutlinedHashTreeOffset = 0;
+ Header.StableFunctionMapOffset = 0;
// Only write up to the CGDataKind. We need to remember the offset of the
// remaining fields to allow back-patching later.
@@ -83,6 +93,12 @@ Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) {
// Reserve the space for OutlinedHashTreeOffset field.
COS.write(0);
+ // Save the location of Header.StableFunctionMapOffset field in \c COS.
+ StableFunctionMapOffset = COS.tell();
+
+ // Reserve the space for StableFunctionMapOffset field.
+ COS.write(0);
+
return Error::success();
}
@@ -93,10 +109,14 @@ Error CodeGenDataWriter::writeImpl(CGDataOStream &COS) {
uint64_t OutlinedHashTreeFieldStart = COS.tell();
if (hasOutlinedHashTree())
HashTreeRecord.serialize(COS.OS);
+ uint64_t StableFunctionMapFieldStart = COS.tell();
+ if (hasStableFunctionMap())
+ FunctionMapRecord.serialize(COS.OS);
// Back patch the offsets.
CGDataPatchItem PatchItems[] = {
- {OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1}};
+ {OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1},
+ {StableFunctionMapOffset, &StableFunctionMapFieldStart, 1}};
COS.patch(PatchItems);
return Error::success();
@@ -106,6 +126,9 @@ Error CodeGenDataWriter::writeHeaderText(raw_fd_ostream &OS) {
if (hasOutlinedHashTree())
OS << "# Outlined stable hash tree\n:outlined_hash_tree\n";
+ if (hasStableFunctionMap())
+ OS << "# Stable function map\n:stable_function_map\n";
+
// TODO: Add more data types in this header
return Error::success();
@@ -119,6 +142,9 @@ Error CodeGenDataWriter::writeText(raw_fd_ostream &OS) {
if (hasOutlinedHashTree())
HashTreeRecord.serializeYAML(YOS);
+ if (hasStableFunctionMap())
+ FunctionMapRecord.serializeYAML(YOS);
+
// TODO: Write more yaml cgdata in order
return Error::success();
diff --git a/llvm/test/tools/llvm-cgdata/empty.test b/llvm/test/tools/llvm-cgdata/empty.test
index 70d5ea4b800630..bea78d512a6db7 100644
--- a/llvm/test/tools/llvm-cgdata/empty.test
+++ b/llvm/test/tools/llvm-cgdata/empty.test
@@ -16,7 +16,7 @@ RUN: llvm-cgdata --show %t_emptyheader.cgdata | count 0
# The version number appears when asked, as it's in the header
RUN: llvm-cgdata --show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION
-VERSION: Version: 1
+VERSION: Version: 2
# When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header.
RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
@@ -27,9 +27,11 @@ RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
# uint32_t Version;
# uint32_t DataKind;
# uint64_t OutlinedHashTreeOffset;
+# uint64_t StableFunctionMapOffset;
# }
RUN: printf '\xffcgdata\x81' > %t_header.cgdata
-RUN: printf '\x01\x00\x00\x00' >> %t_header.cgdata
+RUN: printf '\x02\x00\x00\x00' >> %t_header.cgdata
RUN: printf '\x00\x00\x00\x00' >> %t_header.cgdata
-RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
+RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
+RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
RUN: diff %t_header.cgdata %t_emptyheader.cgdata
diff --git a/llvm/test/tools/llvm-cgdata/error.test b/llvm/test/tools/llvm-cgdata/error.test
index c992174505c1ad..2caa3aef403950 100644
--- a/llvm/test/tools/llvm-cgdata/error.test
+++ b/llvm/test/tools/llvm-cgdata/error.test
@@ -6,6 +6,7 @@
# uint32_t Version;
# uint32_t DataKind;
# uint64_t OutlinedHashTreeOffset;
+# uint64_t StableFunctionMapOffset;
# }
RUN: touch %t_empty.cgdata
RUN: not llvm-cgdata --show %t_empty.cgdata 2>&1 | FileCheck %s --check-prefix=EMPTY
@@ -21,18 +22,20 @@ RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata
RUN: not llvm-cgdata --show %t_corrupt.cgdata 2>&1 | FileCheck %s --check-prefix=CORRUPT
CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt)
-# The current version 1 while the header says 2.
+# The current version is 2 while the header says 3.
RUN: printf '\xffcgdata\x81' > %t_version.cgdata
-RUN: printf '\x02\x00\x00\x00' >> %t_version.cgdata
+RUN: printf '\x03\x00\x00\x00' >> %t_version.cgdata
RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata
-RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
+RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
+RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
RUN: not llvm-cgdata --show %t_version.cgdata 2>&1 | FileCheck %s --check-prefix=BAD_VERSION
BAD_VERSION: {{.}}cgdata: unsupported codegen data version
# Header says an outlined hash tree, but the file ends after the header.
RUN: printf '\xffcgdata\x81' > %t_eof.cgdata
+RUN: printf '\x02\x00\x00\x00' >> %t_eof.cgdata
RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata
-RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata
-RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata
+RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata
+RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata
RUN: not llvm-cgdata --show %t_eof.cgdata 2>&1 | FileCheck %s --check-prefix=EOF
EOF: {{.}}cgdata: end of File
diff --git a/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
new file mode 100644
index 00000000000000..b9bf067d3771c5
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
@@ -0,0 +1,66 @@
+# REQUIRES: shell, aarch64-registered-target
+# UNSUPPORTED: system-windows
+
+# Test merging a single object file that has both __llvm_outline and __llvm_merge into a cgdata.
+# Effectively, this test combines merge-hashtree-single.test and merge-funcmap-single.test.
+
+RUN: split-file %s %t
+
+# Synthesize raw hashtree bytes without the header (32 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-hashtree.cgtext -o %t/raw-hashtree.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-hashtree.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-hashtree-bytes.txt
+
+# Synthesize raw funcmap bytes without the header (32 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-funcmap.cgtext -o %t/raw-funcmap.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-funcmap.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-funcmap-bytes.txt
+
+# Synthesize a bitcode file by creating two sections for the hash tree and the function map, respectively.
+RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-hashtree-bytes.txt)/g" %t/merge-both-template.ll > %t/merge-both-hashtree-template.ll
+RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-funcmap-bytes.txt)/g" %t/merge-both-hashtree-template.ll > %t/merge-both-hashtree-funcmap.ll
+
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-both-hashtree-funcmap.ll -o %t/merge-both-hashtree-funcmap.o
+
+# Merge an object file having cgdata (__llvm_outline and __llvm_merge)
+RUN: llvm-cgdata -m %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap.cgdata
+RUN: llvm-cgdata -s %t/merge-both-hashtree-funcmap.cgdata | FileCheck %s
+
+CHECK: Outlined hash tree:
+CHECK-NEXT: Total Node Count: 3
+CHECK-NEXT: Terminal Node Count: 1
+CHECK-NEXT: Depth: 2
+CHECK-NEXT: Stable function map:
+CHECK-NEXT: Unique hash Count: 1
+CHECK-NEXT: Total function Count: 1
+CHECK-NEXT: Mergeable function Count: 0
+
+;--- raw-hashtree.cgtext
+:outlined_hash_tree
+0:
+ Hash: 0x0
+ Terminals: 0
+ SuccessorIds: [ 1 ]
+1:
+ Hash: 0x1
+ Terminals: 0
+ SuccessorIds: [ 2 ]
+2:
+ Hash: 0x2
+ Terminals: 4
+ SuccessorIds: [ ]
+...
+
+;--- raw-funcmap.cgtext
+:stable_function_map
+- Hash: 1
+ FunctionName: Func1
+ ModuleName: Mod1
+ InstCount: 2
+ IndexOperandHashes:
+ - InstIndex: 0
+ OpndIndex: 1
+ OpndHash: 3
+...
+
+;--- merge-both-template.ll
+@.data1 = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
+@.data2 = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
new file mode 100644
index 00000000000000..f643c8d92073e3
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
@@ -0,0 +1,83 @@
+# REQUIRES: shell, aarch64-registered-target
+# UNSUPPORTED: system-windows
+
+# Merge an archive that has two object files having cgdata (__llvm_merge)
+
+RUN: split-file %s %t
+
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
+RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o
+
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
+RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
+
+# Make an archive from two object files
+RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o
+
+# Merge the archive into the codegen data file.
+RUN: llvm-cgdata --merge %t/merge-archive.a -o %t/merge-archive.cgdata
+RUN: llvm-cgdata --show %t/merge-archive.cgdata | FileCheck %s
+
+RUN: llvm-cgdata --show %t/merge-archive.cgdata| FileCheck %s
+CHECK: Stable function map:
+CHECK-NEXT: Unique hash Count: 1
+CHECK-NEXT: Total function Count: 2
+CHECK-NEXT: Mergeable function Count: 2
+
+RUN: llvm-cgdata --convert %t/merge-archive.cgdata| FileCheck %s --check-prefix=MAP
+MAP: # Stable function map
+MAP-NEXT: :stable_function_map
+MAP-NEXT: ---
+MAP-NEXT: - Hash: 1
+MAP-NEXT: FunctionName: Func1
+MAP-NEXT: ModuleName: Mod1
+MAP-NEXT: InstCount: 2
+MAP-NEXT: IndexOperandHashes:
+MAP-NEXT: - InstIndex: 0
+MAP-NEXT: OpndIndex: 1
+MAP-NEXT: OpndHash: 3
+MAP-NEXT: - Hash: 1
+MAP-NEXT: FunctionName: Func2
+MAP-NEXT: ModuleName: Mod1
+MAP-NEXT: InstCount: 2
+MAP-NEXT: IndexOperandHashes:
+MAP-NEXT: - InstIndex: 0
+MAP-NEXT: OpndIndex: 1
+MAP-NEXT: OpndHash: 4
+MAP-NEXT: ...
+
+;--- raw-1.cgtext
+:stable_function_map
+- Hash: 1
+ FunctionName: Func2
+ ModuleName: Mod1
+ InstCount: 2
+ IndexOperandHashes:
+ - InstIndex: 0
+ OpndIndex: 1
+ OpndHash: 4
+...
+
+;--- merge-1-template.ll
+@.data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+
+;--- raw-2.cgtext
+:stable_function_map
+- Hash: 1
+ FunctionName: Func1
+ ModuleName: Mod1
+ InstCount: 2
+ IndexOperandHashes:
+ - InstIndex: 0
+ OpndIndex: 1
+ OpndHash: 3
+...
+
+;--- merge-2-template.ll
+@.data = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
new file mode 100644
index 00000000000000..c8acf1f3916e5a
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
@@ -0,0 +1,78 @@
+# REQUIRES: shell, aarch64-registered-target
+# UNSUPPORTED: system-windows
+
+# Merge a binary file (e.g., a linked executable) having concatenated cgdata (__llvm_merge)
+
+RUN: split-file %s %t
+
+# Synthesize two sets of raw cgdata without the header (32 byte) from the indexed cgdata.
+# Concatenate them in merge-concat.ll
+RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
+RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-concat-template.ll > %t/merge-concat-template-2.ll
+RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
+RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat-template-2.ll > %t/merge-concat.ll
+
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o
+RUN: llvm-cgdata --merge %t/merge-concat.o -o %t/merge-concat.cgdata
+RUN: llvm-cgdata --show %t/merge-concat.cgdata | FileCheck %s
+
+CHECK: Stable function map:
+CHECK-NEXT: Unique hash Count: 1
+CHECK-NEXT: Total function Count: 2
+CHECK-NEXT: Mergeable function Count: 2
+
+RUN: llvm-cgdata --convert %t/merge-concat.cgdata| FileCheck %s --check-prefix=MAP
+MAP: # Stable function map
+MAP-NEXT: :stable_function_map
+MAP-NEXT: ---
+MAP-NEXT: - Hash: 1
+MAP-NEXT: FunctionName: Func1
+MAP-NEXT: ModuleName: Mod1
+MAP-NEXT: InstCount: 2
+MAP-NEXT: IndexOperandHashes:
+MAP-NEXT: - InstIndex: 0
+MAP-NEXT: OpndIndex: 1
+MAP-NEXT: OpndHash: 3
+MAP-NEXT: - Hash: 1
+MAP-NEXT: FunctionName: Func2
+MAP-NEXT: ModuleName: Mod1
+MAP-NEXT: InstCount: 2
+MAP-NEXT: IndexOperandHashes:
+MAP-NEXT: - InstIndex: 0
+MAP-NEXT: OpndIndex: 1
+MAP-NEXT: OpndHash: 4
+MAP-NEXT: ...
+
+;--- raw-1.cgtext
+:stable_function_map
+- Hash: 1
+ FunctionName: Func2
+ ModuleName: Mod1
+ InstCount: 2
+ IndexOperandHashes:
+ - InstIndex: 0
+ OpndIndex: 1
+ OpndHash: 4
+...
+
+;--- raw-2.cgtext
+:stable_function_map
+- Hash: 1
+ FunctionName: Func1
+ ModuleName: Mod1
+ InstCount: 2
+ IndexOperandHashes:
+ - InstIndex: 0
+ OpndIndex: 1
+ OpndHash: 3
+...
+
+;--- merge-concat-template.ll
+
+; In a linked executable (as opposed to an object file), cgdata in __llvm_merge might be concatenated.
+; Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated.
+; In other words, the following two trees are encoded back-to-back in a binary format.
+@.data1 = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+@.data2 = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
new file mode 100644
index 00000000000000..3ae67f062f820f
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
@@ -0,0 +1,79 @@
+# REQUIRES: shell, aarch64-registered-target
+# UNSUPPORTED: system-windows
+
+# Merge two object files having cgdata (__llvm_merge)
+
+RUN: split-file %s %t
+
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
+RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o
+
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
+RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
+
+# Merge two object files into the codegen data file.
+RUN: llvm-cgdata --merge %t/merge-1.o %t/merge-2.o -o %t/merge.cgdata
+
+RUN: llvm-cgdata --show %t/merge.cgdata | FileCheck %s
+CHECK: Stable function map:
+CHECK-NEXT: Unique hash Count: 1
+CHECK-NEXT: Total function Count: 2
+CHECK-NEXT: Mergeable function Count: 2
+
+RUN: llvm-cgdata --convert %t/merge.cgdata | FileCheck %s --check-prefix=MAP
+MAP: # Stable function map
+MAP-NEXT: :stable_function_map
+MAP-NEXT: ---
+MAP-NEXT: - Hash: 1
+MAP-NEXT: FunctionName: Func1
+MAP-NEXT: ModuleName: Mod1
+MAP-NEXT: InstCount: 2
+MAP-NEXT: IndexOperandHashes:
+MAP-NEXT: - InstIndex: 0
+MAP-NEXT: OpndIndex: 1
+MAP-NEXT: OpndHash: 3
+MAP-NEXT: - Hash: 1
+MAP-NEXT: FunctionName: Func2
+MAP-NEXT: ModuleName: Mod1
+MAP-NEXT: InstCount: 2
+MAP-NEXT: IndexOperandHashes:
+MAP-NEXT: - InstIndex: 0
+MAP-NEXT: OpndIndex: 1
+MAP-NEXT: OpndHash: 4
+MAP-NEXT: ...
+
+;--- raw-1.cgtext
+:stable_function_map
+- Hash: 1
+ FunctionName: Func2
+ ModuleName: Mod1
+ InstCount: 2
+ IndexOperandHashes:
+ - InstIndex: 0
+ OpndIndex: 1
+ OpndHash: 4
+...
+
+;--- merge-1-template.ll
+@.data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+
+;--- raw-2.cgtext
+:stable_function_map
+- Hash: 1
+ FunctionName: Func1
+ ModuleName: Mod1
+ InstCount: 2
+ IndexOperandHashes:
+ - InstIndex: 0
+ OpndIndex: 1
+ OpndHash: 3
+...
+
+;--- merge-2-template.ll
+@.data = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
new file mode 100644
index 00000000000000..6a4e635f638657
--- /dev/null
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
@@ -0,0 +1,36 @@
+# REQUIRES: shell, aarch64-registered-target
+# UNSUPPORTED: system-windows
+
+# Test merge a single object file into a cgdata
+
+RUN: split-file %s %t
+
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-single.cgtext -o %t/raw-single.cgdata
+RUN: od -t x1 -j 32 -An %t/raw-single.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-single-bytes.txt
+
+RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-single-bytes.txt)/g" %t/merge-single-template.ll > %t/merge-single.ll
+RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merge-single.o
+
+# Merge an object file having cgdata (__llvm_merge)
+RUN: llvm-cgdata -m %t/merge-single.o -o %t/merge-single.cgdata
+RUN: llvm-cgdata -s %t/merge-single.cgdata | FileCheck %s
+CHECK: Stable function map:
+CHECK-NEXT: Unique hash Count: 1
+CHECK-NEXT: Total function Count: 1
+CHECK-NEXT: Mergeable function Count: 0
+
+;--- raw-single.cgtext
+:stable_function_map
+- Hash: 1
+ FunctionName: Func1
+ ModuleName: Mod1
+ InstCount: 2
+ IndexOperandHashes:
+ - InstIndex: 0
+ OpndIndex: 1
+ OpndHash: 3
+...
+
+;--- merge-single-template.ll
+@.data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-archive.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-archive.test
similarity index 91%
rename from llvm/test/tools/llvm-cgdata/merge-archive.test
rename to llvm/test/tools/llvm-cgdata/merge-hashtree-archive.test
index 03eb9106b54562..ee6345247c5be6 100644
--- a/llvm/test/tools/llvm-cgdata/merge-archive.test
+++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-archive.test
@@ -5,15 +5,15 @@
RUN: split-file %s %t
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
+RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
+RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
diff --git a/llvm/test/tools/llvm-cgdata/merge-concat.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-concat.test
similarity index 93%
rename from llvm/test/tools/llvm-cgdata/merge-concat.test
rename to llvm/test/tools/llvm-cgdata/merge-hashtree-concat.test
index ac0e7a6e29e878..5a3ece05a3f990 100644
--- a/llvm/test/tools/llvm-cgdata/merge-concat.test
+++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-concat.test
@@ -5,13 +5,13 @@
RUN: split-file %s %t
-# Synthesize two sets of raw cgdata without the header (24 byte) from the indexed cgdata.
+# Synthesize two sets of raw cgdata without the header (32 byte) from the indexed cgdata.
# Concatenate them in merge-concat.ll
RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
+RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-concat-template.ll > %t/merge-concat-template-2.ll
RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
+RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat-template-2.ll > %t/merge-concat.ll
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o
diff --git a/llvm/test/tools/llvm-cgdata/merge-double.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-double.test
similarity index 90%
rename from llvm/test/tools/llvm-cgdata/merge-double.test
rename to llvm/test/tools/llvm-cgdata/merge-hashtree-double.test
index 1ae8064291019e..044a8649cf4adf 100644
--- a/llvm/test/tools/llvm-cgdata/merge-double.test
+++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-double.test
@@ -5,15 +5,15 @@
RUN: split-file %s %t
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
+RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
+RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
diff --git a/llvm/test/tools/llvm-cgdata/merge-single.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-single.test
similarity index 92%
rename from llvm/test/tools/llvm-cgdata/merge-single.test
rename to llvm/test/tools/llvm-cgdata/merge-hashtree-single.test
index 47e3cb3f4f50fb..829c63f0f17a2c 100644
--- a/llvm/test/tools/llvm-cgdata/merge-single.test
+++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-single.test
@@ -11,9 +11,9 @@ RUN: llvm-cgdata --merge %t/merge-empty.o --output %t/merge-empty.cgdata
# No summary appear with the header only cgdata.
RUN: llvm-cgdata --show %t/merge-empty.cgdata | count 0
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
RUN: llvm-cgdata --convert --format binary %t/raw-single.cgtext -o %t/raw-single.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-single.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-single-bytes.txt
+RUN: od -t x1 -j 32 -An %t/raw-single.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-single-bytes.txt
RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-single-bytes.txt)/g" %t/merge-single-template.ll > %t/merge-single.ll
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merge-single.o
diff --git a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
index 483f4662631284..0931cad4bcb7ed 100644
--- a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
+++ b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
@@ -80,8 +80,6 @@ static CGDataAction Action;
static std::optional<CGDataFormat> OutputFormat;
static std::vector<std::string> InputFilenames;
-// TODO: Add a doc, https://llvm.org/docs/CommandGuide/llvm-cgdata.html
-
static void exitWithError(Twine Message, std::string Whence = "",
std::string Hint = "") {
WithColor::error();
@@ -128,6 +126,10 @@ static int convert_main(int argc, const char *argv[]) {
OutlinedHashTreeRecord Record(Reader->releaseOutlinedHashTree());
Writer.addRecord(Record);
}
+ if (Reader->hasStableFunctionMap()) {
+ StableFunctionMapRecord Record(Reader->releaseStableFunctionMap());
+ Writer.addRecord(Record);
+ }
if (OutputFormat == CGDataFormat::Text) {
if (Error E = Writer.writeText(OS))
@@ -141,10 +143,12 @@ static int convert_main(int argc, const char *argv[]) {
}
static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
- OutlinedHashTreeRecord &GlobalOutlineRecord);
+ OutlinedHashTreeRecord &GlobalOutlineRecord,
+ StableFunctionMapRecord &GlobalFunctionMapRecord);
static bool handleArchive(StringRef Filename, Archive &Arch,
- OutlinedHashTreeRecord &GlobalOutlineRecord) {
+ OutlinedHashTreeRecord &GlobalOutlineRecord,
+ StableFunctionMapRecord &GlobalFunctionMapRecord) {
bool Result = true;
Error Err = Error::success();
for (const auto &Child : Arch.children(Err)) {
@@ -155,7 +159,8 @@ static bool handleArchive(StringRef Filename, Archive &Arch,
if (Error E = NameOrErr.takeError())
exitWithError(std::move(E), Filename);
std::string Name = (Filename + "(" + NameOrErr.get() + ")").str();
- Result &= handleBuffer(Name, BuffOrErr.get(), GlobalOutlineRecord);
+ Result &= handleBuffer(Name, BuffOrErr.get(), GlobalOutlineRecord,
+ GlobalFunctionMapRecord);
}
if (Err)
exitWithError(std::move(Err), Filename);
@@ -163,7 +168,8 @@ static bool handleArchive(StringRef Filename, Archive &Arch,
}
static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
- OutlinedHashTreeRecord &GlobalOutlineRecord) {
+ OutlinedHashTreeRecord &GlobalOutlineRecord,
+ StableFunctionMapRecord &GlobalFunctionMapRecord) {
Expected<std::unique_ptr<object::Binary>> BinOrErr =
object::createBinary(Buffer);
if (Error E = BinOrErr.takeError())
@@ -171,11 +177,12 @@ static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
bool Result = true;
if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
- if (Error E =
- CodeGenDataReader::mergeFromObjectFile(Obj, GlobalOutlineRecord))
+ if (Error E = CodeGenDataReader::mergeFromObjectFile(
+ Obj, GlobalOutlineRecord, GlobalFunctionMapRecord))
exitWithError(std::move(E), Filename);
} else if (auto *Arch = dyn_cast<Archive>(BinOrErr->get())) {
- Result &= handleArchive(Filename, *Arch, GlobalOutlineRecord);
+ Result &= handleArchive(Filename, *Arch, GlobalOutlineRecord,
+ GlobalFunctionMapRecord);
} else {
// TODO: Support for the MachO universal binary format.
errs() << "Error: unsupported binary file: " << Filename << "\n";
@@ -186,26 +193,34 @@ static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
}
static bool handleFile(StringRef Filename,
- OutlinedHashTreeRecord &GlobalOutlineRecord) {
+ OutlinedHashTreeRecord &GlobalOutlineRecord,
+ StableFunctionMapRecord &GlobalFunctionMapRecord) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
MemoryBuffer::getFileOrSTDIN(Filename);
if (std::error_code EC = BuffOrErr.getError())
exitWithErrorCode(EC, Filename);
- return handleBuffer(Filename, *BuffOrErr.get(), GlobalOutlineRecord);
+ return handleBuffer(Filename, *BuffOrErr.get(), GlobalOutlineRecord,
+ GlobalFunctionMapRecord);
}
static int merge_main(int argc, const char *argv[]) {
bool Result = true;
OutlinedHashTreeRecord GlobalOutlineRecord;
+ StableFunctionMapRecord GlobalFunctionMapRecord;
for (auto &Filename : InputFilenames)
- Result &= handleFile(Filename, GlobalOutlineRecord);
+ Result &=
+ handleFile(Filename, GlobalOutlineRecord, GlobalFunctionMapRecord);
if (!Result)
exitWithError("failed to merge codegen data files.");
+ GlobalFunctionMapRecord.finalize();
+
CodeGenDataWriter Writer;
if (!GlobalOutlineRecord.empty())
Writer.addRecord(GlobalOutlineRecord);
+ if (!GlobalFunctionMapRecord.empty())
+ Writer.addRecord(GlobalFunctionMapRecord);
std::error_code EC;
raw_fd_ostream OS(OutputFilename, EC,
@@ -249,6 +264,15 @@ static int show_main(int argc, const char *argv[]) {
<< "\n";
OS << " Depth: " << Tree->depth() << "\n";
}
+ if (Reader->hasStableFunctionMap()) {
+ auto Map = Reader->releaseStableFunctionMap();
+ OS << "Stable function map:\n";
+ OS << " Unique hash Count: " << Map->size() << "\n";
+ OS << " Total function Count: "
+ << Map->size(StableFunctionMap::TotalFunctionCount) << "\n";
+ OS << " Mergeable function Count: "
+ << Map->size(StableFunctionMap::MergeableFunctionCount) << "\n";
+ }
return 0;
}
>From 5425db1621336e8a91d4165275e17d5a2e7bed00 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Tue, 29 Oct 2024 00:27:53 -0700
Subject: [PATCH 3/5] Address comments from ellishg
---
llvm/lib/CGData/CodeGenDataReader.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CGData/CodeGenDataReader.cpp b/llvm/lib/CGData/CodeGenDataReader.cpp
index ebeb4ae36f99f3..39513d422c2c98 100644
--- a/llvm/lib/CGData/CodeGenDataReader.cpp
+++ b/llvm/lib/CGData/CodeGenDataReader.cpp
@@ -35,14 +35,14 @@ Error CodeGenDataReader::mergeFromObjectFile(
StableFunctionMapRecord &GlobalFunctionMapRecord,
stable_hash *CombinedHash) {
Triple TT = Obj->makeTriple();
- auto CGOutLineName =
+ auto CGOutlineName =
getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
auto CGMergeName =
getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false);
auto processSectionContents = [&](const StringRef &Name,
const StringRef &Contents) {
- if (Name != CGOutLineName && Name != CGMergeName)
+ if (Name != CGOutlineName && Name != CGMergeName)
return;
if (CombinedHash)
*CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents));
@@ -52,7 +52,7 @@ Error CodeGenDataReader::mergeFromObjectFile(
// we want to merge them into a single cgdata.
// Although it's not a typical workflow, we support this scenario
// by looping over all data in the sections.
- if (Name == CGOutLineName) {
+ if (Name == CGOutlineName) {
while (Data != EndData) {
OutlinedHashTreeRecord LocalOutlineRecord;
LocalOutlineRecord.deserialize(Data);
>From 06a25007d8e9ff1f970e4d343ec0918059f46198 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Fri, 30 Aug 2024 00:09:09 -0700
Subject: [PATCH 4/5] [CGData] Global Merge Functions
---
llvm/include/llvm/CGData/CodeGenData.h | 11 +
llvm/include/llvm/InitializePasses.h | 1 +
llvm/include/llvm/LinkAllPasses.h | 1 +
llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 +
llvm/include/llvm/Transforms/IPO.h | 2 +
.../Transforms/IPO/GlobalMergeFunctions.h | 77 ++
llvm/lib/CodeGen/TargetPassConfig.cpp | 3 +
llvm/lib/LTO/LTO.cpp | 1 +
llvm/lib/Transforms/IPO/CMakeLists.txt | 2 +
.../Transforms/IPO/GlobalMergeFunctions.cpp | 743 ++++++++++++++++++
.../ThinLTO/AArch64/cgdata-merge-local.ll | 62 ++
.../test/ThinLTO/AArch64/cgdata-merge-read.ll | 82 ++
.../AArch64/cgdata-merge-two-rounds.ll | 68 ++
.../ThinLTO/AArch64/cgdata-merge-write.ll | 97 +++
llvm/tools/llvm-lto2/CMakeLists.txt | 1 +
llvm/tools/llvm-lto2/llvm-lto2.cpp | 6 +
16 files changed, 1158 insertions(+)
create mode 100644 llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
create mode 100644 llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll
create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll
create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll
create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h
index 5d7c74725ccef1..da0e412f2a0e03 100644
--- a/llvm/include/llvm/CGData/CodeGenData.h
+++ b/llvm/include/llvm/CGData/CodeGenData.h
@@ -145,6 +145,9 @@ class CodeGenData {
const OutlinedHashTree *getOutlinedHashTree() {
return PublishedHashTree.get();
}
+ const StableFunctionMap *getStableFunctionMap() {
+ return PublishedStableFunctionMap.get();
+ }
/// Returns true if we should write codegen data.
bool emitCGData() { return EmitCGData; }
@@ -169,10 +172,18 @@ inline bool hasOutlinedHashTree() {
return CodeGenData::getInstance().hasOutlinedHashTree();
}
+inline bool hasStableFunctionMap() {
+ return CodeGenData::getInstance().hasStableFunctionMap();
+}
+
inline const OutlinedHashTree *getOutlinedHashTree() {
return CodeGenData::getInstance().getOutlinedHashTree();
}
+inline const StableFunctionMap *getStableFunctionMap() {
+ return CodeGenData::getInstance().getStableFunctionMap();
+}
+
inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); }
inline void
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 26f5d63553c5a8..b11d66f4b9d024 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -123,6 +123,7 @@ void initializeGCEmptyBasicBlocksPass(PassRegistry &);
void initializeGCMachineCodeAnalysisPass(PassRegistry &);
void initializeGCModuleInfoPass(PassRegistry &);
void initializeGVNLegacyPassPass(PassRegistry &);
+void initializeGlobalMergeFuncPass(PassRegistry &);
void initializeGlobalMergePass(PassRegistry &);
void initializeGlobalsAAWrapperPassPass(PassRegistry &);
void initializeHardwareLoopsLegacyPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 3516b47d29ef36..d1219d6ee2a13e 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -79,6 +79,7 @@ struct ForcePassLinking {
(void)llvm::createDomOnlyViewerWrapperPassPass();
(void)llvm::createDomViewerWrapperPassPass();
(void)llvm::createAlwaysInlinerLegacyPass();
+ (void)llvm::createGlobalMergeFuncPass();
(void)llvm::createGlobalsAAWrapperPass();
(void)llvm::createInstSimplifyLegacyPass();
(void)llvm::createInstructionCombiningPass();
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index ad80c661147d6f..c5b4811815179d 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -77,6 +77,7 @@
#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/CFGuard.h"
+#include "llvm/Transforms/IPO/GlobalMergeFunctions.h"
#include "llvm/Transforms/Scalar/ConstantHoisting.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
index ee0e35aa618325..86a8654f56997c 100644
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -55,6 +55,8 @@ enum class PassSummaryAction {
Export, ///< Export information to summary.
};
+Pass *createGlobalMergeFuncPass();
+
} // End llvm namespace
#endif
diff --git a/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
new file mode 100644
index 00000000000000..565be54f89e882
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
@@ -0,0 +1,77 @@
+//===------ GlobalMergeFunctions.h - Global merge functions -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file defines global merge functions pass and related data structure.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
+#define LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StableHashing.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CGData/StableFunctionMap.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include <map>
+#include <mutex>
+
+/// The mode GlobalMergeFunc operates in; see GlobalMergeFunc::MergerMode.
+enum class HashFunctionMode {
+ Local,
+ BuildingHashFuncion, // NOTE(review): "Funcion" typo; fix together with users.
+ UsingHashFunction,
+};
+
+namespace llvm {
+
+// A vector of locations (the pair of (instruction, operand) indices) reachable
+// from a parameter.
+using ParamLocs = SmallVector<IndexPair, 4>;
+// A vector of parameters, each represented by its ParamLocs.
+using ParamLocsVecTy = SmallVector<ParamLocs, 8>;
+
+/// GlobalMergeFunc finds functions which only differ by constants in
+/// certain instructions, e.g. resulting from specialized functions of layout
+/// compatible types.
+/// Unlike MergeFunctions that directly compares IRs, this uses stable function
+/// hash to find the merge candidate. Similar to the global outliner, we can run
+/// codegen twice to collect function merge candidate in the first round, and
+/// merge functions globally in the second round.
+class GlobalMergeFunc : public ModulePass {
+ HashFunctionMode MergerMode = HashFunctionMode::Local;
+
+ std::unique_ptr<StableFunctionMap> LocalFunctionMap;
+
+public:
+ static char ID;
+
+ GlobalMergeFunc();
+
+ StringRef getPassName() const override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ void initializeMergerMode(const Module &M);
+
+ bool runOnModule(Module &M) override;
+
+ /// Analyze module to create stable function into LocalFunctionMap.
+ void analyze(Module &M);
+
+ /// Emit LocalFunctionMap into __llvm_merge section.
+ void emitFunctionMap(Module &M);
+
+ /// Merge functions in the module using the global function map.
+ bool merge(Module &M, const StableFunctionMap *FunctionMap);
+};
+
+} // end namespace llvm
+#endif // LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 12225c9946e9fc..1501d5ad95ba42 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -141,6 +141,9 @@ static cl::opt<RunOutliner> EnableMachineOutliner(
"Disable all outlining"),
// Sentinel value for unspecified option.
clEnumValN(RunOutliner::AlwaysOutline, "", "")));
+cl::opt<bool> EnableGlobalMergeFunc(
+ "enable-global-merge-func", cl::Hidden,
+ cl::desc("Enable global merge functions that are based on hash function"));
// Disable the pass to fix unwind information. Whether the pass is included in
// the pipeline is controlled via the target options, this option serves as
// manual override.
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 0f53c608512171..3f88c6c475d4d8 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -53,6 +53,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/GlobalMergeFunctions.h"
#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index 15cb57399d2460..6a36ed64149f93 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -21,6 +21,7 @@ add_llvm_component_library(LLVMipo
GlobalDCE.cpp
GlobalOpt.cpp
GlobalSplit.cpp
+ GlobalMergeFunctions.cpp
HotColdSplitting.cpp
IPO.cpp
IROutliner.cpp
@@ -60,6 +61,7 @@ add_llvm_component_library(LLVMipo
Analysis
BitReader
BitWriter
+ CGData
Core
FrontendOpenMP
InstCombine
diff --git a/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
new file mode 100644
index 00000000000000..599de722814f21
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
@@ -0,0 +1,743 @@
+//===---- GlobalMergeFunctions.cpp - Global merge functions -------*- C++ -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: This implements a function merge using function hash while tracking
+// differences in Constants. This uses stable function hash to find potential
+// merge candidates. The first codegen round collects stable function hashes,
+// and determines the merge candidates that match the stable function hashes.
+// The set of parameters pointing to different Constants are also computed
+// during the stable function merge. The second codegen round uses this global
+// function info to optimistically create a merged function in each module
+// context to guarantee correct transformation. Similar to the global outliner,
+// the linker's deduplication (ICF) folds the identical merged functions to save
+// the final binary size.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/GlobalMergeFunctions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/CGData/CodeGenData.h"
+#include "llvm/CGData/StableFunctionMap.h"
+#include "llvm/CodeGen/MachineStableHash.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/StructuralHash.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#define DEBUG_TYPE "global-merge-func"
+
+using namespace llvm;
+using namespace llvm::support;
+
+// Command-line knobs for the global merge function pass. All of them are
+// hidden options.
+
+// When set, initializeMergerMode() returns before selecting a codegen-data
+// mode, so only the module-local function map is used.
+static cl::opt<bool>
+ DisableGlobalMerging("disable-global-merging", cl::Hidden,
+ cl::desc("Disable global merging only by ignoring "
+ "the codegen data generation or use. Local "
+ "merging is still enabled within a module."),
+ cl::init(false));
+// isProfitable() rejects candidates whose instruction count is below this.
+static cl::opt<unsigned> GlobalMergingMinInstrs(
+ "global-merging-min-instrs",
+ cl::desc("The minimum instruction count required when merging functions."),
+ cl::init(1), cl::Hidden);
+// isProfitable() rejects candidates whose original plus added parameter count
+// exceeds this. The default is effectively "no limit".
+static cl::opt<unsigned> GlobalMergingMaxParams(
+ "global-merging-max-params",
+ cl::desc(
+ "The maximum number of parameters allowed when merging functions."),
+ cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden);
+// Per-parameter term of the cost computed in isProfitable().
+static cl::opt<unsigned> GlobalMergingParamOverhead(
+ "global-merging-param-overhead",
+ cl::desc("The overhead cost associated with each parameter when merging "
+ "functions."),
+ cl::init(2), cl::Hidden);
+// Per-call term of the cost computed in isProfitable().
+static cl::opt<unsigned>
+ GlobalMergingCallOverhead("global-merging-call-overhead",
+ cl::desc("The overhead cost associated with each "
+ "function call when merging functions."),
+ cl::init(1), cl::Hidden);
+// Constant added to the cost in isProfitable(); the benefit must exceed the
+// total for merging to proceed.
+static cl::opt<unsigned> GlobalMergingExtraThreshold(
+ "global-merging-extra-threshold",
+ cl::desc("An additional cost threshold that must be exceeded for merging "
+ "to be considered beneficial."),
+ cl::init(0), cl::Hidden);
+
+// Defined in another translation unit; only declared here.
+extern cl::opt<bool> EnableGlobalMergeFunc;
+
+// Statistics counters. The "NumMismatched*" counters are bumped in merge()
+// each time a candidate function is skipped for the corresponding reason; the
+// "NumAnalyzed*"/"NumEligible*" counters are bumped in analyze().
+STATISTIC(NumMismatchedFunctionHashGlobalMergeFunction,
+ "Number of mismatched function hash for global merge function");
+STATISTIC(NumMismatchedInstCountGlobalMergeFunction,
+ "Number of mismatched instruction count for global merge function");
+STATISTIC(NumMismatchedConstHashGlobalMergeFunction,
+ "Number of mismatched const hash for global merge function");
+STATISTIC(NumMismatchedModuleIdGlobalMergeFunction,
+ "Number of mismatched Module Id for global merge function");
+STATISTIC(NumGlobalMergeFunctions,
+ "Number of functions that are actually merged using function hash");
+// NOTE(review): "Modues" is a misspelling of "Modules"; renaming it requires
+// updating the use in analyze() at the same time.
+STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
+STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed");
+STATISTIC(NumEligibleFunctions, "Number of functions that are eligible");
+
+/// Returns true if operand \p OpIdx of \p CI is the call's callee operand.
+/// The check compares the address of the operand's Use with the address of
+/// the called-operand Use.
+static bool isCalleeOperand(const CallBase *CI, unsigned OpIdx) {
+  const auto &CalleeUse = CI->getCalledOperandUse();
+  const auto &CandidateUse = CI->getOperandUse(OpIdx);
+  return &CandidateUse == &CalleeUse;
+}
+
+/// Decides whether operand \p OpIdx of the call \p CI may be turned into a
+/// parameter of a merged function. Returns false for inline asm calls, for
+/// calls to intrinsics or to objc_msgSend stubs, and for a callee operand
+/// that already carries a ptrauth operand bundle.
+static bool canParameterizeCallOperand(const CallBase *CI, unsigned OpIdx) {
+  if (CI->isInlineAsm())
+    return false;
+
+  // Look through pointer casts to find the directly called function, if any.
+  Function *Callee = nullptr;
+  if (auto *CalledOp = CI->getCalledOperand())
+    Callee = dyn_cast_or_null<Function>(CalledOp->stripPointerCasts());
+
+  // Intrinsics are rejected. objc_msgSend stubs must be called, and can't
+  // have their address taken.
+  if (Callee && (Callee->isIntrinsic() ||
+                 Callee->getName().starts_with("objc_msgSend$")))
+    return false;
+
+  // If the operand is the callee and it has already been signed, reject it
+  // because we cannot add another ptrauth bundle to the call instruction.
+  return !(isCalleeOperand(CI, OpIdx) &&
+           CI->getOperandBundle(LLVMContext::OB_ptrauth).has_value());
+}
+
+/// Returns true if \p I is a load, store, call, or invoke instruction; only
+/// these opcodes are accepted for constant sharing.
+bool isEligibleInstrunctionForConstantSharing(const Instruction *I) {
+  const unsigned Opcode = I->getOpcode();
+  return Opcode == Instruction::Load || Opcode == Instruction::Store ||
+         Opcode == Instruction::Call || Opcode == Instruction::Invoke;
+}
+
+/// Returns true if operand \p OpIdx of \p I can be shared as a constant:
+/// the instruction must have an eligible opcode, the operand must be a
+/// Constant, and for call-like instructions the operand must additionally
+/// pass canParameterizeCallOperand().
+bool isEligibleOperandForConstantSharing(const Instruction *I, unsigned OpIdx) {
+  assert(OpIdx < I->getNumOperands() && "Invalid operand index");
+
+  const bool IsConstOnEligibleInst =
+      isEligibleInstrunctionForConstantSharing(I) &&
+      isa<Constant>(I->getOperand(OpIdx));
+  if (!IsConstOnEligibleInst)
+    return false;
+
+  // Call-like instructions carry extra restrictions; anything else that got
+  // this far qualifies as-is.
+  const auto *Call = dyn_cast<CallBase>(I);
+  return Call ? canParameterizeCallOperand(Call, OpIdx) : true;
+}
+
+/// Returns true if function \p F is eligible for merging. Declarations,
+/// functions with the NoMerge attribute, available_externally definitions,
+/// variadic functions, functions using the SwiftTail calling convention, and
+/// functions containing a musttail call are all rejected.
+bool isEligibleFunction(Function *F) {
+  const bool Excluded = F->isDeclaration() ||
+                        F->hasFnAttribute(llvm::Attribute::NoMerge) ||
+                        F->hasAvailableExternallyLinkage() ||
+                        F->getFunctionType()->isVarArg() ||
+                        F->getCallingConv() == CallingConv::SwiftTail;
+  if (Excluded)
+    return false;
+
+  // If the function contains call sites with musttail and we merge it, the
+  // merged function will keep the musttail call site while its number of
+  // parameters can change, so the parameter count of the call site would no
+  // longer match the function itself.
+  for (const BasicBlock &Block : *F)
+    for (const Instruction &Inst : Block)
+      if (const auto *Call = dyn_cast<CallBase>(&Inst);
+          Call && Call->isMustTailCall())
+        return false;
+
+  return true;
+}
+
+/// File-local check for whether \p I is a load, store, call, or invoke.
+static bool
+isEligibleInstrunctionForConstantSharingLocal(const Instruction *I) {
+  const unsigned Op = I->getOpcode();
+  if (Op == Instruction::Load || Op == Instruction::Store)
+    return true;
+  return Op == Instruction::Call || Op == Instruction::Invoke;
+}
+
+/// Operand predicate passed to StructuralHashWithDifferences in this file.
+/// Returns true when operand \p OpIdx of \p I is a Constant on a load, store,
+/// call, or invoke, and, for call-like instructions, the operand also passes
+/// canParameterizeCallOperand().
+static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
+  assert(OpIdx < I->getNumOperands() && "Invalid operand index");
+
+  if (!isEligibleInstrunctionForConstantSharingLocal(I) ||
+      !isa<Constant>(I->getOperand(OpIdx)))
+    return false;
+
+  const auto *Call = dyn_cast<CallBase>(I);
+  return !Call || canParameterizeCallOperand(Call, OpIdx);
+}
+
+/// Converts \p V to \p DestTy, emitting the required instructions through
+/// \p Builder, and returns the converted value. Adapted from
+/// MergeFunctions.cpp.
+///
+/// Struct and array values are converted element by element (recursively);
+/// the source and destination must have the same number of elements. For
+/// everything else an inttoptr, ptrtoint, or bitcast is emitted depending on
+/// the source and destination types.
+static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
+  Type *SrcTy = V->getType();
+  if (SrcTy->isStructTy()) {
+    assert(DestTy->isStructTy());
+    assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
+    // Every element is overwritten below, so the placeholder never survives.
+    Value *Result = PoisonValue::get(DestTy);
+    for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
+      Value *Element =
+          createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)),
+                     DestTy->getStructElementType(I));
+
+      Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I));
+    }
+    return Result;
+  }
+  assert(!DestTy->isStructTy());
+  if (auto *SrcAT = dyn_cast<ArrayType>(SrcTy)) {
+    auto *DestAT = dyn_cast<ArrayType>(DestTy);
+    assert(DestAT);
+    assert(SrcAT->getNumElements() == DestAT->getNumElements());
+    // Use poison as the aggregate placeholder, matching the struct case
+    // above; every element is overwritten by the loop.
+    Value *Result = PoisonValue::get(DestTy);
+    for (unsigned int I = 0, E = SrcAT->getNumElements(); I < E; ++I) {
+      Value *Element =
+          createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)),
+                     DestAT->getElementType());
+
+      Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I));
+    }
+    return Result;
+  }
+  assert(!DestTy->isArrayTy());
+  if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
+    return Builder.CreateIntToPtr(V, DestTy);
+  if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
+    return Builder.CreatePtrToInt(V, DestTy);
+  return Builder.CreateBitCast(V, DestTy);
+}
+
+/// Computes a stable function record for every eligible function in \p M and
+/// inserts it into LocalFunctionMap. Also updates the analysis statistics.
+void GlobalMergeFunc::analyze(Module &M) {
+  ++NumAnalyzedModues;
+  for (Function &Func : M) {
+    ++NumAnalyzedFunctions;
+    if (!isEligibleFunction(&Func))
+      continue;
+    ++NumEligibleFunctions;
+
+    auto HashInfo = llvm::StructuralHashWithDifferences(Func, ignoreOp);
+
+    // Flatten the operand-hash map into a vector, which is the
+    // serialization-friendly form the StableFunction record takes.
+    IndexOperandHashVecType OperandHashVec;
+    for (auto &Entry : *HashInfo.IndexOperandHashMap)
+      OperandHashVec.emplace_back(Entry);
+
+    StableFunction Record(
+        HashInfo.FunctionHash, get_stable_name(Func.getName()).str(),
+        M.getModuleIdentifier(), HashInfo.IndexInstruction->size(),
+        std::move(OperandHashVec));
+
+    LocalFunctionMap->insert(Record);
+  }
+}
+
+/// Tuple to hold function info to process merging. merge() fills one of these
+/// per function that passed all validation checks, using aggregate
+/// initialization, so the field order matters.
+struct FuncMergeInfo {
+ // Entry of the function map that the function was matched against. Not owned.
+ StableFunctionEntry *SF;
+ // The function in the current module that is going to be merged.
+ Function *F;
+ // Map from instruction index to instruction, as produced by
+ // StructuralHashWithDifferences for F; used to locate constant operands.
+ std::unique_ptr<IndexInstrMap> IndexInstruction;
+};
+
+// Creates and returns the merged function for FI.F. The new function is named
+// "<FI.F's name>.Tgm", has internal linkage and the noinline attribute, and is
+// inserted right before FI.F in the module. Its signature is FI.F's parameter
+// list followed by one parameter per entry in ConstParamTypes. The body of
+// FI.F is moved into it, and every constant operand location listed in
+// ParamLocsVec is rewritten to use the matching appended parameter.
+static Function *createMergedFunction(FuncMergeInfo &FI,
+                                      ArrayRef<Type *> ConstParamTypes,
+                                      const ParamLocsVecTy &ParamLocsVec) {
+  Function *OrigFunc = FI.F;
+  Module *ParentModule = OrigFunc->getParent();
+
+  // The merged function's name is the root function's name plus ".Tgm".
+  std::string MergedName = OrigFunc->getName().str() + ".Tgm";
+  assert(!ParentModule->getFunction(MergedName));
+
+  // Signature: the original parameter types, then the constant parameter
+  // types that were passed in.
+  FunctionType *OrigFuncTy = OrigFunc->getFunctionType();
+  SmallVector<Type *> AllParamTypes(OrigFuncTy->param_begin(),
+                                    OrigFuncTy->param_end());
+  AllParamTypes.append(ConstParamTypes.begin(), ConstParamTypes.end());
+  FunctionType *MergedFuncTy =
+      FunctionType::get(OrigFuncTy->getReturnType(), AllParamTypes, false);
+
+  // Declare the new function, inheriting debug info and attributes from the
+  // original before overriding storage class, linkage, and inlining.
+  Function *NewFunction =
+      Function::Create(MergedFuncTy, OrigFunc->getLinkage(), MergedName);
+  if (auto *SP = OrigFunc->getSubprogram())
+    NewFunction->setSubprogram(SP);
+  NewFunction->copyAttributesFrom(OrigFunc);
+  NewFunction->setDLLStorageClass(GlobalValue::DefaultStorageClass);
+  NewFunction->setLinkage(GlobalValue::InternalLinkage);
+  NewFunction->addFnAttr(Attribute::NoInline);
+
+  // Place the new function before the root function, then move the root
+  // function's body into it.
+  ParentModule->getFunctionList().insert(OrigFunc->getIterator(), NewFunction);
+  NewFunction->splice(NewFunction->begin(), OrigFunc);
+
+  // Redirect uses of each original argument to the new function's argument at
+  // the same position.
+  for (Argument &OrigArg : OrigFunc->args())
+    OrigArg.replaceAllUsesWith(NewFunction->getArg(OrigArg.getArgNo()));
+
+  // Replace the original Constants by the appended arguments, casting when
+  // the constant's type differs from the argument's type.
+  const unsigned NumOrigArgs = OrigFunc->arg_size();
+  unsigned ExtraIdx = 0;
+  for (const auto &Locs : ParamLocsVec) {
+    Argument *ConstArg = NewFunction->getArg(NumOrigArgs + ExtraIdx);
+    ++ExtraIdx;
+    for (auto [InstIndex, OpndIndex] : Locs) {
+      auto *Inst = FI.IndexInstruction->lookup(InstIndex);
+      auto *OrigC = Inst->getOperand(OpndIndex);
+      Value *Replacement = ConstArg;
+      if (OrigC->getType() != ConstArg->getType()) {
+        IRBuilder<> Builder(Inst->getParent(), Inst->getIterator());
+        Replacement = createCast(Builder, ConstArg, OrigC->getType());
+      }
+      Inst->setOperand(OpndIndex, Replacement);
+    }
+  }
+
+  return NewFunction;
+}
+
+// Turns the original function FI.F into a thunk that calls the merged function
+// ToFunc. The thunk forwards its own arguments and then passes the constants
+// in Params for the extra parameters of ToFunc, casting each argument to the
+// parameter type ToFunc expects, and returns the call's result (if any).
+static void createThunk(FuncMergeInfo &FI, ArrayRef<Constant *> Params,
+                        Function *ToFunc) {
+  Function *Thunk = FI.F;
+  FunctionType *ToFuncTy = ToFunc->getFunctionType();
+
+  assert(Thunk->arg_size() + Params.size() == ToFuncTy->getNumParams());
+  Thunk->dropAllReferences();
+
+  BasicBlock *Entry = BasicBlock::Create(Thunk->getContext(), "", Thunk);
+  IRBuilder<> Builder(Entry);
+
+  SmallVector<Value *> CallArgs;
+  unsigned NextParam = 0;
+
+  // Arguments which are passed through the thunk.
+  for (Argument &Arg : Thunk->args()) {
+    Type *ExpectedTy = ToFuncTy->getParamType(NextParam);
+    CallArgs.push_back(createCast(Builder, &Arg, ExpectedTy));
+    ++NextParam;
+  }
+
+  // New arguments defined by Params.
+  for (Constant *Param : Params) {
+    assert(NextParam < ToFuncTy->getNumParams());
+    // FIXME: do not support signing
+    Type *ExpectedTy = ToFuncTy->getParamType(NextParam);
+    CallArgs.push_back(createCast(Builder, Param, ExpectedTy));
+    ++NextParam;
+  }
+
+  CallInst *CI = Builder.CreateCall(ToFunc, CallArgs);
+
+  // musttail is used only when both sides use the SwiftTail calling
+  // convention; otherwise the call is marked as a plain tail call.
+  const bool BothSwiftTail =
+      ToFunc->getCallingConv() == CallingConv::SwiftTail &&
+      Thunk->getCallingConv() == CallingConv::SwiftTail;
+  if (BothSwiftTail)
+    CI->setTailCallKind(llvm::CallInst::TCK_MustTail);
+  else
+    CI->setTailCallKind(llvm::CallInst::TCK_Tail);
+  CI->setCallingConv(ToFunc->getCallingConv());
+  CI->setAttributes(ToFunc->getAttributes());
+
+  if (Thunk->getReturnType()->isVoidTy()) {
+    Builder.CreateRetVoid();
+    return;
+  }
+  Builder.CreateRet(createCast(Builder, CI, Thunk->getReturnType()));
+}
+
+// Checks whether the old (merged/optimized) operand-hash map is compatible
+// with the current one. Absolute operand hashes may not be stable across
+// builds because different sets of modules get combined, so instead of
+// comparing hash values directly we compare the *pattern* of hashes.
+// For example, with three Consts at idx1, idx3, and idx6:
+//   Old (Merged): [(idx1, hash1), (idx3, hash2), (idx6, hash1)]
+//   Current:      [(idx1, hash1'), (idx3, hash2'), (idx6, hash1')]
+// these match because every old location exists in the current map and
+// locations that shared a hash in the old map (idx1 and idx6) also share a
+// hash in the current map.
+static bool checkConstHashCompatible(
+    const DenseMap<IndexPair, stable_hash> &OldInstOpndIndexToConstHash,
+    const DenseMap<IndexPair, stable_hash> &CurrInstOpndIndexToConstHash) {
+
+  // Maps each old hash to the current hash first seen alongside it.
+  DenseMap<stable_hash, stable_hash> OldToCurr;
+  for (const auto &[Loc, OldHash] : OldInstOpndIndexToConstHash) {
+    auto CurrIt = CurrInstOpndIndexToConstHash.find(Loc);
+    if (CurrIt == CurrInstOpndIndexToConstHash.end())
+      return false;
+
+    // Record the pairing on first sight; afterwards the same old hash must
+    // always pair with the same current hash.
+    auto [MappedIt, Inserted] = OldToCurr.try_emplace(OldHash, CurrIt->second);
+    if (!Inserted && MappedIt->second != CurrIt->second)
+      return false;
+  }
+
+  return true;
+}
+
+// Validates that, for each parameter, all the locations it covers hold the
+// same Constant in the current function and have the same recorded hash in SF.
+static bool checkConstLocationCompatible(const StableFunctionEntry &SF,
+                                         const IndexInstrMap &IndexInstruction,
+                                         const ParamLocsVecTy &ParamLocsVec) {
+  for (const auto &Locs : ParamLocsVec) {
+    // Hash and Constant observed at the first location of this parameter.
+    bool HaveFirst = false;
+    stable_hash FirstHash = 0;
+    Constant *FirstConst = nullptr;
+
+    for (const auto &Loc : Locs) {
+      assert(SF.IndexOperandHashMap->count(Loc));
+      const auto LocHash = SF.IndexOperandHashMap->at(Loc);
+      const auto [InstIndex, OpndIndex] = Loc;
+      assert(InstIndex < IndexInstruction.size());
+      const auto *Inst = IndexInstruction.lookup(InstIndex);
+      auto *LocConst = cast<Constant>(Inst->getOperand(OpndIndex));
+
+      if (!HaveFirst) {
+        HaveFirst = true;
+        FirstHash = LocHash;
+        FirstConst = LocConst;
+        continue;
+      }
+      if (LocConst != FirstConst || LocHash != FirstHash)
+        return false;
+    }
+  }
+  return true;
+}
+
+// Computes, for a group of stable functions sharing a function hash, which
+// constant operand locations must become parameters of the merged function.
+//
+// For each operand location recorded in the first entry of SFS, the hashes of
+// that location across all entries form a "hash sequence". Locations whose
+// hashes are identical across all entries need no parameter. The remaining
+// locations are grouped by hash sequence: each unique sequence becomes one
+// parameter covering all locations that share it. The result holds one
+// location list per parameter, ordered by each list's first location.
+static ParamLocsVecTy
+computeParamInfo(const SmallVector<std::unique_ptr<StableFunctionEntry>> &SFS) {
+  std::map<std::vector<stable_hash>, ParamLocs> HashSeqToLocs;
+  auto &RSF = *SFS[0];
+  unsigned StableFunctionCount = SFS.size();
+
+  for (auto &[Loc, Hash] : *RSF.IndexOperandHashMap) {
+    // Const hash sequence across stable functions.
+    // We will allocate a parameter per unique hash sequence.
+    // A std::vector is used because SmallVector can't be used as a map key.
+    std::vector<stable_hash> ConstHashSeq;
+    ConstHashSeq.reserve(StableFunctionCount);
+    ConstHashSeq.push_back(Hash);
+    bool Identical = true;
+    for (unsigned J = 1; J < StableFunctionCount; ++J) {
+      auto &SF = SFS[J];
+      assert(SF->IndexOperandHashMap->count(Loc));
+      // lookup() reads without inserting, so the shared map is never
+      // modified here even if the assertion above is compiled out.
+      auto SHash = SF->IndexOperandHashMap->lookup(Loc);
+      if (Hash != SHash)
+        Identical = false;
+      ConstHashSeq.push_back(SHash);
+    }
+
+    if (Identical)
+      continue;
+
+    // For each unique Const hash sequence (parameter), add the locations.
+    HashSeqToLocs[ConstHashSeq].push_back(Loc);
+  }
+
+  ParamLocsVecTy ParamLocsVec;
+  for (auto &[HashSeq, Locs] : HashSeqToLocs)
+    ParamLocsVec.push_back(std::move(Locs));
+
+  // Sort once, after all parameters have been collected, by the first
+  // location of each parameter.
+  std::sort(ParamLocsVec.begin(), ParamLocsVec.end(),
+            [](const ParamLocs &L, const ParamLocs &R) { return L[0] < R[0]; });
+  return ParamLocsVec;
+}
+
+// Decides whether merging the candidate group SFS is worthwhile for function
+// F. Merging is rejected when there are fewer than two candidates, when the
+// instruction count is below GlobalMergingMinInstrs, or when the original
+// plus added parameter count exceeds GlobalMergingMaxParams. Otherwise the
+// estimated benefit (instructions saved) must be strictly greater than the
+// estimated cost (per-function parameter and call overhead plus the extra
+// threshold).
+static bool
+isProfitable(const SmallVector<std::unique_ptr<StableFunctionEntry>> &SFS,
+             const Function *F) {
+  // No interest if the number of candidates is less than 2.
+  const unsigned StableFunctionCount = SFS.size();
+  if (StableFunctionCount < 2)
+    return false;
+
+  const auto &Root = *SFS[0];
+  const unsigned InstCount = Root.InstCount;
+  if (InstCount < GlobalMergingMinInstrs)
+    return false;
+
+  const unsigned ParamCount = Root.IndexOperandHashMap->size();
+  const unsigned TotalParamCount =
+      ParamCount + F->getFunctionType()->getNumParams();
+  if (TotalParamCount > GlobalMergingMaxParams)
+    return false;
+
+  const unsigned Benefit = InstCount * (StableFunctionCount - 1);
+  const unsigned PerFunctionCost =
+      GlobalMergingParamOverhead * ParamCount + GlobalMergingCallOverhead;
+  const unsigned Cost =
+      PerFunctionCost * StableFunctionCount + GlobalMergingExtraThreshold;
+
+  const bool Result = Benefit > Cost;
+  LLVM_DEBUG(
+      dbgs() << "isProfitable: Function = " << F->getName() << ", "
+             << "StableFunctionCount = " << StableFunctionCount
+             << ", InstCount = " << InstCount << ", ParamCount = " << ParamCount
+             << ", Benefit = " << Benefit << ", Cost = " << Cost
+             << ", Result = " << (Result ? "true" : "false") << "\n");
+  return Result;
+}
+
+// Merges functions of module M according to the candidate groups stored in
+// FunctionMap. For each hash group, every function of M that is named in the
+// group and still matches its recorded entry (function hash, instruction
+// count, constant operand eligibility/pattern/locations) has its body moved
+// into a new ".Tgm" merged function and is itself turned into a thunk that
+// calls it. Returns true if at least one function was rewritten.
+bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
+ bool Changed = false;
+
+ // Build a map from stable function name to function.
+ // If several functions share a stable name, the last one in module order
+ // is the one kept in the map.
+ StringMap<Function *> StableNameToFuncMap;
+ for (auto &F : M)
+ StableNameToFuncMap[get_stable_name(F.getName())] = &F;
+ // Track merged functions
+ DenseSet<Function *> MergedFunctions;
+
+ // Only used for debug output below.
+ auto ModId = M.getModuleIdentifier();
+ for (auto &[Hash, SFS] : FunctionMap->getFunctionMap()) {
+ // Compute the parameter locations based on the unique hash sequences
+ // across the candidates.
+ auto ParamLocsVec = computeParamInfo(SFS);
+ LLVM_DEBUG({
+ dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash << " with Params "
+ << ParamLocsVec.size() << "\n";
+ });
+
+ // First function accepted for this group, and the module name recorded for
+ // its entry; later entries must come from the same module.
+ Function *MergedFunc = nullptr;
+ std::string MergedModId;
+ SmallVector<FuncMergeInfo> FuncMergeInfos;
+ for (auto &SF : SFS) {
+ // Get the function from the stable name.
+ auto I = StableNameToFuncMap.find(
+ *FunctionMap->getNameForId(SF->FunctionNameId));
+ if (I == StableNameToFuncMap.end())
+ continue;
+ Function *F = I->second;
+ assert(F);
+ // Skip if the function has been merged before.
+ if (MergedFunctions.count(F))
+ continue;
+ // Consider the function if it is eligible for merging.
+ if (!isEligibleFunction(F))
+ continue;
+
+ // Re-hash the function as it exists in this module and validate it
+ // against the recorded entry before trusting the recorded locations.
+ auto FI = llvm::StructuralHashWithDifferences(*F, ignoreOp);
+ uint64_t FuncHash = FI.FunctionHash;
+ if (Hash != FuncHash) {
+ ++NumMismatchedFunctionHashGlobalMergeFunction;
+ continue;
+ }
+
+ if (SF->InstCount != FI.IndexInstruction->size()) {
+ ++NumMismatchedInstCountGlobalMergeFunction;
+ continue;
+ }
+ // Every recorded constant location must still be an operand that is
+ // eligible for constant sharing in the current function.
+ // NOTE(review): the binding named Hash below shadows the group hash of
+ // the enclosing loop and is not used inside this loop.
+ bool HasValidSharedConst = true;
+ for (auto &[Index, Hash] : *SF->IndexOperandHashMap) {
+ auto [InstIndex, OpndIndex] = Index;
+ assert(InstIndex < FI.IndexInstruction->size());
+ auto *Inst = FI.IndexInstruction->lookup(InstIndex);
+ if (!isEligibleOperandForConstantSharing(Inst, OpndIndex)) {
+ HasValidSharedConst = false;
+ break;
+ }
+ }
+ if (!HasValidSharedConst) {
+ ++NumMismatchedConstHashGlobalMergeFunction;
+ continue;
+ }
+ if (!checkConstHashCompatible(*SF->IndexOperandHashMap,
+ *FI.IndexOperandHashMap)) {
+ ++NumMismatchedConstHashGlobalMergeFunction;
+ continue;
+ }
+ if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction,
+ ParamLocsVec)) {
+ ++NumMismatchedConstHashGlobalMergeFunction;
+ continue;
+ }
+
+ // Stop scanning this group once merging is judged unprofitable.
+ // NOTE(review): functions already queued in FuncMergeInfos are still
+ // merged below; confirm that is intended, since isProfitable() also
+ // depends on F's own parameter count.
+ if (!isProfitable(SFS, F))
+ break;
+
+ if (MergedFunc) {
+ // Check if the matched functions fall into the same (first) module.
+ // This module check is not strictly necessary as the functions can move
+ // around. We just want to avoid merging functions from different
+ // modules than the first one in the function map, as they may end up
+ // not being ICFed.
+ if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) {
+ ++NumMismatchedModuleIdGlobalMergeFunction;
+ continue;
+ }
+ } else {
+ MergedFunc = F;
+ MergedModId = *FunctionMap->getNameForId(SF->ModuleNameId);
+ }
+
+ FuncMergeInfos.push_back({SF.get(), F, std::move(FI.IndexInstruction)});
+ MergedFunctions.insert(F);
+ }
+ unsigned FuncMergeInfoSize = FuncMergeInfos.size();
+ if (FuncMergeInfoSize == 0)
+ continue;
+
+ LLVM_DEBUG({
+ dbgs() << "[GlobalMergeFunc] Merging function count " << FuncMergeInfoSize
+ << " in " << ModId << "\n";
+ });
+ for (auto &FMI : FuncMergeInfos) {
+ Changed = true;
+
+ // We've already validated all locations of constant operands pointed by
+ // the parameters. Just use the first one to bookkeep the original
+ // constants for each parameter.
+ SmallVector<Constant *> Params;
+ SmallVector<Type *> ParamTypes;
+ for (auto &ParamLocs : ParamLocsVec) {
+ assert(!ParamLocs.empty());
+ auto &[InstIndex, OpndIndex] = ParamLocs[0];
+ auto *Inst = FMI.IndexInstruction->lookup(InstIndex);
+ auto *Opnd = cast<Constant>(Inst->getOperand(OpndIndex));
+ Params.push_back(Opnd);
+ ParamTypes.push_back(Opnd->getType());
+ }
+
+ // Create a merged function derived from the current function (FMI.F) in
+ // the current module context; each queued function gets its own instance.
+ // NOTE(review): this local shadows the MergedFunc declared for the group
+ // above.
+ Function *MergedFunc =
+ createMergedFunction(FMI, ParamTypes, ParamLocsVec);
+
+ LLVM_DEBUG({
+ dbgs() << "[GlobalMergeFunc] Merged function (hash:" << FMI.SF->Hash
+ << ") " << MergedFunc->getName() << " generated from "
+ << FMI.F->getName() << ":\n";
+ MergedFunc->dump();
+ });
+
+ // Create a thunk to the merged function.
+ createThunk(FMI, Params, MergedFunc);
+ LLVM_DEBUG({
+ dbgs() << "[GlobalMergeFunc] Thunk generated: \n";
+ FMI.F->dump();
+ });
+ ++NumGlobalMergeFunctions;
+ }
+ }
+
+ return Changed;
+}
+
+// Pass identification and legacy pass registration under the command-line
+// name "global-merge-func". No dependent passes are declared between the
+// BEGIN/END macros.
+char GlobalMergeFunc::ID = 0;
+INITIALIZE_PASS_BEGIN(GlobalMergeFunc, "global-merge-func",
+ "Global merge function pass", false, false)
+INITIALIZE_PASS_END(GlobalMergeFunc, "global-merge-func",
+ "Global merge function pass", false, false)
+
+/// Returns the human-readable name of this pass.
+StringRef GlobalMergeFunc::getPassName() const {
+ return "Global Merge Functions";
+}
+
+/// Declares the analyses this pass interacts with: the module summary index
+/// wrapper is used only if it is available (see initializeMergerMode()).
+/// NOTE(review): setPreservesAll() is declared even though runOnModule() can
+/// rewrite functions through merge(); confirm this is intended.
+void GlobalMergeFunc::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
+ AU.setPreservesAll();
+ ModulePass::getAnalysisUsage(AU);
+}
+
+/// Constructs the pass and registers it with the global pass registry.
+GlobalMergeFunc::GlobalMergeFunc() : ModulePass(ID) {
+ initializeGlobalMergeFuncPass(*llvm::PassRegistry::getPassRegistry());
+}
+
+namespace llvm {
+/// Factory for the global merge function pass. Returns a newly allocated
+/// pass object.
+Pass *createGlobalMergeFuncPass() { return new GlobalMergeFunc(); }
+} // namespace llvm
+
+/// Resets LocalFunctionMap and selects the merger mode for module \p M.
+/// MergerMode is left untouched when global merging is disabled, when a
+/// module summary index is available but reports no exported functions for
+/// \p M, or when codegen data is neither being emitted nor available.
+void GlobalMergeFunc::initializeMergerMode(const Module &M) {
+  LocalFunctionMap = std::make_unique<StableFunctionMap>();
+
+  if (DisableGlobalMerging)
+    return;
+
+  // (Full)LTO module does not have functions added to the index.
+  // In this case, we run a local merger without using codegen data.
+  auto *IndexWrapperPass =
+      getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
+  if (IndexWrapperPass) {
+    auto *TheIndex = IndexWrapperPass->getIndex();
+    const bool HasNoExports = TheIndex && !TheIndex->hasExportedFunctions(M);
+    if (HasNoExports)
+      return;
+  }
+
+  // Emitting codegen data takes precedence over consuming it.
+  if (cgdata::emitCGData()) {
+    MergerMode = HashFunctionMode::BuildingHashFuncion;
+    return;
+  }
+  if (cgdata::hasStableFunctionMap())
+    MergerMode = HashFunctionMode::UsingHashFunction;
+}
+
+/// Serializes LocalFunctionMap and embeds the bytes into \p M, in the
+/// codegen-data merge section for the module's object format, with 4-byte
+/// alignment.
+void GlobalMergeFunc::emitFunctionMap(Module &M) {
+  LLVM_DEBUG(dbgs() << "Emit function map. Size: " << LocalFunctionMap->size()
+                    << "\n");
+
+  // Serialize into an in-memory byte buffer.
+  SmallVector<char> Storage;
+  raw_svector_ostream Stream(Storage);
+  StableFunctionMapRecord::serialize(Stream, LocalFunctionMap.get());
+
+  // The section name depends on the object format of the target triple.
+  const Triple TargetTriple(M.getTargetTriple());
+  auto SectionName =
+      getCodeGenDataSectionName(CG_merge, TargetTriple.getObjectFormat());
+
+  std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
+      Stream.str(), "in-memory stable function map", false);
+  embedBufferInModule(M, *Buffer, SectionName, Align(4));
+}
+
+/// Pass entry point. Selects the merger mode, obtains the function map
+/// (either the prior codegen data or one built from \p M), and merges.
+/// Returns whatever merge() returns.
+bool GlobalMergeFunc::runOnModule(Module &M) {
+  initializeMergerMode(M);
+
+  // Use the prior CG data to optimistically create global merge candidates.
+  if (MergerMode == HashFunctionMode::UsingHashFunction)
+    return merge(M, cgdata::getStableFunctionMap());
+
+  analyze(M);
+
+  // Emit the local function map to the custom section, __llvm_merge, before
+  // finalizing it.
+  const bool ShouldEmit =
+      MergerMode == HashFunctionMode::BuildingHashFuncion &&
+      !LocalFunctionMap->empty();
+  if (ShouldEmit)
+    emitFunctionMap(M);
+
+  LocalFunctionMap->finalize();
+  return merge(M, LocalFunctionMap.get());
+}
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll
new file mode 100644
index 00000000000000..4f5f5c0b5b26d9
--- /dev/null
+++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll
@@ -0,0 +1,62 @@
+; This test checks if two similar functions, f1 and f2, can be merged locally within a single module
+; while parameterizing a difference in their global variables, g1 and g2.
+; To achieve this, we create two instances of the global merging function, f1.Tgm and f2.Tgm,
+; which are tail-called from thunks f1 and f2 respectively.
+; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (ICF).
+
+; RUN: opt -module-summary -module-hash %s -o %t
+
+; RUN: llvm-lto2 run -enable-global-merge-func=false %t -o %tout-nomerge \
+; RUN: -r %t,_f1,px \
+; RUN: -r %t,_f2,px \
+; RUN: -r %t,_g,l -r %t,_g1,l -r %t,_g2,l
+; RUN: llvm-nm %tout-nomerge.1 | FileCheck %s --check-prefix=NOMERGE
+; RUN: llvm-lto2 run -enable-global-merge-func=true %t -o %tout-merge \
+; RUN: -r %t,_f1,px \
+; RUN: -r %t,_f2,px \
+; RUN: -r %t,_g,l -r %t,_g1,l -r %t,_g2,l
+; RUN: llvm-nm %tout-merge.1 | FileCheck %s --check-prefix=GLOBALMERGE
+; RUN: llvm-objdump -d %tout-merge.1 | FileCheck %s --check-prefix=THUNK
+
+; NOMERGE-NOT: _f1.Tgm
+; GLOBALMERGE: _f1.Tgm
+; GLOBALMERGE: _f2.Tgm
+
+; THUNK: <_f1>:
+; THUNK-NEXT: adrp x1,
+; THUNK-NEXT: ldr x1, [x1]
+; THUNK-NEXT: b
+
+; THUNK: <_f2>:
+; THUNK-NEXT: adrp x1,
+; THUNK-NEXT: ldr x1, [x1]
+; THUNK-NEXT: b
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+@g = external local_unnamed_addr global [0 x i32], align 4
+@g1 = external global i32, align 4
+@g2 = external global i32, align 4
+
+; f1 returns g[a] * g1 + 1, where @g1 is read with a volatile load.
+; It differs from f2 in this file only in the global read by the volatile load.
+define i32 @f1(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g1, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
+
+; f2 returns g[a] * g2 + 1, where @g2 is read with a volatile load.
+; It differs from f1 in this file only in the global read by the volatile load.
+define i32 @f2(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g2, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll
new file mode 100644
index 00000000000000..da756e7f15e68e
--- /dev/null
+++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll
@@ -0,0 +1,82 @@
+; This test demonstrates how similar functions are handled during global merging.
+; Currently, we do not attempt to share a merged function for identical sequences.
+; Instead, each merging instance is created uniquely.
+
+; RUN: rm -rf %t; split-file %s %t
+
+; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc
+; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc
+
+; First, run with -codegen-data-generate=true to generate the cgdata in the object files.
+; Using llvm-cgdata, merge the cg data.
+; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=true %t-foo.bc %t-goo.bc -o %tout-write \
+; RUN: -r %t-foo.bc,_f1,px \
+; RUN: -r %t-goo.bc,_f2,px \
+; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
+; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
+; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-write.1 %tout-write.2
+
+; Now run with -codegen-data-use-path=%tout.cgdata to optimize the binary.
+; Each module has its own merging instance as it is matched against the merged cgdata.
+; RUN: llvm-lto2 run -enable-global-merge-func=true \
+; RUN: -codegen-data-use-path=%tout.cgdata \
+; RUN: %t-foo.bc %t-goo.bc -o %tout-read \
+; RUN: -r %t-foo.bc,_f1,px \
+; RUN: -r %t-goo.bc,_f2,px \
+; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
+; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
+; RUN: llvm-nm %tout-read.1 | FileCheck %s --check-prefix=READ1
+; RUN: llvm-nm %tout-read.2 | FileCheck %s --check-prefix=READ2
+; RUN: llvm-objdump -d %tout-read.1 | FileCheck %s --check-prefix=THUNK1
+; RUN: llvm-objdump -d %tout-read.2 | FileCheck %s --check-prefix=THUNK2
+
+; READ1: _f1.Tgm
+; READ2: _f2.Tgm
+
+; THUNK1: <_f1>:
+; THUNK1-NEXT: adrp x1,
+; THUNK1-NEXT: ldr x1, [x1]
+; THUNK1-NEXT: b
+
+; THUNK2: <_f2>:
+; THUNK2-NEXT: adrp x1,
+; THUNK2-NEXT: ldr x1, [x1]
+; THUNK2-NEXT: b
+
+;--- foo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+@g = external local_unnamed_addr global [0 x i32], align 4
+@g1 = external global i32, align 4
+@g2 = external global i32, align 4
+
+; f1 (module foo.ll) returns g[a] * g1 + 1, where @g1 is read with a volatile
+; load.
+define i32 @f1(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g1, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
+
+;--- goo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+@g = external local_unnamed_addr global [0 x i32], align 4
+@g1 = external global i32, align 4
+@g2 = external global i32, align 4
+
+; f2 (module goo.ll) returns g[a] * g2 + 1, where @g2 is read with a volatile
+; load.
+define i32 @f2(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g2, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll
new file mode 100644
index 00000000000000..06880e3d268189
--- /dev/null
+++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-two-rounds.ll
@@ -0,0 +1,68 @@
+; TODO: This test checks how similar functions are handled during global function merging
+; by repeating the codegen via -codegen-data-thinlto-two-rounds=true.
+
+; RUN: rm -rf %t; split-file %s %t
+
+; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc
+; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc
+
+; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-thinlto-two-rounds=true %t-foo.bc %t-goo.bc -o %tout \
+; RUN: -r %t-foo.bc,_f1,px \
+; RUN: -r %t-goo.bc,_f2,px \
+; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
+; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
+; RUN: llvm-nm %tout.1 | FileCheck %s --check-prefix=OUT1
+; RUN: llvm-nm %tout.2 | FileCheck %s --check-prefix=OUT2
+; RUN: llvm-objdump -d %tout.1 | FileCheck %s --check-prefix=THUNK1
+; RUN: llvm-objdump -d %tout.2 | FileCheck %s --check-prefix=THUNK2
+
+; OUT1: _f1.Tgm
+; OUT2: _f2.Tgm
+
+; THUNK1: <_f1>:
+; THUNK1-NEXT: adrp x1,
+; THUNK1-NEXT: ldr x1, [x1]
+; THUNK1-NEXT: b
+
+; THUNK2: <_f2>:
+; THUNK2-NEXT: adrp x1,
+; THUNK2-NEXT: ldr x1, [x1]
+; THUNK2-NEXT: b
+
+;--- foo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f1(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g1, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
+
+;--- goo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f2(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g2, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
new file mode 100644
index 00000000000000..a4022eb885b43f
--- /dev/null
+++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
@@ -0,0 +1,97 @@
+; This test verifies whether a stable function is encoded into the __llvm_merge section
+; when the -codegen-data-generate flag is used under -enable-global-merge-func=true.
+
+; RUN: rm -rf %t; split-file %s %t
+
+; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc
+; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc
+
+; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=false %t-foo.bc %t-goo.bc -o %tout-nowrite \
+; RUN: -r %t-foo.bc,_f1,px \
+; RUN: -r %t-goo.bc,_f2,px \
+; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
+; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
+; RUN: llvm-nm %tout-nowrite.1 | FileCheck %s --check-prefix=NOWRITE
+; RUN: llvm-nm %tout-nowrite.2 | FileCheck %s --check-prefix=NOWRITE
+
+; No merge instance is locally created as each module has a singleton function.
+; NOWRITE-NOT: _f1.Tgm
+; NOWRITE-NOT: _f2.Tgm
+
+; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=true %t-foo.bc %t-goo.bc -o %tout-nowrite \
+; RUN: -r %t-foo.bc,_f1,px \
+; RUN: -r %t-goo.bc,_f2,px \
+; RUN: -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
+; RUN: -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
+; RUN: llvm-nm %tout-nowrite.1 | FileCheck %s --check-prefix=WRITE
+; RUN: llvm-nm %tout-nowrite.2 | FileCheck %s --check-prefix=WRITE
+; RUN: llvm-objdump -h %tout-nowrite.1 | FileCheck %s --check-prefix=SECTNAME
+; RUN: llvm-objdump -h %tout-nowrite.2 | FileCheck %s --check-prefix=SECTNAME
+
+; In write mode, no merging happens yet for each module.
+; We only create stable functions and publish them into __llvm_merge section for each object.
+; WRITE-NOT: _f1.Tgm
+; WRITE-NOT: _f2.Tgm
+; SECTNAME: __llvm_merge
+
+; Merge the cgdata using llvm-cgdata.
+; We now validate the content of the merged cgdata.
+; Two functions have the same hash with only one different constant at the same location.
+; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-nowrite.1 %tout-nowrite.2
+; RUN: llvm-cgdata --convert %tout.cgdata -o - | FileCheck %s
+
+; CHECK: - Hash: [[#%d,HASH:]]
+; CHECK-NEXT: FunctionName: f1
+; CHECK-NEXT: ModuleName: {{.*}}
+; CHECK-NEXT: InstCount: [[#%d,INSTCOUNT:]]
+; CHECK-NEXT: IndexOperandHashes:
+; CHECK-NEXT: - InstIndex: [[#%d,INSTINDEX:]]
+; CHECK-NEXT: OpndIndex: [[#%d,OPNDINDEX:]]
+; CHECK-NEXT: OpndHash: {{.*}}
+
+; CHECK: - Hash: [[#%d,HASH]]
+; CHECK-NEXT: FunctionName: f2
+; CHECK-NEXT: ModuleName: {{.*}}
+; CHECK-NEXT: InstCount: [[#%d,INSTCOUNT]]
+; CHECK-NEXT: IndexOperandHashes:
+; CHECK-NEXT: - InstIndex: [[#%d,INSTINDEX]]
+; CHECK-NEXT: OpndIndex: [[#%d,OPNDINDEX]]
+; CHECK-NEXT: OpndHash: {{.*}}
+
+;--- foo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f1(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g1, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
+
+;--- goo.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-ios12.0.0"
+
+ at g = external local_unnamed_addr global [0 x i32], align 4
+ at g1 = external global i32, align 4
+ at g2 = external global i32, align 4
+
+define i32 @f2(i32 %a) {
+entry:
+ %idxprom = sext i32 %a to i64
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* @g2, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, 1
+ ret i32 %add
+}
diff --git a/llvm/tools/llvm-lto2/CMakeLists.txt b/llvm/tools/llvm-lto2/CMakeLists.txt
index 3b4644d6e27715..6ddccc7f17442f 100644
--- a/llvm/tools/llvm-lto2/CMakeLists.txt
+++ b/llvm/tools/llvm-lto2/CMakeLists.txt
@@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS
BitReader
CodeGen
Core
+ IPO
Linker
LTO
MC
diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index d4f022ef021a44..0dc6623552a4bd 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/Threading.h"
+#include "llvm/Transforms/IPO.h"
#include <atomic>
using namespace llvm;
@@ -195,6 +196,7 @@ static cl::opt<bool> TryUseNewDbgInfoFormat(
extern cl::opt<bool> UseNewDbgInfoFormat;
extern cl::opt<cl::boolOrDefault> LoadBitcodeIntoNewDbgInfoFormat;
extern cl::opt<cl::boolOrDefault> PreserveInputDbgFormat;
+extern cl::opt<bool> EnableGlobalMergeFunc;
static void check(Error E, std::string Msg) {
if (!E)
@@ -374,6 +376,10 @@ static int run(int argc, char **argv) {
if (DI.getSeverity() == DS_Error)
HasErrors = true;
};
+ Conf.PreCodeGenPassesHook = [](legacy::PassManager &pm) {
+ if (EnableGlobalMergeFunc)
+ pm.add(createGlobalMergeFuncPass());
+ };
LTO::LTOKind LTOMode = LTO::LTOK_Default;
>From 319415409340ef4af8dc4d980c50a413190846c7 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Fri, 18 Oct 2024 10:12:21 -0700
Subject: [PATCH 5/5] Address comments from tschuett
---
llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 -
llvm/include/llvm/Transforms/IPO.h | 3 ++
.../Transforms/IPO/GlobalMergeFunctions.h | 45 ++++++++--------
.../Transforms/IPO/GlobalMergeFunctions.cpp | 53 +++++++++----------
4 files changed, 51 insertions(+), 51 deletions(-)
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index c5b4811815179d..ad80c661147d6f 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -77,7 +77,6 @@
#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/CFGuard.h"
-#include "llvm/Transforms/IPO/GlobalMergeFunctions.h"
#include "llvm/Transforms/Scalar/ConstantHoisting.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
index 86a8654f56997c..28cf330ad20812 100644
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -55,6 +55,9 @@ enum class PassSummaryAction {
Export, ///< Export information to summary.
};
+/// createGlobalMergeFuncPass - This pass generates merged instances by
+/// parameterizing distinct constants across similar functions, utilizing stable
+/// function hash information.
Pass *createGlobalMergeFuncPass();
} // End llvm namespace
diff --git a/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
index 565be54f89e882..395aeda73b2d0f 100644
--- a/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
+++ b/llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h
@@ -5,23 +5,29 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-///
-/// This file defines global merge functions pass and related data structure.
-///
+//
+// This pass defines the implementation of a function merging mechanism
+// that utilizes a stable function hash to track differences in constants and
+// identify potential merge candidates. The process involves two rounds:
+// 1. The first round collects stable function hashes and identifies merge
+// candidates with matching hashes. It also computes the set of parameters
+// that point to different constants during the stable function merge.
+// 2. The second round leverages this collected global function information to
+// optimistically create a merged function in each module context, ensuring
+// correct transformation.
+// Similar to the global outliner, this approach uses the linker's deduplication
+// (ICF) to fold identical merged functions, thereby reducing the final binary
+// size. The work is inspired by the concepts discussed in the following paper:
+// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575.
+//
//===----------------------------------------------------------------------===//
-#ifndef PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
-#define PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
+#ifndef LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
+#define LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StableHashing.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/CGData/StableFunctionMap.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include <map>
-#include <mutex>
enum class HashFunctionMode {
Local,
@@ -36,15 +42,10 @@ namespace llvm {
using ParamLocs = SmallVector<IndexPair, 4>;
// A vector of parameters
using ParamLocsVecTy = SmallVector<ParamLocs, 8>;
-// A map of stable hash to a vector of stable functions
-
-/// GlobalMergeFunc finds functions which only differ by constants in
-/// certain instructions, e.g. resulting from specialized functions of layout
-/// compatible types.
-/// Unlike PikaMergeFunc that directly compares IRs, this uses stable function
-/// hash to find the merge candidate. Similar to the global outliner, we can run
-/// codegen twice to collect function merge candidate in the first round, and
-/// merge functions globally in the second round.
+
+/// GlobalMergeFunc is a ModulePass that implements a function merging mechanism
+/// using stable function hashes. It identifies and merges functions with
+/// matching hashes across modules to optimize binary size.
class GlobalMergeFunc : public ModulePass {
HashFunctionMode MergerMode = HashFunctionMode::Local;
@@ -69,9 +70,9 @@ class GlobalMergeFunc : public ModulePass {
/// Emit LocalFunctionMap into __llvm_merge section.
void emitFunctionMap(Module &M);
- /// Merge functions in the module using the global function map.
+ /// Merge functions in the module using the given function map.
bool merge(Module &M, const StableFunctionMap *FunctionMap);
};
} // end namespace llvm
-#endif // PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
+#endif // LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
diff --git a/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
index 599de722814f21..cec4d4caf63607 100644
--- a/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp
@@ -6,16 +6,19 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO: This implements a function merge using function hash while tracking
-// differences in Constants. This uses stable function hash to find potential
-// merge candidates. The first codegen round collects stable function hashes,
-// and determines the merge candidates that match the stable function hashes.
-// The set of parameters pointing to different Constants are also computed
-// during the stable function merge. The second codegen round uses this global
-// function info to optimistically create a merged function in each module
-// context to guarantee correct transformation. Similar to the global outliner,
-// the linker's deduplication (ICF) folds the identical merged functions to save
-// the final binary size.
+// This pass defines the implementation of a function merging mechanism
+// that utilizes a stable function hash to track differences in constants and
+// identify potential merge candidates. The process involves two rounds:
+// 1. The first round collects stable function hashes and identifies merge
+// candidates with matching hashes. It also computes the set of parameters
+// that point to different constants during the stable function merge.
+// 2. The second round leverages this collected global function information to
+// optimistically create a merged function in each module context, ensuring
+// correct transformation.
+// Similar to the global outliner, this approach uses the linker's deduplication
+// (ICF) to fold identical merged functions, thereby reducing the final binary
+// size. The work is inspired by the concepts discussed in the following paper:
+// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575.
//
//===----------------------------------------------------------------------===//
@@ -23,9 +26,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/CGData/CodeGenData.h"
-#include "llvm/CGData/StableFunctionMap.h"
-#include "llvm/CodeGen/MachineStableHash.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/StructuralHash.h"
#include "llvm/InitializePasses.h"
@@ -84,7 +84,7 @@ STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed");
STATISTIC(NumEligibleFunctions, "Number of functions that are eligible");
-/// Returns true if the \opIdx operand of \p CI is the callee operand.
+/// Returns true if the \p OpIdx operand of \p CI is the callee operand.
static bool isCalleeOperand(const CallBase *CI, unsigned OpIdx) {
return &CI->getCalledOperandUse() == &CI->getOperandUse(OpIdx);
}
@@ -148,22 +148,19 @@ bool isEligibleFunction(Function *F) {
if (F->hasFnAttribute(llvm::Attribute::NoMerge))
return false;
- if (F->hasAvailableExternallyLinkage()) {
+ if (F->hasAvailableExternallyLinkage())
return false;
- }
- if (F->getFunctionType()->isVarArg()) {
+ if (F->getFunctionType()->isVarArg())
return false;
- }
if (F->getCallingConv() == CallingConv::SwiftTail)
return false;
- // if function contains callsites with musttail, if we merge
+ // If a function contains callsites with musttail and we merge
// it, the merged function will have the musttail callsite, but
// the number of parameters can change, thus the parameter count
// of the callsite will mismatch with the function itself.
- // if (IgnoreMusttailFunction) {
for (const BasicBlock &BB : *F) {
for (const Instruction &I : BB) {
const auto *CB = dyn_cast<CallBase>(&I);
@@ -203,7 +200,6 @@ static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
return true;
}
-// copy from merge functions.cpp
static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
Type *SrcTy = V->getType();
if (SrcTy->isStructTy()) {
@@ -252,7 +248,8 @@ void GlobalMergeFunc::analyze(Module &M) {
auto FI = llvm::StructuralHashWithDifferences(Func, ignoreOp);
- // Convert the map to a vector for a serialization-friendly format.
+ // Convert the operand map to a vector for a serialization-friendly
+ // format.
IndexOperandHashVecType IndexOperandHashes;
for (auto &Pair : *FI.IndexOperandHashMap)
IndexOperandHashes.emplace_back(Pair);
@@ -595,7 +592,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
// This module check is not strictly necessary as the functions can move
// around. We just want to avoid merging functions from different
// modules than the first one in the functon map, as they may not end up
- // with not being ICFed.
+ // being ICFed by the linker.
if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) {
++NumMismatchedModuleIdGlobalMergeFunction;
continue;
@@ -616,12 +613,12 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
dbgs() << "[GlobalMergeFunc] Merging function count " << FuncMergeInfoSize
<< " in " << ModId << "\n";
});
+
for (auto &FMI : FuncMergeInfos) {
Changed = true;
// We've already validated all locations of constant operands pointed by
- // the parameters. Just use the first one to bookkeep the original
- // constants for each parameter
+ // the parameters. Populate parameters pointing to the original constants.
SmallVector<Constant *> Params;
SmallVector<Type *> ParamTypes;
for (auto &ParamLocs : ParamLocsVec) {
@@ -633,8 +630,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
ParamTypes.push_back(Opnd->getType());
}
- // Create a merged function derived from the first function in the current
- // module context.
+ // Create a merged function derived from the current function.
Function *MergedFunc =
createMergedFunction(FMI, ParamTypes, ParamLocsVec);
@@ -645,7 +641,8 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
MergedFunc->dump();
});
- // Create a thunk to the merged function.
+ // Transform the current function into a thunk that calls the merged
+ // function.
createThunk(FMI, Params, MergedFunc);
LLVM_DEBUG({
dbgs() << "[GlobalMergeFunc] Thunk generated: \n";
More information about the llvm-branch-commits
mailing list