[llvm] 7ec26b2 - [CGData] Stable Function Map (#112662)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 31 22:23:59 PDT 2024


Author: Kyungwoo Lee
Date: 2024-10-31T22:23:55-07:00
New Revision: 7ec26b23f27f2adfe6847bc69debb84efa34ed08

URL: https://github.com/llvm/llvm-project/commit/7ec26b23f27f2adfe6847bc69debb84efa34ed08
DIFF: https://github.com/llvm/llvm-project/commit/7ec26b23f27f2adfe6847bc69debb84efa34ed08.diff

LOG: [CGData] Stable Function Map (#112662)

These define the main data structures to represent stable functions and
group similar functions in a function map.
Serialization is supported in a binary or yaml form.

Depends on #112638.
This is a patch for
https://discourse.llvm.org/t/rfc-global-function-merging/82608.

Added: 
    llvm/include/llvm/CGData/StableFunctionMap.h
    llvm/include/llvm/CGData/StableFunctionMapRecord.h
    llvm/lib/CGData/StableFunctionMap.cpp
    llvm/lib/CGData/StableFunctionMapRecord.cpp
    llvm/unittests/CGData/StableFunctionMapRecordTest.cpp
    llvm/unittests/CGData/StableFunctionMapTest.cpp

Modified: 
    llvm/lib/CGData/CMakeLists.txt
    llvm/unittests/CGData/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CGData/StableFunctionMap.h b/llvm/include/llvm/CGData/StableFunctionMap.h
new file mode 100644
index 00000000000000..a2e1abb49a3eba
--- /dev/null
+++ b/llvm/include/llvm/CGData/StableFunctionMap.h
@@ -0,0 +1,138 @@
+//===- StableFunctionMap.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This defines the StableFunctionMap class, to track similar functions.
+// It provides a mechanism to map stable hashes of functions to their
+// corresponding metadata. It includes structures for storing function details
+// and methods for managing and querying these mappings.
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_CGDATA_STABLEFUNCTIONMAP_H
+#define LLVM_CGDATA_STABLEFUNCTIONMAP_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/StructuralHash.h"
+
+namespace llvm {
+
+using IndexPairHash = std::pair<IndexPair, stable_hash>;
+using IndexOperandHashVecType = SmallVector<IndexPairHash>;
+
+/// A stable function is a function with a stable hash while tracking the
+/// locations of ignored operands and their hashes.
+struct StableFunction {
+  /// The combined stable hash of the function.
+  stable_hash Hash;
+  /// The name of the function.
+  std::string FunctionName;
+  /// The name of the module the function is in.
+  std::string ModuleName;
+  /// The number of instructions.
+  unsigned InstCount;
+  /// A vector of pairs of IndexPair and operand hash which was skipped.
+  IndexOperandHashVecType IndexOperandHashes;
+
+  StableFunction(stable_hash Hash, const std::string FunctionName,
+                 const std::string ModuleName, unsigned InstCount,
+                 IndexOperandHashVecType &&IndexOperandHashes)
+      : Hash(Hash), FunctionName(FunctionName), ModuleName(ModuleName),
+        InstCount(InstCount),
+        IndexOperandHashes(std::move(IndexOperandHashes)) {}
+  StableFunction() = default;
+};
+
+struct StableFunctionMap {
+  /// An efficient form of StableFunction for fast look-up
+  struct StableFunctionEntry {
+    /// The combined stable hash of the function.
+    stable_hash Hash;
+    /// Id of the function name.
+    unsigned FunctionNameId;
+    /// Id of the module name.
+    unsigned ModuleNameId;
+    /// The number of instructions.
+    unsigned InstCount;
+    /// A map from an IndexPair to a stable_hash which was skipped.
+    std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap;
+
+    StableFunctionEntry(
+        stable_hash Hash, unsigned FunctionNameId, unsigned ModuleNameId,
+        unsigned InstCount,
+        std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap)
+        : Hash(Hash), FunctionNameId(FunctionNameId),
+          ModuleNameId(ModuleNameId), InstCount(InstCount),
+          IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
+  };
+
+  using HashFuncsMapType =
+      DenseMap<stable_hash, SmallVector<std::unique_ptr<StableFunctionEntry>>>;
+
+  /// Get the HashToFuncs map for serialization.
+  const HashFuncsMapType &getFunctionMap() const { return HashToFuncs; }
+
+  /// Get the NameToId vector for serialization.
+  const SmallVector<std::string> getNames() const { return IdToName; }
+
+  /// Get an existing ID associated with the given name or create a new ID if it
+  /// doesn't exist.
+  unsigned getIdOrCreateForName(StringRef Name);
+
+  /// Get the name associated with a given ID
+  std::optional<std::string> getNameForId(unsigned Id) const;
+
+  /// Insert a `StableFunction` object into the function map. This method
+  /// handles the uniquing of string names and create a `StableFunctionEntry`
+  /// for insertion.
+  void insert(const StableFunction &Func);
+
+  /// Merge a \p OtherMap into this function map.
+  void merge(const StableFunctionMap &OtherMap);
+
+  /// \returns true if there is no stable function entry.
+  bool empty() const { return size() == 0; }
+
+  enum SizeType {
+    UniqueHashCount,        // The number of unique hashes in HashToFuncs.
+    TotalFunctionCount,     // The number of total functions in HashToFuncs.
+    MergeableFunctionCount, // The number of functions that can be merged based
+                            // on their hash.
+  };
+
+  /// \returns the size of StableFunctionMap.
+  /// \p Type is the type of size to return.
+  size_t size(SizeType Type = UniqueHashCount) const;
+
+  /// Finalize the stable function map by trimming content.
+  void finalize();
+
+private:
+  /// Insert a `StableFunctionEntry` into the function map directly. This
+  /// method assumes that string names have already been uniqued and the
+  /// `StableFunctionEntry` is ready for insertion.
+  void insert(std::unique_ptr<StableFunctionEntry> FuncEntry) {
+    assert(!Finalized && "Cannot insert after finalization");
+    HashToFuncs[FuncEntry->Hash].emplace_back(std::move(FuncEntry));
+  }
+
+  /// A map from a stable_hash to a vector of functions with that hash.
+  HashFuncsMapType HashToFuncs;
+  /// A vector of strings to hold names.
+  SmallVector<std::string> IdToName;
+  /// A map from StringRef (name) to an ID.
+  StringMap<unsigned> NameToId;
+  /// True if the function map is finalized with minimal content.
+  bool Finalized = false;
+
+  friend struct StableFunctionMapRecord;
+};
+
+} // namespace llvm
+
+#endif

diff  --git a/llvm/include/llvm/CGData/StableFunctionMapRecord.h b/llvm/include/llvm/CGData/StableFunctionMapRecord.h
new file mode 100644
index 00000000000000..0517f2c20d72ff
--- /dev/null
+++ b/llvm/include/llvm/CGData/StableFunctionMapRecord.h
@@ -0,0 +1,71 @@
+//===- StableFunctionMapRecord.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This defines the StableFunctionMapRecord structure, which provides
+// functionality for managing and serializing a StableFunctionMap. It includes
+// methods for serialization to and from raw and YAML streams, as well as
+// utilities for merging and finalizing function maps.
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
+#define LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
+
+#include "llvm/CGData/StableFunctionMap.h"
+#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+struct StableFunctionMapRecord {
+  std::unique_ptr<StableFunctionMap> FunctionMap;
+
+  StableFunctionMapRecord() {
+    FunctionMap = std::make_unique<StableFunctionMap>();
+  }
+
+  StableFunctionMapRecord(std::unique_ptr<StableFunctionMap> FunctionMap)
+      : FunctionMap(std::move(FunctionMap)) {}
+
+  /// A static helper function to serialize the stable function map without
+  /// owning the stable function map.
+  static void serialize(raw_ostream &OS, const StableFunctionMap *FunctionMap);
+
+  /// Serialize the stable function map to a raw_ostream.
+  void serialize(raw_ostream &OS) const;
+
+  /// Deserialize the stable function map from a raw_ostream.
+  void deserialize(const unsigned char *&Ptr);
+
+  /// Serialize the stable function map to a YAML stream.
+  void serializeYAML(yaml::Output &YOS) const;
+
+  /// Deserialize the stable function map from a YAML stream.
+  void deserializeYAML(yaml::Input &YIS);
+
+  /// Finalize the stable function map by trimming content.
+  void finalize() { FunctionMap->finalize(); }
+
+  /// Merge the stable function map into this one.
+  void merge(const StableFunctionMapRecord &Other) {
+    FunctionMap->merge(*Other.FunctionMap);
+  }
+
+  /// \returns true if the stable function map is empty.
+  bool empty() const { return FunctionMap->empty(); }
+
+  /// Print the stable function map in a YAML format.
+  void print(raw_ostream &OS = llvm::errs()) const {
+    yaml::Output YOS(OS);
+    serializeYAML(YOS);
+  }
+};
+
+} // namespace llvm
+
+#endif

diff  --git a/llvm/lib/CGData/CMakeLists.txt b/llvm/lib/CGData/CMakeLists.txt
index 157b0dfb7f9fcf..003173139f36c5 100644
--- a/llvm/lib/CGData/CMakeLists.txt
+++ b/llvm/lib/CGData/CMakeLists.txt
@@ -4,6 +4,8 @@ add_llvm_component_library(LLVMCGData
   CodeGenDataWriter.cpp
   OutlinedHashTree.cpp
   OutlinedHashTreeRecord.cpp
+  StableFunctionMap.cpp
+  StableFunctionMapRecord.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/CGData

diff  --git a/llvm/lib/CGData/StableFunctionMap.cpp b/llvm/lib/CGData/StableFunctionMap.cpp
new file mode 100644
index 00000000000000..cfef5b22fe7849
--- /dev/null
+++ b/llvm/lib/CGData/StableFunctionMap.cpp
@@ -0,0 +1,167 @@
+//===-- StableFunctionMap.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the functionality for the StableFunctionMap class, which
+// manages the mapping of stable function hashes to their metadata. It includes
+// methods for inserting, merging, and finalizing function entries, as well as
+// utilities for handling function names and IDs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CGData/StableFunctionMap.h"
+
+#define DEBUG_TYPE "stable-function-map"
+
+using namespace llvm;
+
+unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) {
+  auto It = NameToId.find(Name);
+  if (It != NameToId.end())
+    return It->second;
+  unsigned Id = IdToName.size();
+  assert(Id == NameToId.size() && "ID collision");
+  IdToName.emplace_back(Name.str());
+  NameToId[IdToName.back()] = Id;
+  return Id;
+}
+
+std::optional<std::string> StableFunctionMap::getNameForId(unsigned Id) const {
+  if (Id >= IdToName.size())
+    return std::nullopt;
+  return IdToName[Id];
+}
+
+void StableFunctionMap::insert(const StableFunction &Func) {
+  assert(!Finalized && "Cannot insert after finalization");
+  auto FuncNameId = getIdOrCreateForName(Func.FunctionName);
+  auto ModuleNameId = getIdOrCreateForName(Func.ModuleName);
+  auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
+  for (auto &[Index, Hash] : Func.IndexOperandHashes)
+    (*IndexOperandHashMap)[Index] = Hash;
+  auto FuncEntry = std::make_unique<StableFunctionEntry>(
+      Func.Hash, FuncNameId, ModuleNameId, Func.InstCount,
+      std::move(IndexOperandHashMap));
+  insert(std::move(FuncEntry));
+}
+
+void StableFunctionMap::merge(const StableFunctionMap &OtherMap) {
+  assert(!Finalized && "Cannot merge after finalization");
+  for (auto &[Hash, Funcs] : OtherMap.HashToFuncs) {
+    auto &ThisFuncs = HashToFuncs[Hash];
+    for (auto &Func : Funcs) {
+      auto FuncNameId =
+          getIdOrCreateForName(*OtherMap.getNameForId(Func->FunctionNameId));
+      auto ModuleNameId =
+          getIdOrCreateForName(*OtherMap.getNameForId(Func->ModuleNameId));
+      auto ClonedIndexOperandHashMap =
+          std::make_unique<IndexOperandHashMapType>(*Func->IndexOperandHashMap);
+      ThisFuncs.emplace_back(std::make_unique<StableFunctionEntry>(
+          Func->Hash, FuncNameId, ModuleNameId, Func->InstCount,
+          std::move(ClonedIndexOperandHashMap)));
+    }
+  }
+}
+
+size_t StableFunctionMap::size(SizeType Type) const {
+  switch (Type) {
+  case UniqueHashCount:
+    return HashToFuncs.size();
+  case TotalFunctionCount: {
+    size_t Count = 0;
+    for (auto &Funcs : HashToFuncs)
+      Count += Funcs.second.size();
+    return Count;
+  }
+  case MergeableFunctionCount: {
+    size_t Count = 0;
+    for (auto &[Hash, Funcs] : HashToFuncs)
+      if (Funcs.size() >= 2)
+        Count += Funcs.size();
+    return Count;
+  }
+  }
+  llvm_unreachable("Unhandled size type");
+}
+
+using ParamLocs = SmallVector<IndexPair>;
+static void removeIdenticalIndexPair(
+    SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>> &SFS) {
+  auto &RSF = SFS[0];
+  unsigned StableFunctionCount = SFS.size();
+
+  SmallVector<IndexPair> ToDelete;
+  for (auto &[Pair, Hash] : *(RSF->IndexOperandHashMap)) {
+    bool Identical = true;
+    for (unsigned J = 1; J < StableFunctionCount; ++J) {
+      auto &SF = SFS[J];
+      const auto &SHash = SF->IndexOperandHashMap->at(Pair);
+      if (Hash != SHash) {
+        Identical = false;
+        break;
+      }
+    }
+
+    // No need to parameterize them if the hashes are identical across stable
+    // functions.
+    if (Identical)
+      ToDelete.emplace_back(Pair);
+  }
+
+  for (auto &Pair : ToDelete)
+    for (auto &SF : SFS)
+      SF->IndexOperandHashMap->erase(Pair);
+}
+
+void StableFunctionMap::finalize() {
+  for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It) {
+    auto &[StableHash, SFS] = *It;
+
+    // Group stable functions by ModuleIdentifier.
+    std::stable_sort(SFS.begin(), SFS.end(),
+                     [&](const std::unique_ptr<StableFunctionEntry> &L,
+                         const std::unique_ptr<StableFunctionEntry> &R) {
+                       return *getNameForId(L->ModuleNameId) <
+                              *getNameForId(R->ModuleNameId);
+                     });
+
+    // Consider the first function as the root function.
+    auto &RSF = SFS[0];
+
+    bool Invalid = false;
+    unsigned StableFunctionCount = SFS.size();
+    for (unsigned I = 1; I < StableFunctionCount; ++I) {
+      auto &SF = SFS[I];
+      assert(RSF->Hash == SF->Hash);
+      if (RSF->InstCount != SF->InstCount) {
+        Invalid = true;
+        break;
+      }
+      if (RSF->IndexOperandHashMap->size() != SF->IndexOperandHashMap->size()) {
+        Invalid = true;
+        break;
+      }
+      for (auto &P : *RSF->IndexOperandHashMap) {
+        auto &InstOpndIndex = P.first;
+        if (!SF->IndexOperandHashMap->count(InstOpndIndex)) {
+          Invalid = true;
+          break;
+        }
+      }
+    }
+    if (Invalid) {
+      HashToFuncs.erase(It);
+      continue;
+    }
+
+    // Trim the index pair that has the same operand hash across
+    // stable functions.
+    removeIdenticalIndexPair(SFS);
+  }
+
+  Finalized = true;
+}

diff  --git a/llvm/lib/CGData/StableFunctionMapRecord.cpp b/llvm/lib/CGData/StableFunctionMapRecord.cpp
new file mode 100644
index 00000000000000..8eb667a651ebe9
--- /dev/null
+++ b/llvm/lib/CGData/StableFunctionMapRecord.cpp
@@ -0,0 +1,202 @@
+//===-- StableFunctionMapRecord.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the functionality for the StableFunctionMapRecord class,
+// including methods for serialization and deserialization of stable function
+// maps to and from raw and YAML streams. It also includes utilities for
+// managing function entries and their metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CGData/StableFunctionMapRecord.h"
+#include "llvm/Support/EndianStream.h"
+
+#define DEBUG_TYPE "stable-function-map-record"
+
+using namespace llvm;
+using namespace llvm::support;
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(IndexPairHash)
+LLVM_YAML_IS_SEQUENCE_VECTOR(StableFunction)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<IndexPairHash> {
+  static void mapping(IO &IO, IndexPairHash &Key) {
+    IO.mapRequired("InstIndex", Key.first.first);
+    IO.mapRequired("OpndIndex", Key.first.second);
+    IO.mapRequired("OpndHash", Key.second);
+  }
+};
+
+template <> struct MappingTraits<StableFunction> {
+  static void mapping(IO &IO, StableFunction &Func) {
+    IO.mapRequired("Hash", Func.Hash);
+    IO.mapRequired("FunctionName", Func.FunctionName);
+    IO.mapRequired("ModuleName", Func.ModuleName);
+    IO.mapRequired("InstCount", Func.InstCount);
+    IO.mapRequired("IndexOperandHashes", Func.IndexOperandHashes);
+  }
+};
+
+} // namespace yaml
+} // namespace llvm
+
+// Get a sorted vector of StableFunctionEntry pointers.
+static SmallVector<const StableFunctionMap::StableFunctionEntry *>
+getStableFunctionEntries(const StableFunctionMap &SFM) {
+  SmallVector<const StableFunctionMap::StableFunctionEntry *> FuncEntries;
+  for (const auto &P : SFM.getFunctionMap())
+    for (auto &Func : P.second)
+      FuncEntries.emplace_back(Func.get());
+
+  std::stable_sort(
+      FuncEntries.begin(), FuncEntries.end(), [&](auto &A, auto &B) {
+        return std::tuple(A->Hash, SFM.getNameForId(A->ModuleNameId),
+                          SFM.getNameForId(A->FunctionNameId)) <
+               std::tuple(B->Hash, SFM.getNameForId(B->ModuleNameId),
+                          SFM.getNameForId(B->FunctionNameId));
+      });
+  return FuncEntries;
+}
+
+// Get a sorted vector of IndexOperandHashes.
+static IndexOperandHashVecType getStableIndexOperandHashes(
+    const StableFunctionMap::StableFunctionEntry *FuncEntry) {
+  IndexOperandHashVecType IndexOperandHashes;
+  for (auto &[Indices, OpndHash] : *FuncEntry->IndexOperandHashMap)
+    IndexOperandHashes.emplace_back(Indices, OpndHash);
+  // The indices are unique, so we can just sort by the first.
+  llvm::sort(IndexOperandHashes);
+  return IndexOperandHashes;
+}
+
+void StableFunctionMapRecord::serialize(raw_ostream &OS) const {
+  serialize(OS, FunctionMap.get());
+}
+
+void StableFunctionMapRecord::serialize(raw_ostream &OS,
+                                        const StableFunctionMap *FunctionMap) {
+  support::endian::Writer Writer(OS, endianness::little);
+
+  // Write Names.
+  auto &Names = FunctionMap->getNames();
+  uint32_t ByteSize = 4;
+  Writer.write<uint32_t>(Names.size());
+  for (auto &Name : Names) {
+    Writer.OS << Name << '\0';
+    ByteSize += Name.size() + 1;
+  }
+  // Align ByteSize to 4 bytes.
+  uint32_t Padding = offsetToAlignment(ByteSize, Align(4));
+  for (uint32_t I = 0; I < Padding; ++I)
+    Writer.OS << '\0';
+
+  // Write StableFunctionEntries whose pointers are sorted.
+  auto FuncEntries = getStableFunctionEntries(*FunctionMap);
+  Writer.write<uint32_t>(FuncEntries.size());
+
+  for (const auto *FuncRef : FuncEntries) {
+    Writer.write<stable_hash>(FuncRef->Hash);
+    Writer.write<uint32_t>(FuncRef->FunctionNameId);
+    Writer.write<uint32_t>(FuncRef->ModuleNameId);
+    Writer.write<uint32_t>(FuncRef->InstCount);
+
+    // Emit IndexOperandHashes sorted from IndexOperandHashMap.
+    IndexOperandHashVecType IndexOperandHashes =
+        getStableIndexOperandHashes(FuncRef);
+    Writer.write<uint32_t>(IndexOperandHashes.size());
+    for (auto &IndexOperandHash : IndexOperandHashes) {
+      Writer.write<uint32_t>(IndexOperandHash.first.first);
+      Writer.write<uint32_t>(IndexOperandHash.first.second);
+      Writer.write<stable_hash>(IndexOperandHash.second);
+    }
+  }
+}
+
+void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr) {
+  // Assert that Ptr is 4-byte aligned
+  assert(((uintptr_t)Ptr % 4) == 0);
+  // Read Names.
+  auto NumNames =
+      endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+  // Early exit if there is no name.
+  if (NumNames == 0)
+    return;
+  for (unsigned I = 0; I < NumNames; ++I) {
+    StringRef Name(reinterpret_cast<const char *>(Ptr));
+    Ptr += Name.size() + 1;
+    FunctionMap->getIdOrCreateForName(Name);
+  }
+  // Align Ptr to 4 bytes.
+  Ptr = reinterpret_cast<const uint8_t *>(alignAddr(Ptr, Align(4)));
+
+  // Read StableFunctionEntries.
+  auto NumFuncs =
+      endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+  for (unsigned I = 0; I < NumFuncs; ++I) {
+    auto Hash =
+        endian::readNext<stable_hash, endianness::little, unaligned>(Ptr);
+    auto FunctionNameId =
+        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+    assert(FunctionMap->getNameForId(FunctionNameId) &&
+           "FunctionNameId out of range");
+    auto ModuleNameId =
+        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+    assert(FunctionMap->getNameForId(ModuleNameId) &&
+           "ModuleNameId out of range");
+    auto InstCount =
+        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+
+    // Read IndexOperandHashes to build IndexOperandHashMap
+    auto NumIndexOperandHashes =
+        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+    auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
+    for (unsigned J = 0; J < NumIndexOperandHashes; ++J) {
+      auto InstIndex =
+          endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+      auto OpndIndex =
+          endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+      auto OpndHash =
+          endian::readNext<stable_hash, endianness::little, unaligned>(Ptr);
+      assert(InstIndex < InstCount && "InstIndex out of range");
+
+      IndexOperandHashMap->try_emplace({InstIndex, OpndIndex}, OpndHash);
+    }
+
+    // Insert a new StableFunctionEntry into the map.
+    auto FuncEntry = std::make_unique<StableFunctionMap::StableFunctionEntry>(
+        Hash, FunctionNameId, ModuleNameId, InstCount,
+        std::move(IndexOperandHashMap));
+
+    FunctionMap->insert(std::move(FuncEntry));
+  }
+}
+
+void StableFunctionMapRecord::serializeYAML(yaml::Output &YOS) const {
+  auto FuncEntries = getStableFunctionEntries(*FunctionMap);
+  SmallVector<StableFunction> Functions;
+  for (const auto *FuncEntry : FuncEntries) {
+    auto IndexOperandHashes = getStableIndexOperandHashes(FuncEntry);
+    Functions.emplace_back(
+        FuncEntry->Hash, *FunctionMap->getNameForId(FuncEntry->FunctionNameId),
+        *FunctionMap->getNameForId(FuncEntry->ModuleNameId),
+        FuncEntry->InstCount, std::move(IndexOperandHashes));
+  }
+
+  YOS << Functions;
+}
+
+void StableFunctionMapRecord::deserializeYAML(yaml::Input &YIS) {
+  std::vector<StableFunction> Funcs;
+  YIS >> Funcs;
+  for (auto &Func : Funcs)
+    FunctionMap->insert(Func);
+  YIS.nextDocument();
+}

diff  --git a/llvm/unittests/CGData/CMakeLists.txt b/llvm/unittests/CGData/CMakeLists.txt
index 792b323130b474..0bdb9e1f08c702 100644
--- a/llvm/unittests/CGData/CMakeLists.txt
+++ b/llvm/unittests/CGData/CMakeLists.txt
@@ -9,6 +9,8 @@ set(LLVM_LINK_COMPONENTS
 add_llvm_unittest(CGDataTests
   OutlinedHashTreeRecordTest.cpp
   OutlinedHashTreeTest.cpp
+  StableFunctionMapRecordTest.cpp
+  StableFunctionMapTest.cpp
   )
 
 target_link_libraries(CGDataTests PRIVATE LLVMTestingSupport)

diff  --git a/llvm/unittests/CGData/StableFunctionMapRecordTest.cpp b/llvm/unittests/CGData/StableFunctionMapRecordTest.cpp
new file mode 100644
index 00000000000000..f5c9afe449da35
--- /dev/null
+++ b/llvm/unittests/CGData/StableFunctionMapRecordTest.cpp
@@ -0,0 +1,127 @@
+//===- StableFunctionMapRecordTest.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CGData/StableFunctionMapRecord.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+TEST(StableFunctionMapRecordTest, Print) {
+  StableFunctionMapRecord MapRecord;
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}}};
+  MapRecord.FunctionMap->insert(Func1);
+
+  const char *ExpectedMapStr = R"(---
+- Hash:            1
+  FunctionName:    Func1
+  ModuleName:      Mod1
+  InstCount:       2
+  IndexOperandHashes:
+    - InstIndex:       0
+      OpndIndex:       1
+      OpndHash:        3
+...
+)";
+  std::string MapDump;
+  raw_string_ostream OS(MapDump);
+  MapRecord.print(OS);
+  EXPECT_EQ(ExpectedMapStr, MapDump);
+}
+
+TEST(StableFunctionMapRecordTest, Stable) {
+  StableFunction Func1{1, "Func2", "Mod1", 1, {}};
+  StableFunction Func2{1, "Func3", "Mod1", 1, {}};
+  StableFunction Func3{1, "Func1", "Mod2", 1, {}};
+  StableFunction Func4{2, "Func4", "Mod3", 1, {}};
+
+  StableFunctionMapRecord MapRecord1;
+  MapRecord1.FunctionMap->insert(Func1);
+  MapRecord1.FunctionMap->insert(Func2);
+  MapRecord1.FunctionMap->insert(Func3);
+  MapRecord1.FunctionMap->insert(Func4);
+
+  StableFunctionMapRecord MapRecord2;
+  MapRecord2.FunctionMap->insert(Func4);
+  MapRecord2.FunctionMap->insert(Func3);
+  MapRecord2.FunctionMap->insert(Func2);
+  MapRecord2.FunctionMap->insert(Func1);
+
+  // Output is sorted by hash (1 < 2), module name (Mod1 < Mod2), and function
+  // name (Func2 < Func3).
+  std::string MapDump1;
+  raw_string_ostream OS1(MapDump1);
+  MapRecord1.print(OS1);
+  std::string MapDump2;
+  raw_string_ostream OS2(MapDump2);
+  MapRecord2.print(OS2);
+  EXPECT_EQ(MapDump1, MapDump2);
+}
+
+TEST(StableFunctionMapRecordTest, Serialize) {
+  StableFunctionMapRecord MapRecord1;
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}, {{1, 2}, 4}}};
+  StableFunction Func2{2, "Func2", "Mod1", 3, {{{0, 1}, 2}}};
+  StableFunction Func3{2, "Func3", "Mod1", 3, {{{0, 1}, 3}}};
+  MapRecord1.FunctionMap->insert(Func1);
+  MapRecord1.FunctionMap->insert(Func2);
+  MapRecord1.FunctionMap->insert(Func3);
+
+  // Serialize and deserialize the map.
+  SmallVector<char> Out;
+  raw_svector_ostream OS(Out);
+  MapRecord1.serialize(OS);
+
+  StableFunctionMapRecord MapRecord2;
+  const uint8_t *Data = reinterpret_cast<const uint8_t *>(Out.data());
+  MapRecord2.deserialize(Data);
+
+  // Two maps should be identical.
+  std::string MapDump1;
+  raw_string_ostream OS1(MapDump1);
+  MapRecord1.print(OS1);
+  std::string MapDump2;
+  raw_string_ostream OS2(MapDump2);
+  MapRecord2.print(OS2);
+
+  EXPECT_EQ(MapDump1, MapDump2);
+}
+
+TEST(StableFunctionMapRecordTest, SerializeYAML) {
+  StableFunctionMapRecord MapRecord1;
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}, {{1, 2}, 4}}};
+  StableFunction Func2{2, "Func2", "Mod1", 3, {{{0, 1}, 2}}};
+  StableFunction Func3{2, "Func3", "Mod1", 3, {{{0, 1}, 3}}};
+  MapRecord1.FunctionMap->insert(Func1);
+  MapRecord1.FunctionMap->insert(Func2);
+  MapRecord1.FunctionMap->insert(Func3);
+
+  // Serialize and deserialize the map in a YAML format.
+  std::string Out;
+  raw_string_ostream OS(Out);
+  yaml::Output YOS(OS);
+  MapRecord1.serializeYAML(YOS);
+
+  StableFunctionMapRecord MapRecord2;
+  yaml::Input YIS(StringRef(Out.data(), Out.size()));
+  MapRecord2.deserializeYAML(YIS);
+
+  // Two maps should be identical.
+  std::string MapDump1;
+  raw_string_ostream OS1(MapDump1);
+  MapRecord1.print(OS1);
+  std::string MapDump2;
+  raw_string_ostream OS2(MapDump2);
+  MapRecord2.print(OS2);
+
+  EXPECT_EQ(MapDump1, MapDump2);
+}
+
+} // end namespace

diff  --git a/llvm/unittests/CGData/StableFunctionMapTest.cpp b/llvm/unittests/CGData/StableFunctionMapTest.cpp
new file mode 100644
index 00000000000000..5e178dcd667240
--- /dev/null
+++ b/llvm/unittests/CGData/StableFunctionMapTest.cpp
@@ -0,0 +1,128 @@
+//===- StableFunctionMapTest.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CGData/StableFunctionMap.h"
+#include "gmock/gmock-matchers.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+using testing::Contains;
+using testing::IsEmpty;
+using testing::Key;
+using testing::Not;
+using testing::Pair;
+using testing::SizeIs;
+
+TEST(StableFunctionMap, Name) {
+  StableFunctionMap Map;
+  EXPECT_TRUE(Map.empty());
+  EXPECT_TRUE(Map.getNames().empty());
+  unsigned ID1 = Map.getIdOrCreateForName("Func1");
+  unsigned ID2 = Map.getIdOrCreateForName("Func2");
+  unsigned ID3 = Map.getIdOrCreateForName("Func1");
+
+  EXPECT_THAT(Map.getNames(), SizeIs(2));
+  // The 
diff erent names should return 
diff erent IDs.
+  EXPECT_NE(ID1, ID2);
+  // The same name should return the same ID.
+  EXPECT_EQ(ID1, ID3);
+  // The IDs should be valid.
+  EXPECT_EQ(*Map.getNameForId(ID1), "Func1");
+  EXPECT_EQ(*Map.getNameForId(ID2), "Func2");
+}
+
+TEST(StableFunctionMap, Insert) {
+  StableFunctionMap Map;
+
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}}};
+  StableFunction Func2{1, "Func2", "Mod1", 2, {{{0, 1}, 2}}};
+  Map.insert(Func1);
+  Map.insert(Func2);
+  // We only have a unique hash, 1
+  EXPECT_THAT(Map, SizeIs(1));
+  // We have two functions with the same hash which are potentially mergeable.
+  EXPECT_EQ(Map.size(StableFunctionMap::SizeType::TotalFunctionCount), 2u);
+  EXPECT_EQ(Map.size(StableFunctionMap::SizeType::MergeableFunctionCount), 2u);
+}
+
+TEST(StableFunctionMap, Merge) {
+  StableFunctionMap Map1;
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}}};
+  StableFunction Func2{1, "Func2", "Mod1", 2, {{{0, 1}, 2}}};
+  StableFunction Func3{2, "Func3", "Mod1", 2, {{{1, 1}, 2}}};
+  Map1.insert(Func1);
+  Map1.insert(Func2);
+  Map1.insert(Func3);
+
+  StableFunctionMap Map2;
+  StableFunction Func4{1, "Func4", "Mod2", 2, {{{0, 1}, 4}}};
+  StableFunction Func5{2, "Func5", "Mod2", 2, {{{1, 1}, 5}}};
+  StableFunction Func6{3, "Func6", "Mod2", 2, {{{1, 1}, 6}}};
+  Map2.insert(Func4);
+  Map2.insert(Func5);
+  Map2.insert(Func6);
+
+  // Merge two maps.
+  Map1.merge(Map2);
+
+  // We only have two unique hashes, 1, 2 and 3
+  EXPECT_THAT(Map1, SizeIs(3));
+  // We have total 6 functions.
+  EXPECT_EQ(Map1.size(StableFunctionMap::SizeType::TotalFunctionCount), 6u);
+  // We have 5 mergeable functions. Func6 only has a unique hash, 3.
+  EXPECT_EQ(Map1.size(StableFunctionMap::SizeType::MergeableFunctionCount), 5u);
+}
+
+TEST(StableFunctionMap, Finalize1) {
+  StableFunctionMap Map;
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}}};
+  StableFunction Func2{1, "Func2", "Mod2", 3, {{{0, 1}, 2}}};
+  Map.insert(Func1);
+  Map.insert(Func2);
+
+  // Instruction count is mis-matched, so they're not mergeable.
+  Map.finalize();
+  EXPECT_THAT(Map, IsEmpty());
+}
+
+TEST(StableFunctionMap, Finalize2) {
+  StableFunctionMap Map;
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}}};
+  StableFunction Func2{1, "Func2", "Mod2", 2, {{{0, 1}, 2}, {{1, 1}, 1}}};
+  Map.insert(Func1);
+  Map.insert(Func2);
+
+  // Operand map size is mis-matched, so they're not mergeable.
+  Map.finalize();
+  EXPECT_THAT(Map, IsEmpty());
+}
+
+TEST(StableFunctionMap, Finalize3) {
+  StableFunctionMap Map;
+  StableFunction Func1{1, "Func1", "Mod1", 2, {{{0, 1}, 3}, {{1, 1}, 1}}};
+  StableFunction Func2{1, "Func2", "Mod2", 2, {{{0, 1}, 2}, {{1, 1}, 1}}};
+  Map.insert(Func1);
+  Map.insert(Func2);
+
+  // The same operand entry is removed, which is redundant.
+  Map.finalize();
+  auto &M = Map.getFunctionMap();
+  EXPECT_THAT(M, SizeIs(1));
+  auto &FuncEntries = M.begin()->second;
+  for (auto &FuncEntry : FuncEntries) {
+    EXPECT_THAT(*FuncEntry->IndexOperandHashMap, SizeIs(1));
+    ASSERT_THAT(*FuncEntry->IndexOperandHashMap,
+                Not(Contains(Key(Pair(1, 1)))));
+  }
+}
+
+} // end namespace


        


More information about the llvm-commits mailing list