[llvm] 76dd742 - [CGData] Lazy loading support for stable function map (#151660)

Thu Aug 14 13:49:12 PDT 2025

Author: Zhaoxuan Jiang
Date: 2025-08-14T13:49:09-07:00
New Revision: 76dd742f7b32e4d3acf50fab1dbbd897f215837e

URL: https://github.com/llvm/llvm-project/commit/76dd742f7b32e4d3acf50fab1dbbd897f215837e
DIFF: https://github.com/llvm/llvm-project/commit/76dd742f7b32e4d3acf50fab1dbbd897f215837e.diff

LOG: [CGData] Lazy loading support for stable function map (#151660)

The stable function map can be huge for a large application. Fully
loading it is slow and consumes a significant amount of memory, which is
unnecessary and drastically slows down compilation, especially for
non-LTO and distributed-ThinLTO setups. This patch introduces opt-in
lazy loading support for the stable function map. The detailed changes
are:

- `StableFunctionMap`
  - The map now stores entries in an `EntryStorage` struct, which includes
    offsets for serialized entries and a `std::once_flag` for thread-safe
    lazy loading.
  - The underlying map type is changed from `DenseMap` to
    `std::unordered_map` for compatibility with `std::once_flag`.
  - `contains()`, `size()` and `at()` are implemented to only load
    requested entries on demand.

- Lazy Loading Mechanism
  - When reading indexed codegen data, if the newly-introduced
    `-indexed-codegen-data-lazy-loading` flag is set, the stable function
    map is not fully deserialized up front. The binary format for the stable
    function map now includes offsets and sizes to support lazy loading.
  - The safety of lazy loading is guarded by the once flag per function
    hash. This guarantees that even in a multi-threaded environment, the
    deserialization for a given function hash will happen exactly once. The
    first thread to request it performs the load, and subsequent threads
    will wait for it to complete before using the data. For single-threaded
    builds, the overhead is negligible (a single check on the once flag).
    For multi-threaded scenarios, users can omit the flag to retain the
    previous eager-loading behavior.

Added: 
    

Modified: 
    llvm/include/llvm/CGData/CodeGenData.h
    llvm/include/llvm/CGData/CodeGenData.inc
    llvm/include/llvm/CGData/StableFunctionMap.h
    llvm/include/llvm/CGData/StableFunctionMapRecord.h
    llvm/lib/CGData/CodeGenData.cpp
    llvm/lib/CGData/CodeGenDataReader.cpp
    llvm/lib/CGData/StableFunctionMap.cpp
    llvm/lib/CGData/StableFunctionMapRecord.cpp
    llvm/lib/CodeGen/GlobalMergeFunctions.cpp
    llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
    llvm/test/tools/llvm-cgdata/empty.test
    llvm/test/tools/llvm-cgdata/error.test
    llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
    llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
    llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
    llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
    llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
    llvm/tools/llvm-cgdata/Opts.td
    llvm/tools/llvm-cgdata/llvm-cgdata.cpp
    llvm/unittests/CGData/StableFunctionMapTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h
index 38b96b72ccac6..e44497a408245 100644

--- a/llvm/include/llvm/CGData/CodeGenData.h
+++ b/llvm/include/llvm/CGData/CodeGenData.h
@@ -285,6 +285,9 @@ enum CGDataVersion {
   // Version 3 adds the total size of the Names in the stable function map so
   // we can skip reading them into the memory for non-assertion builds.
   Version3 = 3,
+  // Version 4 adjusts the structure of stable function merging map for
+  // efficient lazy loading support.
+  Version4 = 4,
   CurrentVersion = CG_DATA_INDEX_VERSION
 };
 const uint64_t Version = CGDataVersion::CurrentVersion;

diff  --git a/llvm/include/llvm/CGData/CodeGenData.inc b/llvm/include/llvm/CGData/CodeGenData.inc
index 94de4c0b017a2..d5fbe2fb97718 100644
--- a/llvm/include/llvm/CGData/CodeGenData.inc
+++ b/llvm/include/llvm/CGData/CodeGenData.inc
@@ -49,4 +49,4 @@ CG_DATA_SECT_ENTRY(CG_merge, CG_DATA_QUOTE(CG_DATA_MERGE_COMMON),
 #endif
 
 /* Indexed codegen data format version (start from 1). */
-#define CG_DATA_INDEX_VERSION 3
+#define CG_DATA_INDEX_VERSION 4

diff  --git a/llvm/include/llvm/CGData/StableFunctionMap.h b/llvm/include/llvm/CGData/StableFunctionMap.h
index bcb72e8216973..ea3523c3a3299 100644
--- a/llvm/include/llvm/CGData/StableFunctionMap.h
+++ b/llvm/include/llvm/CGData/StableFunctionMap.h
@@ -20,6 +20,8 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/IR/StructuralHash.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <mutex>
 
 namespace llvm {
 
@@ -72,11 +74,37 @@ struct StableFunctionMap {
           IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
   };
 
-  using HashFuncsMapType =
-      DenseMap<stable_hash, SmallVector<std::unique_ptr<StableFunctionEntry>>>;
+  using StableFunctionEntries =
+      SmallVector<std::unique_ptr<StableFunctionEntry>>;
+
+  /// In addition to the deserialized StableFunctionEntry, the struct stores
+  /// the offsets of corresponding serialized stable function entries, and a
+  /// once flag for safe lazy loading in a multithreaded environment.
+  struct EntryStorage {
+    /// The actual storage of deserialized stable function entries. If the map
+    /// is lazily loaded, this will be empty until the first access by the
+    /// corresponding function hash.
+    StableFunctionEntries Entries;
+
+  private:
+    /// This is used to deserialize the entry lazily. Each element is the
+    /// corresponding serialized stable function entry's offset in the memory
+    /// buffer (StableFunctionMap::Buffer).
+    /// The offsets are only populated when loading the map lazily, otherwise
+    /// it is empty.
+    SmallVector<uint64_t> Offsets;
+    std::once_flag LazyLoadFlag;
+    friend struct StableFunctionMap;
+    friend struct StableFunctionMapRecord;
+  };
+
+  // Note: DenseMap requires value type to be copyable even if only using
+  // in-place insertion. Use STL instead. This also affects the
+  // deletion-while-iteration in finalize().
+  using HashFuncsMapType = std::unordered_map<stable_hash, EntryStorage>;
 
   /// Get the HashToFuncs map for serialization.
-  const HashFuncsMapType &getFunctionMap() const { return HashToFuncs; }
+  const HashFuncsMapType &getFunctionMap() const;
 
   /// Get the NameToId vector for serialization.
   ArrayRef<std::string> getNames() const { return IdToName; }
@@ -99,6 +127,19 @@ struct StableFunctionMap {
   /// \returns true if there is no stable function entry.
   bool empty() const { return size() == 0; }
 
+  /// \returns true if there is an entry for the given function hash.
+  /// This does not trigger lazy loading.
+  bool contains(HashFuncsMapType::key_type FunctionHash) const {
+    return HashToFuncs.count(FunctionHash) > 0;
+  }
+
+  /// \returns the stable function entries for the given function hash. If the
+  /// map is lazily loaded, it will deserialize the entries if it is not already
+  /// done, other requests to the same hash at the same time will be blocked
+  /// until the entries are deserialized.
+  const StableFunctionEntries &
+  at(HashFuncsMapType::key_type FunctionHash) const;
+
   enum SizeType {
     UniqueHashCount,        // The number of unique hashes in HashToFuncs.
     TotalFunctionCount,     // The number of total functions in HashToFuncs.
@@ -119,17 +160,31 @@ struct StableFunctionMap {
   /// `StableFunctionEntry` is ready for insertion.
   void insert(std::unique_ptr<StableFunctionEntry> FuncEntry) {
     assert(!Finalized && "Cannot insert after finalization");
-    HashToFuncs[FuncEntry->Hash].emplace_back(std::move(FuncEntry));
+    HashToFuncs[FuncEntry->Hash].Entries.emplace_back(std::move(FuncEntry));
   }
 
+  void deserializeLazyLoadingEntry(HashFuncsMapType::iterator It) const;
+
+  /// Eagerly deserialize all the unloaded entries in the lazy loading map.
+  void deserializeLazyLoadingEntries() const;
+
+  bool isLazilyLoaded() const { return (bool)Buffer; }
+
   /// A map from a stable_hash to a vector of functions with that hash.
-  HashFuncsMapType HashToFuncs;
+  mutable HashFuncsMapType HashToFuncs;
   /// A vector of strings to hold names.
   SmallVector<std::string> IdToName;
   /// A map from StringRef (name) to an ID.
   StringMap<unsigned> NameToId;
   /// True if the function map is finalized with minimal content.
   bool Finalized = false;
+  /// The memory buffer that contains the serialized stable function map for
+  /// lazy loading.
+  /// Non-empty only if this StableFunctionMap is created from a MemoryBuffer
+  /// (i.e. by IndexedCodeGenDataReader::read()) and lazily deserialized.
+  std::shared_ptr<MemoryBuffer> Buffer;
+  /// Whether to read stable function names from the buffer.
+  bool ReadStableFunctionMapNames = true;
 
   friend struct StableFunctionMapRecord;
 };

diff  --git a/llvm/include/llvm/CGData/StableFunctionMapRecord.h b/llvm/include/llvm/CGData/StableFunctionMapRecord.h
index a75cb12a70ba6..2d8b573a3cb46 100644
--- a/llvm/include/llvm/CGData/StableFunctionMapRecord.h
+++ b/llvm/include/llvm/CGData/StableFunctionMapRecord.h
@@ -24,6 +24,26 @@
 
 namespace llvm {
 
+/// The structure of the serialized stable function map is as follows:
+/// - Number of unique function/module names
+/// - Total size of unique function/module names for opt-in skipping
+/// - Unique function/module names
+/// - Padding to align to 4 bytes
+/// - Number of StableFunctionEntries
+/// - Hashes of each StableFunctionEntry
+/// - Fixed-size fields for each StableFunctionEntry (the order is consistent
+///   with the hashes above):
+///   - FunctionNameId
+///   - ModuleNameId
+///   - InstCount
+///   - Relative offset to the beginning of IndexOperandHashes for this entry
+/// - Total size of variable-sized IndexOperandHashes for lazy-loading support
+/// - Variable-sized IndexOperandHashes for each StableFunctionEntry:
+///   - Number of IndexOperandHashes
+///   - Contents of each IndexOperandHashes
+///     - InstIndex
+///     - OpndIndex
+///     - OpndHash
 struct StableFunctionMapRecord {
   std::unique_ptr<StableFunctionMap> FunctionMap;
 
@@ -40,13 +60,25 @@ struct StableFunctionMapRecord {
                                  const StableFunctionMap *FunctionMap,
                                  std::vector<CGDataPatchItem> &PatchItems);
 
+  /// A static helper function to deserialize the stable function map entry.
+  /// Ptr should be pointing to the start of the fixed-sized fields of the
+  /// entry when passed in.
+  LLVM_ABI static void deserializeEntry(const unsigned char *Ptr,
+                                        stable_hash Hash,
+                                        StableFunctionMap *FunctionMap);
+
   /// Serialize the stable function map to a raw_ostream.
   LLVM_ABI void serialize(raw_ostream &OS,
                           std::vector<CGDataPatchItem> &PatchItems) const;
 
   /// Deserialize the stable function map from a raw_ostream.
-  LLVM_ABI void deserialize(const unsigned char *&Ptr,
-                            bool ReadStableFunctionMapNames = true);
+  LLVM_ABI void deserialize(const unsigned char *&Ptr);
+
+  /// Lazily deserialize the stable function map from `Buffer` starting at
+  /// `Offset`. The individual stable function entry would be read lazily from
+  /// `Buffer` when the function map is accessed.
+  LLVM_ABI void lazyDeserialize(std::shared_ptr<MemoryBuffer> Buffer,
+                                uint64_t Offset);
 
   /// Serialize the stable function map to a YAML stream.
   LLVM_ABI void serializeYAML(yaml::Output &YOS) const;
@@ -70,6 +102,18 @@ struct StableFunctionMapRecord {
     yaml::Output YOS(OS);
     serializeYAML(YOS);
   }
+
+  /// Set whether to read stable function names from the buffer.
+  /// Has no effect if the function map is read from a YAML stream.
+  void setReadStableFunctionMapNames(bool Read) {
+    assert(
+        FunctionMap->empty() &&
+        "Cannot change ReadStableFunctionMapNames after the map is populated");
+    FunctionMap->ReadStableFunctionMapNames = Read;
+  }
+
+private:
+  void deserialize(const unsigned char *&Ptr, bool Lazy);
 };
 
 } // namespace llvm

diff  --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp
index cd012342e1958..b4f08c3d13b0d 100644
--- a/llvm/lib/CGData/CodeGenData.cpp
+++ b/llvm/lib/CGData/CodeGenData.cpp
@@ -186,7 +186,7 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
     return make_error<CGDataError>(cgdata_error::unsupported_version);
   H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
 
-  static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version3,
+  static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version4,
                 "Please update the offset computation below if a new field has "
                 "been added to the header.");
   H.OutlinedHashTreeOffset =

diff  --git a/llvm/lib/CGData/CodeGenDataReader.cpp b/llvm/lib/CGData/CodeGenDataReader.cpp
index 0ab35499c8986..fc59be8df525a 100644
--- a/llvm/lib/CGData/CodeGenDataReader.cpp
+++ b/llvm/lib/CGData/CodeGenDataReader.cpp
@@ -26,6 +26,12 @@ static cl::opt<bool> IndexedCodeGenDataReadFunctionMapNames(
              "disabled to save memory and time for final consumption of the "
              "indexed CodeGenData in production."));
 
+cl::opt<bool> IndexedCodeGenDataLazyLoading(
+    "indexed-codegen-data-lazy-loading", cl::init(false), cl::Hidden,
+    cl::desc(
+        "Lazily load indexed CodeGenData. Enable to save memory and time "
+        "for final consumption of the indexed CodeGenData in production."));
+
 namespace llvm {
 
 static Expected<std::unique_ptr<MemoryBuffer>>
@@ -109,11 +115,20 @@ Error IndexedCodeGenDataReader::read() {
       return error(cgdata_error::eof);
     HashTreeRecord.deserialize(Ptr);
   }
+
+  // TODO: lazy loading support for outlined hash tree.
+  std::shared_ptr<MemoryBuffer> SharedDataBuffer = std::move(DataBuffer);
   if (hasStableFunctionMap()) {
     const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
     if (Ptr >= End)
       return error(cgdata_error::eof);
-    FunctionMapRecord.deserialize(Ptr, IndexedCodeGenDataReadFunctionMapNames);
+    FunctionMapRecord.setReadStableFunctionMapNames(
+        IndexedCodeGenDataReadFunctionMapNames);
+    if (IndexedCodeGenDataLazyLoading)
+      FunctionMapRecord.lazyDeserialize(SharedDataBuffer,
+                                        Header.StableFunctionMapOffset);
+    else
+      FunctionMapRecord.deserialize(Ptr);
   }
 
   return success();

diff  --git a/llvm/lib/CGData/StableFunctionMap.cpp b/llvm/lib/CGData/StableFunctionMap.cpp
index 87f1e76afb60b..2f54fad0aa084 100644
--- a/llvm/lib/CGData/StableFunctionMap.cpp
+++ b/llvm/lib/CGData/StableFunctionMap.cpp
@@ -15,8 +15,10 @@
 
 #include "llvm/CGData/StableFunctionMap.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/CGData/StableFunctionMapRecord.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include <mutex>
 
 #define DEBUG_TYPE "stable-function-map"
 
@@ -93,9 +95,10 @@ void StableFunctionMap::insert(const StableFunction &Func) {
 
 void StableFunctionMap::merge(const StableFunctionMap &OtherMap) {
   assert(!Finalized && "Cannot merge after finalization");
+  deserializeLazyLoadingEntries();
   for (auto &[Hash, Funcs] : OtherMap.HashToFuncs) {
-    auto &ThisFuncs = HashToFuncs[Hash];
-    for (auto &Func : Funcs) {
+    auto &ThisFuncs = HashToFuncs[Hash].Entries;
+    for (auto &Func : Funcs.Entries) {
       auto FuncNameId =
           getIdOrCreateForName(*OtherMap.getNameForId(Func->FunctionNameId));
       auto ModuleNameId =
@@ -114,25 +117,63 @@ size_t StableFunctionMap::size(SizeType Type) const {
   case UniqueHashCount:
     return HashToFuncs.size();
   case TotalFunctionCount: {
+    deserializeLazyLoadingEntries();
     size_t Count = 0;
     for (auto &Funcs : HashToFuncs)
-      Count += Funcs.second.size();
+      Count += Funcs.second.Entries.size();
     return Count;
   }
   case MergeableFunctionCount: {
+    deserializeLazyLoadingEntries();
     size_t Count = 0;
     for (auto &[Hash, Funcs] : HashToFuncs)
-      if (Funcs.size() >= 2)
-        Count += Funcs.size();
+      if (Funcs.Entries.size() >= 2)
+        Count += Funcs.Entries.size();
     return Count;
   }
   }
   llvm_unreachable("Unhandled size type");
 }
 
+const StableFunctionMap::StableFunctionEntries &
+StableFunctionMap::at(HashFuncsMapType::key_type FunctionHash) const {
+  auto It = HashToFuncs.find(FunctionHash);
+  if (isLazilyLoaded())
+    deserializeLazyLoadingEntry(It);
+  return It->second.Entries;
+}
+
+void StableFunctionMap::deserializeLazyLoadingEntry(
+    HashFuncsMapType::iterator It) const {
+  assert(isLazilyLoaded() && "Cannot deserialize non-lazily-loaded map");
+  auto &[Hash, Storage] = *It;
+  std::call_once(Storage.LazyLoadFlag,
+                 [this, HashArg = Hash, &StorageArg = Storage]() {
+                   for (auto Offset : StorageArg.Offsets)
+                     StableFunctionMapRecord::deserializeEntry(
+                         reinterpret_cast<const unsigned char *>(Offset),
+                         HashArg, const_cast<StableFunctionMap *>(this));
+                 });
+}
+
+void StableFunctionMap::deserializeLazyLoadingEntries() const {
+  if (!isLazilyLoaded())
+    return;
+  for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It)
+    deserializeLazyLoadingEntry(It);
+}
+
+const StableFunctionMap::HashFuncsMapType &
+StableFunctionMap::getFunctionMap() const {
+  // Ensure all entries are deserialized before returning the raw map.
+  if (isLazilyLoaded())
+    deserializeLazyLoadingEntries();
+  return HashToFuncs;
+}
+
 using ParamLocs = SmallVector<IndexPair>;
-static void removeIdenticalIndexPair(
-    SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>> &SFS) {
+static void
+removeIdenticalIndexPair(StableFunctionMap::StableFunctionEntries &SFS) {
   auto &RSF = SFS[0];
   unsigned StableFunctionCount = SFS.size();
 
@@ -159,9 +200,7 @@ static void removeIdenticalIndexPair(
       SF->IndexOperandHashMap->erase(Pair);
 }
 
-static bool isProfitable(
-    const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>>
-        &SFS) {
+static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS) {
   unsigned StableFunctionCount = SFS.size();
   if (StableFunctionCount < GlobalMergingMinMerges)
     return false;
@@ -202,8 +241,11 @@ static bool isProfitable(
 }
 
 void StableFunctionMap::finalize(bool SkipTrim) {
+  deserializeLazyLoadingEntries();
+  SmallVector<HashFuncsMapType::iterator> ToDelete;
   for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It) {
-    auto &[StableHash, SFS] = *It;
+    auto &[StableHash, Storage] = *It;
+    auto &SFS = Storage.Entries;
 
     // Group stable functions by ModuleIdentifier.
     llvm::stable_sort(SFS, [&](const std::unique_ptr<StableFunctionEntry> &L,
@@ -236,7 +278,7 @@ void StableFunctionMap::finalize(bool SkipTrim) {
       }
     }
     if (Invalid) {
-      HashToFuncs.erase(It);
+      ToDelete.push_back(It);
       continue;
     }
 
@@ -248,8 +290,10 @@ void StableFunctionMap::finalize(bool SkipTrim) {
     removeIdenticalIndexPair(SFS);
 
     if (!isProfitable(SFS))
-      HashToFuncs.erase(It);
+      ToDelete.push_back(It);
   }
+  for (auto It : ToDelete)
+    HashToFuncs.erase(It);
 
   Finalized = true;
 }

diff  --git a/llvm/lib/CGData/StableFunctionMapRecord.cpp b/llvm/lib/CGData/StableFunctionMapRecord.cpp
index 423e068023088..e585995ba6a31 100644
--- a/llvm/lib/CGData/StableFunctionMapRecord.cpp
+++ b/llvm/lib/CGData/StableFunctionMapRecord.cpp
@@ -53,7 +53,7 @@ static SmallVector<const StableFunctionMap::StableFunctionEntry *>
 getStableFunctionEntries(const StableFunctionMap &SFM) {
   SmallVector<const StableFunctionMap::StableFunctionEntry *> FuncEntries;
   for (const auto &P : SFM.getFunctionMap())
-    for (auto &Func : P.second)
+    for (auto &Func : P.second.Entries)
       FuncEntries.emplace_back(Func.get());
 
   llvm::stable_sort(
@@ -107,14 +107,25 @@ void StableFunctionMapRecord::serialize(
   // Write StableFunctionEntries whose pointers are sorted.
   auto FuncEntries = getStableFunctionEntries(*FunctionMap);
   Writer.write<uint32_t>(FuncEntries.size());
-
-  for (const auto *FuncRef : FuncEntries) {
+  for (const auto *FuncRef : FuncEntries)
     Writer.write<stable_hash>(FuncRef->Hash);
+  std::vector<uint64_t> IndexOperandHashesOffsets;
+  IndexOperandHashesOffsets.reserve(FuncEntries.size());
+  for (const auto *FuncRef : FuncEntries) {
     Writer.write<uint32_t>(FuncRef->FunctionNameId);
     Writer.write<uint32_t>(FuncRef->ModuleNameId);
     Writer.write<uint32_t>(FuncRef->InstCount);
-
+    const uint64_t Offset = Writer.OS.tell();
+    IndexOperandHashesOffsets.push_back(Offset);
+    Writer.write<uint64_t>(0);
+  }
+  const uint64_t IndexOperandHashesByteSizeOffset = Writer.OS.tell();
+  Writer.write<uint64_t>(0);
+  for (size_t I = 0; I < FuncEntries.size(); ++I) {
+    const uint64_t Offset = Writer.OS.tell() - IndexOperandHashesOffsets[I];
+    PatchItems.emplace_back(IndexOperandHashesOffsets[I], &Offset, 1);
     // Emit IndexOperandHashes sorted from IndexOperandHashMap.
+    const auto *FuncRef = FuncEntries[I];
     IndexOperandHashVecType IndexOperandHashes =
         getStableIndexOperandHashes(FuncRef);
     Writer.write<uint32_t>(IndexOperandHashes.size());
@@ -124,10 +135,62 @@ void StableFunctionMapRecord::serialize(
       Writer.write<stable_hash>(IndexOperandHash.second);
     }
   }
+  // Write the total size of IndexOperandHashes.
+  const uint64_t IndexOperandHashesByteSize =
+      Writer.OS.tell() - IndexOperandHashesByteSizeOffset - sizeof(uint64_t);
+  PatchItems.emplace_back(IndexOperandHashesByteSizeOffset,
+                          &IndexOperandHashesByteSize, 1);
+}
+
+void StableFunctionMapRecord::deserializeEntry(const unsigned char *Ptr,
+                                               stable_hash Hash,
+                                               StableFunctionMap *FunctionMap) {
+  auto FunctionNameId =
+      endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+  if (FunctionMap->ReadStableFunctionMapNames)
+    assert(FunctionMap->getNameForId(FunctionNameId) &&
+           "FunctionNameId out of range");
+  auto ModuleNameId =
+      endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+  if (FunctionMap->ReadStableFunctionMapNames)
+    assert(FunctionMap->getNameForId(ModuleNameId) &&
+           "ModuleNameId out of range");
+  auto InstCount =
+      endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+
+  // Read IndexOperandHashes to build IndexOperandHashMap
+  auto CurrentPosition = reinterpret_cast<uintptr_t>(Ptr);
+  auto IndexOperandHashesOffset =
+      endian::readNext<uint64_t, endianness::little, unaligned>(Ptr);
+  auto *IndexOperandHashesPtr = reinterpret_cast<const unsigned char *>(
+      CurrentPosition + IndexOperandHashesOffset);
+  auto NumIndexOperandHashes =
+      endian::readNext<uint32_t, endianness::little, unaligned>(
+          IndexOperandHashesPtr);
+  auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
+  for (unsigned J = 0; J < NumIndexOperandHashes; ++J) {
+    auto InstIndex = endian::readNext<uint32_t, endianness::little, unaligned>(
+        IndexOperandHashesPtr);
+    auto OpndIndex = endian::readNext<uint32_t, endianness::little, unaligned>(
+        IndexOperandHashesPtr);
+    auto OpndHash =
+        endian::readNext<stable_hash, endianness::little, unaligned>(
+            IndexOperandHashesPtr);
+    assert(InstIndex < InstCount && "InstIndex out of range");
+
+    IndexOperandHashMap->try_emplace({InstIndex, OpndIndex}, OpndHash);
+  }
+
+  // Insert a new StableFunctionEntry into the map.
+  auto FuncEntry = std::make_unique<StableFunctionMap::StableFunctionEntry>(
+      Hash, FunctionNameId, ModuleNameId, InstCount,
+      std::move(IndexOperandHashMap));
+
+  FunctionMap->insert(std::move(FuncEntry));
 }
 
 void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr,
-                                          bool ReadStableFunctionMapNames) {
+                                          bool Lazy) {
   // Assert that Ptr is 4-byte aligned
   assert(((uintptr_t)Ptr % 4) == 0);
   // Read Names.
@@ -139,7 +202,7 @@ void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr,
   const auto NamesByteSize =
       endian::readNext<uint64_t, endianness::little, unaligned>(Ptr);
   const auto NamesOffset = reinterpret_cast<uintptr_t>(Ptr);
-  if (ReadStableFunctionMapNames) {
+  if (FunctionMap->ReadStableFunctionMapNames) {
     for (unsigned I = 0; I < NumNames; ++I) {
       StringRef Name(reinterpret_cast<const char *>(Ptr));
       Ptr += Name.size() + 1;
@@ -157,47 +220,51 @@ void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr,
   // Read StableFunctionEntries.
   auto NumFuncs =
       endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+  auto FixedSizeFieldsOffset =
+      reinterpret_cast<uintptr_t>(Ptr) + NumFuncs * sizeof(stable_hash);
+  constexpr uint32_t FixedSizeFieldsSizePerEntry =
+      // FunctionNameId
+      sizeof(uint32_t) +
+      // ModuleNameId
+      sizeof(uint32_t) +
+      // InstCount
+      sizeof(uint32_t) +
+      // Relative offset to IndexOperandHashes
+      sizeof(uint64_t);
   for (unsigned I = 0; I < NumFuncs; ++I) {
     auto Hash =
         endian::readNext<stable_hash, endianness::little, unaligned>(Ptr);
-    [[maybe_unused]] auto FunctionNameId =
-        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
-    [[maybe_unused]] auto ModuleNameId =
-        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
-    // Only validate IDs if we've read the names
-    if (ReadStableFunctionMapNames) {
-      assert(FunctionMap->getNameForId(FunctionNameId) &&
-             "FunctionNameId out of range");
-      assert(FunctionMap->getNameForId(ModuleNameId) &&
-             "ModuleNameId out of range");
+    if (Lazy) {
+      auto It = FunctionMap->HashToFuncs.try_emplace(Hash).first;
+      StableFunctionMap::EntryStorage &Storage = It->second;
+      Storage.Offsets.push_back(FixedSizeFieldsOffset);
+    } else {
+      deserializeEntry(
+          reinterpret_cast<const unsigned char *>(FixedSizeFieldsOffset), Hash,
+          FunctionMap.get());
     }
+    FixedSizeFieldsOffset += FixedSizeFieldsSizePerEntry;
+  }
 
-    auto InstCount =
-        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
-
-    // Read IndexOperandHashes to build IndexOperandHashMap
-    auto NumIndexOperandHashes =
-        endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
-    auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
-    for (unsigned J = 0; J < NumIndexOperandHashes; ++J) {
-      auto InstIndex =
-          endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
-      auto OpndIndex =
-          endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
-      auto OpndHash =
-          endian::readNext<stable_hash, endianness::little, unaligned>(Ptr);
-      assert(InstIndex < InstCount && "InstIndex out of range");
-
-      IndexOperandHashMap->try_emplace({InstIndex, OpndIndex}, OpndHash);
-    }
+  // Update Ptr to the end of the serialized map to meet the expectation of
+  // CodeGenDataReader.
+  Ptr = reinterpret_cast<const unsigned char *>(FixedSizeFieldsOffset);
+  auto IndexOperandHashesByteSize =
+      endian::readNext<uint64_t, endianness::little, unaligned>(Ptr);
+  Ptr = reinterpret_cast<const unsigned char *>(
+      reinterpret_cast<uintptr_t>(Ptr) + IndexOperandHashesByteSize);
+}
 
-    // Insert a new StableFunctionEntry into the map.
-    auto FuncEntry = std::make_unique<StableFunctionMap::StableFunctionEntry>(
-        Hash, FunctionNameId, ModuleNameId, InstCount,
-        std::move(IndexOperandHashMap));
+void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr) {
+  deserialize(Ptr, /*Lazy=*/false);
+}
 
-    FunctionMap->insert(std::move(FuncEntry));
-  }
+void StableFunctionMapRecord::lazyDeserialize(
+    std::shared_ptr<MemoryBuffer> Buffer, uint64_t Offset) {
+  const auto *Ptr = reinterpret_cast<const unsigned char *>(
+      reinterpret_cast<uintptr_t>(Buffer->getBufferStart()) + Offset);
+  deserialize(Ptr, /*Lazy=*/true);
+  FunctionMap->Buffer = std::move(Buffer);
 }
 
 void StableFunctionMapRecord::serializeYAML(yaml::Output &YOS) const {

diff  --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
index 73f11c1345daf..47640c4aac6df 100644
--- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
+++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
@@ -350,9 +350,8 @@ checkConstLocationCompatible(const StableFunctionMap::StableFunctionEntry &SF,
   return true;
 }
 
-static ParamLocsVecTy computeParamInfo(
-    const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>>
-        &SFS) {
+static ParamLocsVecTy
+computeParamInfo(const StableFunctionMap::StableFunctionEntries &SFS) {
   std::map<std::vector<stable_hash>, ParamLocs> HashSeqToLocs;
   auto &RSF = *SFS[0];
   unsigned StableFunctionCount = SFS.size();
@@ -396,19 +395,18 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
   // Collect stable functions related to the current module.
   DenseMap<stable_hash, SmallVector<std::pair<Function *, FunctionHashInfo>>>
       HashToFuncs;
-  auto &Maps = FunctionMap->getFunctionMap();
   for (auto &F : M) {
     if (!isEligibleFunction(&F))
       continue;
     auto FI = llvm::StructuralHashWithDifferences(F, ignoreOp);
-    if (Maps.contains(FI.FunctionHash))
+    if (FunctionMap->contains(FI.FunctionHash))
       HashToFuncs[FI.FunctionHash].emplace_back(&F, std::move(FI));
   }
 
   for (auto &[Hash, Funcs] : HashToFuncs) {
     std::optional<ParamLocsVecTy> ParamLocsVec;
     SmallVector<FuncMergeInfo> FuncMergeInfos;
-    auto &SFS = Maps.at(Hash);
+    auto &SFS = FunctionMap->at(Hash);
     assert(!SFS.empty());
     auto &RFS = SFS[0];
 

diff  --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
index a4022eb885b43..47042d23cc2ca 100644
--- a/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
+++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
@@ -36,9 +36,11 @@
 
 ; Merge the cgdata using llvm-cgdata.
 ; We now validate the content of the merged cgdata.
-; Two functions have the same hash with only one different constnat at a same location.
+; Two functions have the same hash with only one different constant at the same location.
 ; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-nowrite.1 %tout-nowrite.2
 ; RUN: llvm-cgdata --convert %tout.cgdata   -o - | FileCheck %s
+; RUN: llvm-cgdata --merge -o %tout-lazy.cgdata %tout-nowrite.1 %tout-nowrite.2 -indexed-codegen-data-lazy-loading
+; RUN: llvm-cgdata --convert %tout-lazy.cgdata -indexed-codegen-data-lazy-loading -o - | FileCheck %s
 
 ; CHECK:      - Hash: [[#%d,HASH:]]
 ; CHECK-NEXT:   FunctionName: f1

diff  --git a/llvm/test/tools/llvm-cgdata/empty.test b/llvm/test/tools/llvm-cgdata/empty.test
index 0d2b0e848a2c9..2082eca58f073 100644
--- a/llvm/test/tools/llvm-cgdata/empty.test
+++ b/llvm/test/tools/llvm-cgdata/empty.test
@@ -16,7 +16,7 @@ RUN: llvm-cgdata --show %t_emptyheader.cgdata | count 0
 
 # The version number appears when asked, as it's in the header
 RUN: llvm-cgdata --show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION
-VERSION: Version: 3
+VERSION: Version: 4
 
 # When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header.
 RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
@@ -30,7 +30,7 @@ RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
 #   uint64_t StableFunctionMapOffset;
 # }
 RUN: printf '\xffcgdata\x81' > %t_header.cgdata
-RUN: printf '\x03\x00\x00\x00' >> %t_header.cgdata
+RUN: printf '\x04\x00\x00\x00' >> %t_header.cgdata
 RUN: printf '\x00\x00\x00\x00' >> %t_header.cgdata
 RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
 RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata

diff  --git a/llvm/test/tools/llvm-cgdata/error.test b/llvm/test/tools/llvm-cgdata/error.test
index 92ff484e31caf..9484371848a72 100644
--- a/llvm/test/tools/llvm-cgdata/error.test
+++ b/llvm/test/tools/llvm-cgdata/error.test
@@ -22,9 +22,9 @@ RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata
 RUN: not llvm-cgdata --show %t_corrupt.cgdata 2>&1 | FileCheck %s  --check-prefix=CORRUPT
 CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt)
 
-# The current version 3 while the header says 4.
+# The current version 4 while the header says 5.
 RUN: printf '\xffcgdata\x81' > %t_version.cgdata
-RUN: printf '\x04\x00\x00\x00' >> %t_version.cgdata
+RUN: printf '\x05\x00\x00\x00' >> %t_version.cgdata
 RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata
 RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
 RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata

diff  --git a/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
index b060872113b1b..70b83af407e5a 100644
--- a/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
+++ b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
@@ -23,6 +23,8 @@ RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-both-hashtree-funcma
 # Merge an object file having cgdata (__llvm_outline and __llvm_merge)
 RUN: llvm-cgdata -m --skip-trim %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap.cgdata
 RUN: llvm-cgdata -s %t/merge-both-hashtree-funcmap.cgdata | FileCheck %s
+RUN: llvm-cgdata -m --skip-trim %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap-lazy.cgdata -indexed-codegen-data-lazy-loading
+RUN: llvm-cgdata -s %t/merge-both-hashtree-funcmap-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
 
 CHECK: Outlined hash tree:
 CHECK-NEXT:  Total Node Count: 3
@@ -63,4 +65,4 @@ CHECK-NEXT:  Mergeable function Count: 0
 
 ;--- merge-both-template.ll
 @.data1 = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
-@.data2 = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+@.data2 = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"

diff  --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
index 2936086321028..c088ffbb4e83f 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
@@ -23,8 +23,8 @@ RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o
 # Merge the archive into the codegen data file.
 RUN: llvm-cgdata --merge --skip-trim %t/merge-archive.a -o %t/merge-archive.cgdata
 RUN: llvm-cgdata --show %t/merge-archive.cgdata | FileCheck %s
-
-RUN: llvm-cgdata --show %t/merge-archive.cgdata| FileCheck %s
+RUN: llvm-cgdata --merge --skip-trim %t/merge-archive.a -o %t/merge-archive-lazy.cgdata -indexed-codegen-data-lazy-loading
+RUN: llvm-cgdata --show %t/merge-archive-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
 CHECK: Stable function map:
 CHECK-NEXT:  Unique hash Count: 1
 CHECK-NEXT:  Total function Count: 2
@@ -65,7 +65,7 @@ MAP-NEXT: ...
 ...
 
 ;--- merge-1-template.ll
-@.data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+@.data = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
 
 ;--- raw-2.cgtext
 :stable_function_map
@@ -80,4 +80,4 @@ MAP-NEXT: ...
 ...
 
 ;--- merge-2-template.ll
-@.data = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+@.data = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"

diff  --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
index d2965456a1999..90b5992973b49 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
@@ -17,6 +17,8 @@ RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat-template-
 RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o
 RUN: llvm-cgdata --merge --skip-trim %t/merge-concat.o -o %t/merge-concat.cgdata
 RUN: llvm-cgdata --show %t/merge-concat.cgdata | FileCheck %s
+RUN: llvm-cgdata --merge --skip-trim %t/merge-concat.o -o %t/merge-concat-lazy.cgdata -indexed-codegen-data-lazy-loading
+RUN: llvm-cgdata --show %t/merge-concat-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
 
 CHECK: Stable function map:
 CHECK-NEXT:  Unique hash Count: 1
@@ -74,5 +76,5 @@ MAP-NEXT: ...
 ; In an linked executable (as opposed to an object file), cgdata in __llvm_merge might be concatenated.
 ; Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated.
 ; In other words, the following two trees are encoded back-to-back in a binary format.
-@.data1 = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
-@.data2 = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+@.data1 = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+@.data2 = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"

diff  --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
index 8277e3272d77e..b986aef26f1d7 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
@@ -19,8 +19,9 @@ RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
 
 # Merge two object files into the codegen data file.
 RUN: llvm-cgdata --merge --skip-trim %t/merge-1.o %t/merge-2.o -o %t/merge.cgdata
-
 RUN: llvm-cgdata --show %t/merge.cgdata | FileCheck %s
+RUN: llvm-cgdata --merge --skip-trim %t/merge-1.o %t/merge-2.o -o %t/merge-lazy.cgdata -indexed-codegen-data-lazy-loading
+RUN: llvm-cgdata --show %t/merge-lazy.cgdata -indexed-codegen-data-lazy-loading  | FileCheck %s
 CHECK: Stable function map:
 CHECK-NEXT:  Unique hash Count: 1
 CHECK-NEXT:  Total function Count: 2
@@ -61,7 +62,7 @@ MAP-NEXT: ...
 ...
 
 ;--- merge-1-template.ll
-@.data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+@.data = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
 
 ;--- raw-2.cgtext
 :stable_function_map
@@ -76,4 +77,4 @@ MAP-NEXT: ...
 ...
 
 ;--- merge-2-template.ll
-@.data = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+@.data = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"

diff  --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
index 9469f1cbda331..eac852ff7e710 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
@@ -15,6 +15,8 @@ RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merg
 # Merge an object file having cgdata (__llvm_merge)
 RUN: llvm-cgdata -m --skip-trim %t/merge-single.o -o %t/merge-single.cgdata
 RUN: llvm-cgdata -s %t/merge-single.cgdata | FileCheck %s
+RUN: llvm-cgdata -m --skip-trim %t/merge-single.o -o %t/merge-single-lazy.cgdata -indexed-codegen-data-lazy-loading 
+RUN: llvm-cgdata -s %t/merge-single-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
 CHECK: Stable function map:
 CHECK-NEXT:  Unique hash Count: 1
 CHECK-NEXT:  Total function Count: 1
@@ -33,4 +35,4 @@ CHECK-NEXT:  Mergeable function Count: 0
 ...
 
 ;--- merge-single-template.ll
-@.data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+@.data = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"

diff  --git a/llvm/tools/llvm-cgdata/Opts.td b/llvm/tools/llvm-cgdata/Opts.td
index 8da933f744e87..2b515a0140e67 100644
--- a/llvm/tools/llvm-cgdata/Opts.td
+++ b/llvm/tools/llvm-cgdata/Opts.td
@@ -31,3 +31,4 @@ def : JoinedOrSeparate<["-"], "o">, Alias<output>, MetaVarName<"<file>">, HelpTe
 def format : Option<["--"], "format", KIND_SEPARATE>,
              HelpText<"Specify the output format (text or binary)">, MetaVarName<"<value>">;
 def : JoinedOrSeparate<["-"], "f">, Alias<format>, HelpText<"Alias for --format">;
+def indexed_codegen_data_lazy_loading : F<"indexed-codegen-data-lazy-loading", "Lazily load indexed CodeGenData for testing purpose.">, Flags<[HelpHidden]>;

diff  --git a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
index 98fa5c5657353..047557e5a7fae 100644
--- a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
+++ b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
@@ -83,6 +83,8 @@ static CGDataAction Action;
 static std::optional<CGDataFormat> OutputFormat;
 static std::vector<std::string> InputFilenames;
 
+extern cl::opt<bool> IndexedCodeGenDataLazyLoading;
+
 static void exitWithError(Twine Message, StringRef Whence = "",
                           StringRef Hint = "") {
   WithColor::error();
@@ -361,6 +363,9 @@ static void parseArgs(int argc, char **argv) {
   default:
     llvm_unreachable("unrecognized action");
   }
+
+  IndexedCodeGenDataLazyLoading =
+      Args.hasArg(OPT_indexed_codegen_data_lazy_loading);
 }
 
 int llvm_cgdata_main(int argc, char **argvNonConst, const llvm::ToolContext &) {

diff  --git a/llvm/unittests/CGData/StableFunctionMapTest.cpp b/llvm/unittests/CGData/StableFunctionMapTest.cpp
index d551ac8a814f4..5cf62ae0b3943 100644
--- a/llvm/unittests/CGData/StableFunctionMapTest.cpp
+++ b/llvm/unittests/CGData/StableFunctionMapTest.cpp
@@ -117,7 +117,7 @@ TEST(StableFunctionMap, Finalize3) {
   Map.finalize();
   auto &M = Map.getFunctionMap();
   EXPECT_THAT(M, SizeIs(1));
-  auto &FuncEntries = M.begin()->second;
+  auto &FuncEntries = M.begin()->second.Entries;
   for (auto &FuncEntry : FuncEntries) {
     EXPECT_THAT(*FuncEntry->IndexOperandHashMap, SizeIs(1));
     ASSERT_THAT(*FuncEntry->IndexOperandHashMap,