[llvm] 07d3a73 - Revert "[CGData] Lazy loading support for stable function map (#151660)"
Kyungwoo Lee via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 14 16:57:14 PDT 2025
Author: Kyungwoo Lee
Date: 2025-08-14T16:56:54-07:00
New Revision: 07d3a73d70cac6e58ca9002c98e31423c26cc735
URL: https://github.com/llvm/llvm-project/commit/07d3a73d70cac6e58ca9002c98e31423c26cc735
DIFF: https://github.com/llvm/llvm-project/commit/07d3a73d70cac6e58ca9002c98e31423c26cc735.diff
LOG: Revert "[CGData] Lazy loading support for stable function map (#151660)"
This reverts commit 76dd742f7b32e4d3acf50fab1dbbd897f215837e.
Added:
Modified:
llvm/include/llvm/CGData/CodeGenData.h
llvm/include/llvm/CGData/CodeGenData.inc
llvm/include/llvm/CGData/StableFunctionMap.h
llvm/include/llvm/CGData/StableFunctionMapRecord.h
llvm/lib/CGData/CodeGenData.cpp
llvm/lib/CGData/CodeGenDataReader.cpp
llvm/lib/CGData/StableFunctionMap.cpp
llvm/lib/CGData/StableFunctionMapRecord.cpp
llvm/lib/CodeGen/GlobalMergeFunctions.cpp
llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
llvm/test/tools/llvm-cgdata/empty.test
llvm/test/tools/llvm-cgdata/error.test
llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
llvm/tools/llvm-cgdata/Opts.td
llvm/tools/llvm-cgdata/llvm-cgdata.cpp
llvm/unittests/CGData/StableFunctionMapTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h
index e44497a408245..38b96b72ccac6 100644
--- a/llvm/include/llvm/CGData/CodeGenData.h
+++ b/llvm/include/llvm/CGData/CodeGenData.h
@@ -285,9 +285,6 @@ enum CGDataVersion {
// Version 3 adds the total size of the Names in the stable function map so
// we can skip reading them into the memory for non-assertion builds.
Version3 = 3,
- // Version 4 adjusts the structure of stable function merging map for
- // efficient lazy loading support.
- Version4 = 4,
CurrentVersion = CG_DATA_INDEX_VERSION
};
const uint64_t Version = CGDataVersion::CurrentVersion;
diff --git a/llvm/include/llvm/CGData/CodeGenData.inc b/llvm/include/llvm/CGData/CodeGenData.inc
index d5fbe2fb97718..94de4c0b017a2 100644
--- a/llvm/include/llvm/CGData/CodeGenData.inc
+++ b/llvm/include/llvm/CGData/CodeGenData.inc
@@ -49,4 +49,4 @@ CG_DATA_SECT_ENTRY(CG_merge, CG_DATA_QUOTE(CG_DATA_MERGE_COMMON),
#endif
/* Indexed codegen data format version (start from 1). */
-#define CG_DATA_INDEX_VERSION 4
+#define CG_DATA_INDEX_VERSION 3
diff --git a/llvm/include/llvm/CGData/StableFunctionMap.h b/llvm/include/llvm/CGData/StableFunctionMap.h
index ea3523c3a3299..bcb72e8216973 100644
--- a/llvm/include/llvm/CGData/StableFunctionMap.h
+++ b/llvm/include/llvm/CGData/StableFunctionMap.h
@@ -20,8 +20,6 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/StructuralHash.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include <mutex>
namespace llvm {
@@ -74,37 +72,11 @@ struct StableFunctionMap {
IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
};
- using StableFunctionEntries =
- SmallVector<std::unique_ptr<StableFunctionEntry>>;
-
- /// In addition to the deserialized StableFunctionEntry, the struct stores
- /// the offsets of corresponding serialized stable function entries, and a
- /// once flag for safe lazy loading in a multithreaded environment.
- struct EntryStorage {
- /// The actual storage of deserialized stable function entries. If the map
- /// is lazily loaded, this will be empty until the first access by the
- /// corresponding function hash.
- StableFunctionEntries Entries;
-
- private:
- /// This is used to deserialize the entry lazily. Each element is the
- /// corresponding serialized stable function entry's offset in the memory
- /// buffer (StableFunctionMap::Buffer).
- /// The offsets are only populated when loading the map lazily, otherwise
- /// it is empty.
- SmallVector<uint64_t> Offsets;
- std::once_flag LazyLoadFlag;
- friend struct StableFunctionMap;
- friend struct StableFunctionMapRecord;
- };
-
- // Note: DenseMap requires value type to be copyable even if only using
- // in-place insertion. Use STL instead. This also affects the
- // deletion-while-iteration in finalize().
- using HashFuncsMapType = std::unordered_map<stable_hash, EntryStorage>;
+ using HashFuncsMapType =
+ DenseMap<stable_hash, SmallVector<std::unique_ptr<StableFunctionEntry>>>;
/// Get the HashToFuncs map for serialization.
- const HashFuncsMapType &getFunctionMap() const;
+ const HashFuncsMapType &getFunctionMap() const { return HashToFuncs; }
/// Get the NameToId vector for serialization.
ArrayRef<std::string> getNames() const { return IdToName; }
@@ -127,19 +99,6 @@ struct StableFunctionMap {
/// \returns true if there is no stable function entry.
bool empty() const { return size() == 0; }
- /// \returns true if there is an entry for the given function hash.
- /// This does not trigger lazy loading.
- bool contains(HashFuncsMapType::key_type FunctionHash) const {
- return HashToFuncs.count(FunctionHash) > 0;
- }
-
- /// \returns the stable function entries for the given function hash. If the
- /// map is lazily loaded, it will deserialize the entries if it is not already
- /// done, other requests to the same hash at the same time will be blocked
- /// until the entries are deserialized.
- const StableFunctionEntries &
- at(HashFuncsMapType::key_type FunctionHash) const;
-
enum SizeType {
UniqueHashCount, // The number of unique hashes in HashToFuncs.
TotalFunctionCount, // The number of total functions in HashToFuncs.
@@ -160,31 +119,17 @@ struct StableFunctionMap {
/// `StableFunctionEntry` is ready for insertion.
void insert(std::unique_ptr<StableFunctionEntry> FuncEntry) {
assert(!Finalized && "Cannot insert after finalization");
- HashToFuncs[FuncEntry->Hash].Entries.emplace_back(std::move(FuncEntry));
+ HashToFuncs[FuncEntry->Hash].emplace_back(std::move(FuncEntry));
}
- void deserializeLazyLoadingEntry(HashFuncsMapType::iterator It) const;
-
- /// Eagerly deserialize all the unloaded entries in the lazy loading map.
- void deserializeLazyLoadingEntries() const;
-
- bool isLazilyLoaded() const { return (bool)Buffer; }
-
/// A map from a stable_hash to a vector of functions with that hash.
- mutable HashFuncsMapType HashToFuncs;
+ HashFuncsMapType HashToFuncs;
/// A vector of strings to hold names.
SmallVector<std::string> IdToName;
/// A map from StringRef (name) to an ID.
StringMap<unsigned> NameToId;
/// True if the function map is finalized with minimal content.
bool Finalized = false;
- /// The memory buffer that contains the serialized stable function map for
- /// lazy loading.
- /// Non-empty only if this StableFunctionMap is created from a MemoryBuffer
- /// (i.e. by IndexedCodeGenDataReader::read()) and lazily deserialized.
- std::shared_ptr<MemoryBuffer> Buffer;
- /// Whether to read stable function names from the buffer.
- bool ReadStableFunctionMapNames = true;
friend struct StableFunctionMapRecord;
};
diff --git a/llvm/include/llvm/CGData/StableFunctionMapRecord.h b/llvm/include/llvm/CGData/StableFunctionMapRecord.h
index 2d8b573a3cb46..a75cb12a70ba6 100644
--- a/llvm/include/llvm/CGData/StableFunctionMapRecord.h
+++ b/llvm/include/llvm/CGData/StableFunctionMapRecord.h
@@ -24,26 +24,6 @@
namespace llvm {
-/// The structure of the serialized stable function map is as follows:
-/// - Number of unique function/module names
-/// - Total size of unique function/module names for opt-in skipping
-/// - Unique function/module names
-/// - Padding to align to 4 bytes
-/// - Number of StableFunctionEntries
-/// - Hashes of each StableFunctionEntry
-/// - Fixed-size fields for each StableFunctionEntry (the order is consistent
-/// with the hashes above):
-/// - FunctionNameId
-/// - ModuleNameId
-/// - InstCount
-/// - Relative offset to the beginning of IndexOperandHashes for this entry
-/// - Total size of variable-sized IndexOperandHashes for lazy-loading support
-/// - Variable-sized IndexOperandHashes for each StableFunctionEntry:
-/// - Number of IndexOperandHashes
-/// - Contents of each IndexOperandHashes
-/// - InstIndex
-/// - OpndIndex
-/// - OpndHash
struct StableFunctionMapRecord {
std::unique_ptr<StableFunctionMap> FunctionMap;
@@ -60,25 +40,13 @@ struct StableFunctionMapRecord {
const StableFunctionMap *FunctionMap,
std::vector<CGDataPatchItem> &PatchItems);
- /// A static helper function to deserialize the stable function map entry.
- /// Ptr should be pointing to the start of the fixed-sized fields of the
- /// entry when passed in.
- LLVM_ABI static void deserializeEntry(const unsigned char *Ptr,
- stable_hash Hash,
- StableFunctionMap *FunctionMap);
-
/// Serialize the stable function map to a raw_ostream.
LLVM_ABI void serialize(raw_ostream &OS,
std::vector<CGDataPatchItem> &PatchItems) const;
/// Deserialize the stable function map from a raw_ostream.
- LLVM_ABI void deserialize(const unsigned char *&Ptr);
-
- /// Lazily deserialize the stable function map from `Buffer` starting at
- /// `Offset`. The individual stable function entry would be read lazily from
- /// `Buffer` when the function map is accessed.
- LLVM_ABI void lazyDeserialize(std::shared_ptr<MemoryBuffer> Buffer,
- uint64_t Offset);
+ LLVM_ABI void deserialize(const unsigned char *&Ptr,
+ bool ReadStableFunctionMapNames = true);
/// Serialize the stable function map to a YAML stream.
LLVM_ABI void serializeYAML(yaml::Output &YOS) const;
@@ -102,18 +70,6 @@ struct StableFunctionMapRecord {
yaml::Output YOS(OS);
serializeYAML(YOS);
}
-
- /// Set whether to read stable function names from the buffer.
- /// Has no effect if the function map is read from a YAML stream.
- void setReadStableFunctionMapNames(bool Read) {
- assert(
- FunctionMap->empty() &&
- "Cannot change ReadStableFunctionMapNames after the map is populated");
- FunctionMap->ReadStableFunctionMapNames = Read;
- }
-
-private:
- void deserialize(const unsigned char *&Ptr, bool Lazy);
};
} // namespace llvm
diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp
index b4f08c3d13b0d..cd012342e1958 100644
--- a/llvm/lib/CGData/CodeGenData.cpp
+++ b/llvm/lib/CGData/CodeGenData.cpp
@@ -186,7 +186,7 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
return make_error<CGDataError>(cgdata_error::unsupported_version);
H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
- static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version4,
+ static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version3,
"Please update the offset computation below if a new field has "
"been added to the header.");
H.OutlinedHashTreeOffset =
diff --git a/llvm/lib/CGData/CodeGenDataReader.cpp b/llvm/lib/CGData/CodeGenDataReader.cpp
index fc59be8df525a..0ab35499c8986 100644
--- a/llvm/lib/CGData/CodeGenDataReader.cpp
+++ b/llvm/lib/CGData/CodeGenDataReader.cpp
@@ -26,12 +26,6 @@ static cl::opt<bool> IndexedCodeGenDataReadFunctionMapNames(
"disabled to save memory and time for final consumption of the "
"indexed CodeGenData in production."));
-cl::opt<bool> IndexedCodeGenDataLazyLoading(
- "indexed-codegen-data-lazy-loading", cl::init(false), cl::Hidden,
- cl::desc(
- "Lazily load indexed CodeGenData. Enable to save memory and time "
- "for final consumption of the indexed CodeGenData in production."));
-
namespace llvm {
static Expected<std::unique_ptr<MemoryBuffer>>
@@ -115,20 +109,11 @@ Error IndexedCodeGenDataReader::read() {
return error(cgdata_error::eof);
HashTreeRecord.deserialize(Ptr);
}
-
- // TODO: lazy loading support for outlined hash tree.
- std::shared_ptr<MemoryBuffer> SharedDataBuffer = std::move(DataBuffer);
if (hasStableFunctionMap()) {
const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
if (Ptr >= End)
return error(cgdata_error::eof);
- FunctionMapRecord.setReadStableFunctionMapNames(
- IndexedCodeGenDataReadFunctionMapNames);
- if (IndexedCodeGenDataLazyLoading)
- FunctionMapRecord.lazyDeserialize(SharedDataBuffer,
- Header.StableFunctionMapOffset);
- else
- FunctionMapRecord.deserialize(Ptr);
+ FunctionMapRecord.deserialize(Ptr, IndexedCodeGenDataReadFunctionMapNames);
}
return success();
diff --git a/llvm/lib/CGData/StableFunctionMap.cpp b/llvm/lib/CGData/StableFunctionMap.cpp
index 2f54fad0aa084..87f1e76afb60b 100644
--- a/llvm/lib/CGData/StableFunctionMap.cpp
+++ b/llvm/lib/CGData/StableFunctionMap.cpp
@@ -15,10 +15,8 @@
#include "llvm/CGData/StableFunctionMap.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include <mutex>
#define DEBUG_TYPE "stable-function-map"
@@ -95,10 +93,9 @@ void StableFunctionMap::insert(const StableFunction &Func) {
void StableFunctionMap::merge(const StableFunctionMap &OtherMap) {
assert(!Finalized && "Cannot merge after finalization");
- deserializeLazyLoadingEntries();
for (auto &[Hash, Funcs] : OtherMap.HashToFuncs) {
- auto &ThisFuncs = HashToFuncs[Hash].Entries;
- for (auto &Func : Funcs.Entries) {
+ auto &ThisFuncs = HashToFuncs[Hash];
+ for (auto &Func : Funcs) {
auto FuncNameId =
getIdOrCreateForName(*OtherMap.getNameForId(Func->FunctionNameId));
auto ModuleNameId =
@@ -117,63 +114,25 @@ size_t StableFunctionMap::size(SizeType Type) const {
case UniqueHashCount:
return HashToFuncs.size();
case TotalFunctionCount: {
- deserializeLazyLoadingEntries();
size_t Count = 0;
for (auto &Funcs : HashToFuncs)
- Count += Funcs.second.Entries.size();
+ Count += Funcs.second.size();
return Count;
}
case MergeableFunctionCount: {
- deserializeLazyLoadingEntries();
size_t Count = 0;
for (auto &[Hash, Funcs] : HashToFuncs)
- if (Funcs.Entries.size() >= 2)
- Count += Funcs.Entries.size();
+ if (Funcs.size() >= 2)
+ Count += Funcs.size();
return Count;
}
}
llvm_unreachable("Unhandled size type");
}
-const StableFunctionMap::StableFunctionEntries &
-StableFunctionMap::at(HashFuncsMapType::key_type FunctionHash) const {
- auto It = HashToFuncs.find(FunctionHash);
- if (isLazilyLoaded())
- deserializeLazyLoadingEntry(It);
- return It->second.Entries;
-}
-
-void StableFunctionMap::deserializeLazyLoadingEntry(
- HashFuncsMapType::iterator It) const {
- assert(isLazilyLoaded() && "Cannot deserialize non-lazily-loaded map");
- auto &[Hash, Storage] = *It;
- std::call_once(Storage.LazyLoadFlag,
- [this, HashArg = Hash, &StorageArg = Storage]() {
- for (auto Offset : StorageArg.Offsets)
- StableFunctionMapRecord::deserializeEntry(
- reinterpret_cast<const unsigned char *>(Offset),
- HashArg, const_cast<StableFunctionMap *>(this));
- });
-}
-
-void StableFunctionMap::deserializeLazyLoadingEntries() const {
- if (!isLazilyLoaded())
- return;
- for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It)
- deserializeLazyLoadingEntry(It);
-}
-
-const StableFunctionMap::HashFuncsMapType &
-StableFunctionMap::getFunctionMap() const {
- // Ensure all entries are deserialized before returning the raw map.
- if (isLazilyLoaded())
- deserializeLazyLoadingEntries();
- return HashToFuncs;
-}
-
using ParamLocs = SmallVector<IndexPair>;
-static void
-removeIdenticalIndexPair(StableFunctionMap::StableFunctionEntries &SFS) {
+static void removeIdenticalIndexPair(
+ SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>> &SFS) {
auto &RSF = SFS[0];
unsigned StableFunctionCount = SFS.size();
@@ -200,7 +159,9 @@ removeIdenticalIndexPair(StableFunctionMap::StableFunctionEntries &SFS) {
SF->IndexOperandHashMap->erase(Pair);
}
-static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS) {
+static bool isProfitable(
+ const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>>
+ &SFS) {
unsigned StableFunctionCount = SFS.size();
if (StableFunctionCount < GlobalMergingMinMerges)
return false;
@@ -241,11 +202,8 @@ static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS) {
}
void StableFunctionMap::finalize(bool SkipTrim) {
- deserializeLazyLoadingEntries();
- SmallVector<HashFuncsMapType::iterator> ToDelete;
for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It) {
- auto &[StableHash, Storage] = *It;
- auto &SFS = Storage.Entries;
+ auto &[StableHash, SFS] = *It;
// Group stable functions by ModuleIdentifier.
llvm::stable_sort(SFS, [&](const std::unique_ptr<StableFunctionEntry> &L,
@@ -278,7 +236,7 @@ void StableFunctionMap::finalize(bool SkipTrim) {
}
}
if (Invalid) {
- ToDelete.push_back(It);
+ HashToFuncs.erase(It);
continue;
}
@@ -290,10 +248,8 @@ void StableFunctionMap::finalize(bool SkipTrim) {
removeIdenticalIndexPair(SFS);
if (!isProfitable(SFS))
- ToDelete.push_back(It);
+ HashToFuncs.erase(It);
}
- for (auto It : ToDelete)
- HashToFuncs.erase(It);
Finalized = true;
}
diff --git a/llvm/lib/CGData/StableFunctionMapRecord.cpp b/llvm/lib/CGData/StableFunctionMapRecord.cpp
index e585995ba6a31..423e068023088 100644
--- a/llvm/lib/CGData/StableFunctionMapRecord.cpp
+++ b/llvm/lib/CGData/StableFunctionMapRecord.cpp
@@ -53,7 +53,7 @@ static SmallVector<const StableFunctionMap::StableFunctionEntry *>
getStableFunctionEntries(const StableFunctionMap &SFM) {
SmallVector<const StableFunctionMap::StableFunctionEntry *> FuncEntries;
for (const auto &P : SFM.getFunctionMap())
- for (auto &Func : P.second.Entries)
+ for (auto &Func : P.second)
FuncEntries.emplace_back(Func.get());
llvm::stable_sort(
@@ -107,25 +107,14 @@ void StableFunctionMapRecord::serialize(
// Write StableFunctionEntries whose pointers are sorted.
auto FuncEntries = getStableFunctionEntries(*FunctionMap);
Writer.write<uint32_t>(FuncEntries.size());
- for (const auto *FuncRef : FuncEntries)
- Writer.write<stable_hash>(FuncRef->Hash);
- std::vector<uint64_t> IndexOperandHashesOffsets;
- IndexOperandHashesOffsets.reserve(FuncEntries.size());
+
for (const auto *FuncRef : FuncEntries) {
+ Writer.write<stable_hash>(FuncRef->Hash);
Writer.write<uint32_t>(FuncRef->FunctionNameId);
Writer.write<uint32_t>(FuncRef->ModuleNameId);
Writer.write<uint32_t>(FuncRef->InstCount);
- const uint64_t Offset = Writer.OS.tell();
- IndexOperandHashesOffsets.push_back(Offset);
- Writer.write<uint64_t>(0);
- }
- const uint64_t IndexOperandHashesByteSizeOffset = Writer.OS.tell();
- Writer.write<uint64_t>(0);
- for (size_t I = 0; I < FuncEntries.size(); ++I) {
- const uint64_t Offset = Writer.OS.tell() - IndexOperandHashesOffsets[I];
- PatchItems.emplace_back(IndexOperandHashesOffsets[I], &Offset, 1);
+
// Emit IndexOperandHashes sorted from IndexOperandHashMap.
- const auto *FuncRef = FuncEntries[I];
IndexOperandHashVecType IndexOperandHashes =
getStableIndexOperandHashes(FuncRef);
Writer.write<uint32_t>(IndexOperandHashes.size());
@@ -135,62 +124,10 @@ void StableFunctionMapRecord::serialize(
Writer.write<stable_hash>(IndexOperandHash.second);
}
}
- // Write the total size of IndexOperandHashes.
- const uint64_t IndexOperandHashesByteSize =
- Writer.OS.tell() - IndexOperandHashesByteSizeOffset - sizeof(uint64_t);
- PatchItems.emplace_back(IndexOperandHashesByteSizeOffset,
- &IndexOperandHashesByteSize, 1);
-}
-
-void StableFunctionMapRecord::deserializeEntry(const unsigned char *Ptr,
- stable_hash Hash,
- StableFunctionMap *FunctionMap) {
- auto FunctionNameId =
- endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
- if (FunctionMap->ReadStableFunctionMapNames)
- assert(FunctionMap->getNameForId(FunctionNameId) &&
- "FunctionNameId out of range");
- auto ModuleNameId =
- endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
- if (FunctionMap->ReadStableFunctionMapNames)
- assert(FunctionMap->getNameForId(ModuleNameId) &&
- "ModuleNameId out of range");
- auto InstCount =
- endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
-
- // Read IndexOperandHashes to build IndexOperandHashMap
- auto CurrentPosition = reinterpret_cast<uintptr_t>(Ptr);
- auto IndexOperandHashesOffset =
- endian::readNext<uint64_t, endianness::little, unaligned>(Ptr);
- auto *IndexOperandHashesPtr = reinterpret_cast<const unsigned char *>(
- CurrentPosition + IndexOperandHashesOffset);
- auto NumIndexOperandHashes =
- endian::readNext<uint32_t, endianness::little, unaligned>(
- IndexOperandHashesPtr);
- auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
- for (unsigned J = 0; J < NumIndexOperandHashes; ++J) {
- auto InstIndex = endian::readNext<uint32_t, endianness::little, unaligned>(
- IndexOperandHashesPtr);
- auto OpndIndex = endian::readNext<uint32_t, endianness::little, unaligned>(
- IndexOperandHashesPtr);
- auto OpndHash =
- endian::readNext<stable_hash, endianness::little, unaligned>(
- IndexOperandHashesPtr);
- assert(InstIndex < InstCount && "InstIndex out of range");
-
- IndexOperandHashMap->try_emplace({InstIndex, OpndIndex}, OpndHash);
- }
-
- // Insert a new StableFunctionEntry into the map.
- auto FuncEntry = std::make_unique<StableFunctionMap::StableFunctionEntry>(
- Hash, FunctionNameId, ModuleNameId, InstCount,
- std::move(IndexOperandHashMap));
-
- FunctionMap->insert(std::move(FuncEntry));
}
void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr,
- bool Lazy) {
+ bool ReadStableFunctionMapNames) {
// Assert that Ptr is 4-byte aligned
assert(((uintptr_t)Ptr % 4) == 0);
// Read Names.
@@ -202,7 +139,7 @@ void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr,
const auto NamesByteSize =
endian::readNext<uint64_t, endianness::little, unaligned>(Ptr);
const auto NamesOffset = reinterpret_cast<uintptr_t>(Ptr);
- if (FunctionMap->ReadStableFunctionMapNames) {
+ if (ReadStableFunctionMapNames) {
for (unsigned I = 0; I < NumNames; ++I) {
StringRef Name(reinterpret_cast<const char *>(Ptr));
Ptr += Name.size() + 1;
@@ -220,51 +157,47 @@ void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr,
// Read StableFunctionEntries.
auto NumFuncs =
endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
- auto FixedSizeFieldsOffset =
- reinterpret_cast<uintptr_t>(Ptr) + NumFuncs * sizeof(stable_hash);
- constexpr uint32_t FixedSizeFieldsSizePerEntry =
- // FunctionNameId
- sizeof(uint32_t) +
- // ModuleNameId
- sizeof(uint32_t) +
- // InstCount
- sizeof(uint32_t) +
- // Relative offset to IndexOperandHashes
- sizeof(uint64_t);
for (unsigned I = 0; I < NumFuncs; ++I) {
auto Hash =
endian::readNext<stable_hash, endianness::little, unaligned>(Ptr);
- if (Lazy) {
- auto It = FunctionMap->HashToFuncs.try_emplace(Hash).first;
- StableFunctionMap::EntryStorage &Storage = It->second;
- Storage.Offsets.push_back(FixedSizeFieldsOffset);
- } else {
- deserializeEntry(
- reinterpret_cast<const unsigned char *>(FixedSizeFieldsOffset), Hash,
- FunctionMap.get());
+ [[maybe_unused]] auto FunctionNameId =
+ endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+ [[maybe_unused]] auto ModuleNameId =
+ endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+ // Only validate IDs if we've read the names
+ if (ReadStableFunctionMapNames) {
+ assert(FunctionMap->getNameForId(FunctionNameId) &&
+ "FunctionNameId out of range");
+ assert(FunctionMap->getNameForId(ModuleNameId) &&
+ "ModuleNameId out of range");
}
- FixedSizeFieldsOffset += FixedSizeFieldsSizePerEntry;
- }
- // Update Ptr to the end of the serialized map to meet the expectation of
- // CodeGenDataReader.
- Ptr = reinterpret_cast<const unsigned char *>(FixedSizeFieldsOffset);
- auto IndexOperandHashesByteSize =
- endian::readNext<uint64_t, endianness::little, unaligned>(Ptr);
- Ptr = reinterpret_cast<const unsigned char *>(
- reinterpret_cast<uintptr_t>(Ptr) + IndexOperandHashesByteSize);
-}
+ auto InstCount =
+ endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+
+ // Read IndexOperandHashes to build IndexOperandHashMap
+ auto NumIndexOperandHashes =
+ endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+ auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
+ for (unsigned J = 0; J < NumIndexOperandHashes; ++J) {
+ auto InstIndex =
+ endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+ auto OpndIndex =
+ endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
+ auto OpndHash =
+ endian::readNext<stable_hash, endianness::little, unaligned>(Ptr);
+ assert(InstIndex < InstCount && "InstIndex out of range");
+
+ IndexOperandHashMap->try_emplace({InstIndex, OpndIndex}, OpndHash);
+ }
-void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr) {
- deserialize(Ptr, /*Lazy=*/false);
-}
+ // Insert a new StableFunctionEntry into the map.
+ auto FuncEntry = std::make_unique<StableFunctionMap::StableFunctionEntry>(
+ Hash, FunctionNameId, ModuleNameId, InstCount,
+ std::move(IndexOperandHashMap));
-void StableFunctionMapRecord::lazyDeserialize(
- std::shared_ptr<MemoryBuffer> Buffer, uint64_t Offset) {
- const auto *Ptr = reinterpret_cast<const unsigned char *>(
- reinterpret_cast<uintptr_t>(Buffer->getBufferStart()) + Offset);
- deserialize(Ptr, /*Lazy=*/true);
- FunctionMap->Buffer = std::move(Buffer);
+ FunctionMap->insert(std::move(FuncEntry));
+ }
}
void StableFunctionMapRecord::serializeYAML(yaml::Output &YOS) const {
diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
index 47640c4aac6df..73f11c1345daf 100644
--- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
+++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
@@ -350,8 +350,9 @@ checkConstLocationCompatible(const StableFunctionMap::StableFunctionEntry &SF,
return true;
}
-static ParamLocsVecTy
-computeParamInfo(const StableFunctionMap::StableFunctionEntries &SFS) {
+static ParamLocsVecTy computeParamInfo(
+ const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>>
+ &SFS) {
std::map<std::vector<stable_hash>, ParamLocs> HashSeqToLocs;
auto &RSF = *SFS[0];
unsigned StableFunctionCount = SFS.size();
@@ -395,18 +396,19 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
// Collect stable functions related to the current module.
DenseMap<stable_hash, SmallVector<std::pair<Function *, FunctionHashInfo>>>
HashToFuncs;
+ auto &Maps = FunctionMap->getFunctionMap();
for (auto &F : M) {
if (!isEligibleFunction(&F))
continue;
auto FI = llvm::StructuralHashWithDifferences(F, ignoreOp);
- if (FunctionMap->contains(FI.FunctionHash))
+ if (Maps.contains(FI.FunctionHash))
HashToFuncs[FI.FunctionHash].emplace_back(&F, std::move(FI));
}
for (auto &[Hash, Funcs] : HashToFuncs) {
std::optional<ParamLocsVecTy> ParamLocsVec;
SmallVector<FuncMergeInfo> FuncMergeInfos;
- auto &SFS = FunctionMap->at(Hash);
+ auto &SFS = Maps.at(Hash);
assert(!SFS.empty());
auto &RFS = SFS[0];
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
index 47042d23cc2ca..a4022eb885b43 100644
--- a/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
+++ b/llvm/test/ThinLTO/AArch64/cgdata-merge-write.ll
@@ -36,11 +36,9 @@
; Merge the cgdata using llvm-cgdata.
; We now validate the content of the merged cgdata.
-; Two functions have the same hash with only one different constant at the same location.
+; Two functions have the same hash with only one different constnat at a same location.
; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-nowrite.1 %tout-nowrite.2
; RUN: llvm-cgdata --convert %tout.cgdata -o - | FileCheck %s
-; RUN: llvm-cgdata --merge -o %tout-lazy.cgdata %tout-nowrite.1 %tout-nowrite.2 -indexed-codegen-data-lazy-loading
-; RUN: llvm-cgdata --convert %tout-lazy.cgdata -indexed-codegen-data-lazy-loading -o - | FileCheck %s
; CHECK: - Hash: [[#%d,HASH:]]
; CHECK-NEXT: FunctionName: f1
diff --git a/llvm/test/tools/llvm-cgdata/empty.test b/llvm/test/tools/llvm-cgdata/empty.test
index 2082eca58f073..0d2b0e848a2c9 100644
--- a/llvm/test/tools/llvm-cgdata/empty.test
+++ b/llvm/test/tools/llvm-cgdata/empty.test
@@ -16,7 +16,7 @@ RUN: llvm-cgdata --show %t_emptyheader.cgdata | count 0
# The version number appears when asked, as it's in the header
RUN: llvm-cgdata --show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION
-VERSION: Version: 4
+VERSION: Version: 3
# When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header.
RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
@@ -30,7 +30,7 @@ RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
# uint64_t StableFunctionMapOffset;
# }
RUN: printf '\xffcgdata\x81' > %t_header.cgdata
-RUN: printf '\x04\x00\x00\x00' >> %t_header.cgdata
+RUN: printf '\x03\x00\x00\x00' >> %t_header.cgdata
RUN: printf '\x00\x00\x00\x00' >> %t_header.cgdata
RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
diff --git a/llvm/test/tools/llvm-cgdata/error.test b/llvm/test/tools/llvm-cgdata/error.test
index 9484371848a72..92ff484e31caf 100644
--- a/llvm/test/tools/llvm-cgdata/error.test
+++ b/llvm/test/tools/llvm-cgdata/error.test
@@ -22,9 +22,9 @@ RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata
RUN: not llvm-cgdata --show %t_corrupt.cgdata 2>&1 | FileCheck %s --check-prefix=CORRUPT
CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt)
-# The current version 4 while the header says 5.
+# The current version 3 while the header says 4.
RUN: printf '\xffcgdata\x81' > %t_version.cgdata
-RUN: printf '\x05\x00\x00\x00' >> %t_version.cgdata
+RUN: printf '\x04\x00\x00\x00' >> %t_version.cgdata
RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata
RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
diff --git a/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
index 70b83af407e5a..b060872113b1b 100644
--- a/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
+++ b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test
@@ -23,8 +23,6 @@ RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-both-hashtree-funcma
# Merge an object file having cgdata (__llvm_outline and __llvm_merge)
RUN: llvm-cgdata -m --skip-trim %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap.cgdata
RUN: llvm-cgdata -s %t/merge-both-hashtree-funcmap.cgdata | FileCheck %s
-RUN: llvm-cgdata -m --skip-trim %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap-lazy.cgdata -indexed-codegen-data-lazy-loading
-RUN: llvm-cgdata -s %t/merge-both-hashtree-funcmap-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
CHECK: Outlined hash tree:
CHECK-NEXT: Total Node Count: 3
@@ -65,4 +63,4 @@ CHECK-NEXT: Mergeable function Count: 0
;--- merge-both-template.ll
@.data1 = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
-@.data2 = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+@.data2 = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
index c088ffbb4e83f..2936086321028 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
@@ -23,8 +23,8 @@ RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o
# Merge the archive into the codegen data file.
RUN: llvm-cgdata --merge --skip-trim %t/merge-archive.a -o %t/merge-archive.cgdata
RUN: llvm-cgdata --show %t/merge-archive.cgdata | FileCheck %s
-RUN: llvm-cgdata --merge --skip-trim %t/merge-archive.a -o %t/merge-archive-lazy.cgdata -indexed-codegen-data-lazy-loading
-RUN: llvm-cgdata --show %t/merge-archive-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
+
+RUN: llvm-cgdata --show %t/merge-archive.cgdata| FileCheck %s
CHECK: Stable function map:
CHECK-NEXT: Unique hash Count: 1
CHECK-NEXT: Total function Count: 2
@@ -65,7 +65,7 @@ MAP-NEXT: ...
...
;--- merge-1-template.ll
-@.data = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+@.data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
;--- raw-2.cgtext
:stable_function_map
@@ -80,4 +80,4 @@ MAP-NEXT: ...
...
;--- merge-2-template.ll
-@.data = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+@.data = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
index 90b5992973b49..d2965456a1999 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test
@@ -17,8 +17,6 @@ RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat-template-
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o
RUN: llvm-cgdata --merge --skip-trim %t/merge-concat.o -o %t/merge-concat.cgdata
RUN: llvm-cgdata --show %t/merge-concat.cgdata | FileCheck %s
-RUN: llvm-cgdata --merge --skip-trim %t/merge-concat.o -o %t/merge-concat-lazy.cgdata -indexed-codegen-data-lazy-loading
-RUN: llvm-cgdata --show %t/merge-concat-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
CHECK: Stable function map:
CHECK-NEXT: Unique hash Count: 1
@@ -76,5 +74,5 @@ MAP-NEXT: ...
; In an linked executable (as opposed to an object file), cgdata in __llvm_merge might be concatenated.
; Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated.
; In other words, the following two trees are encoded back-to-back in a binary format.
-@.data1 = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
-@.data2 = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+@.data1 = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+@.data2 = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
index b986aef26f1d7..8277e3272d77e 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test
@@ -19,9 +19,8 @@ RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
# Merge two object files into the codegen data file.
RUN: llvm-cgdata --merge --skip-trim %t/merge-1.o %t/merge-2.o -o %t/merge.cgdata
+
RUN: llvm-cgdata --show %t/merge.cgdata | FileCheck %s
-RUN: llvm-cgdata --merge --skip-trim %t/merge-1.o %t/merge-2.o -o %t/merge-lazy.cgdata -indexed-codegen-data-lazy-loading
-RUN: llvm-cgdata --show %t/merge-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
CHECK: Stable function map:
CHECK-NEXT: Unique hash Count: 1
CHECK-NEXT: Total function Count: 2
@@ -62,7 +61,7 @@ MAP-NEXT: ...
...
;--- merge-1-template.ll
-@.data = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+@.data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
;--- raw-2.cgtext
:stable_function_map
@@ -77,4 +76,4 @@ MAP-NEXT: ...
...
;--- merge-2-template.ll
-@.data = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
+@.data = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
index eac852ff7e710..9469f1cbda331 100644
--- a/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
+++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test
@@ -15,8 +15,6 @@ RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merg
# Merge an object file having cgdata (__llvm_merge)
RUN: llvm-cgdata -m --skip-trim %t/merge-single.o -o %t/merge-single.cgdata
RUN: llvm-cgdata -s %t/merge-single.cgdata | FileCheck %s
-RUN: llvm-cgdata -m --skip-trim %t/merge-single.o -o %t/merge-single-lazy.cgdata -indexed-codegen-data-lazy-loading
-RUN: llvm-cgdata -s %t/merge-single-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
CHECK: Stable function map:
CHECK-NEXT: Unique hash Count: 1
CHECK-NEXT: Total function Count: 1
@@ -35,4 +33,4 @@ CHECK-NEXT: Mergeable function Count: 0
...
;--- merge-single-template.ll
-@.data = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
+@.data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
diff --git a/llvm/tools/llvm-cgdata/Opts.td b/llvm/tools/llvm-cgdata/Opts.td
index 2b515a0140e67..8da933f744e87 100644
--- a/llvm/tools/llvm-cgdata/Opts.td
+++ b/llvm/tools/llvm-cgdata/Opts.td
@@ -31,4 +31,3 @@ def : JoinedOrSeparate<["-"], "o">, Alias<output>, MetaVarName<"<file>">, HelpTe
def format : Option<["--"], "format", KIND_SEPARATE>,
HelpText<"Specify the output format (text or binary)">, MetaVarName<"<value>">;
def : JoinedOrSeparate<["-"], "f">, Alias<format>, HelpText<"Alias for --format">;
-def indexed_codegen_data_lazy_loading : F<"indexed-codegen-data-lazy-loading", "Lazily load indexed CodeGenData for testing purpose.">, Flags<[HelpHidden]>;
diff --git a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
index 047557e5a7fae..98fa5c5657353 100644
--- a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
+++ b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
@@ -83,8 +83,6 @@ static CGDataAction Action;
static std::optional<CGDataFormat> OutputFormat;
static std::vector<std::string> InputFilenames;
-extern cl::opt<bool> IndexedCodeGenDataLazyLoading;
-
static void exitWithError(Twine Message, StringRef Whence = "",
StringRef Hint = "") {
WithColor::error();
@@ -363,9 +361,6 @@ static void parseArgs(int argc, char **argv) {
default:
llvm_unreachable("unrecognized action");
}
-
- IndexedCodeGenDataLazyLoading =
- Args.hasArg(OPT_indexed_codegen_data_lazy_loading);
}
int llvm_cgdata_main(int argc, char **argvNonConst, const llvm::ToolContext &) {
diff --git a/llvm/unittests/CGData/StableFunctionMapTest.cpp b/llvm/unittests/CGData/StableFunctionMapTest.cpp
index 5cf62ae0b3943..d551ac8a814f4 100644
--- a/llvm/unittests/CGData/StableFunctionMapTest.cpp
+++ b/llvm/unittests/CGData/StableFunctionMapTest.cpp
@@ -117,7 +117,7 @@ TEST(StableFunctionMap, Finalize3) {
Map.finalize();
auto &M = Map.getFunctionMap();
EXPECT_THAT(M, SizeIs(1));
- auto &FuncEntries = M.begin()->second.Entries;
+ auto &FuncEntries = M.begin()->second;
for (auto &FuncEntry : FuncEntries) {
EXPECT_THAT(*FuncEntry->IndexOperandHashMap, SizeIs(1));
ASSERT_THAT(*FuncEntry->IndexOperandHashMap,
More information about the llvm-commits
mailing list