[clang] [llvm] [LTO] Reduce memory usage for import lists (PR #106772)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Aug 30 11:00:53 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-clang-codegen
Author: Kazu Hirata (kazutakahirata)
<details>
<summary>Changes</summary>
This patch reduces the memory usage for import lists by employing
memory-efficient data structures.
With this patch, an import list for a given destination module is
basically DenseSet<uint32_t> with each element indexing into the
deduplication table containing tuples of:
{SourceModule, GUID, Definition/Declaration}
In one of our large applications, the peak memory usage goes down by
9.2% from 6.120GB to 5.555GB during the LTO indexing step.
This patch addresses several sources of space inefficiency associated
with std::unordered_map:
- std::unordered_map<GUID, ImportKind> takes up 16 bytes because of
padding even though ImportKind only carries one bit of information.
- std::unordered_map uses pointers to elements, both in the hash table
proper and for collision chains.
- We allocate an instance of std::unordered_map for each
{Destination Module, Source Module} pair for which we have at least
one import. Most import lists have less than 10 imports, so the
metadata like the size of std::unordered_map and the pointer to the
hash table costs a lot relative to the actual contents.
---
Full diff: https://github.com/llvm/llvm-project/pull/106772.diff
5 Files Affected:
- (modified) clang/lib/CodeGen/BackendUtil.cpp (+2-1)
- (modified) llvm/include/llvm/Transforms/IPO/FunctionImport.h (+54-17)
- (modified) llvm/lib/LTO/LTO.cpp (+32-52)
- (modified) llvm/lib/Transforms/IPO/FunctionImport.cpp (+51-48)
- (modified) llvm/tools/llvm-link/llvm-link.cpp (+2-1)
``````````diff
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 564efa3181d188..7fa69420298160 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1206,7 +1206,8 @@ static void runThinLTOBackend(
// We can simply import the values mentioned in the combined index, since
// we should only invoke this using the individual indexes written out
// via a WriteIndexesThinBackend.
- FunctionImporter::ImportMapTy ImportList;
+ FunctionImporter::ImportIDTable ImportIDs;
+ FunctionImporter::ImportMapTy ImportList(ImportIDs);
if (!lto::initImportList(*M, *CombinedIndex, ImportList))
return;
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index 99fe110191dec9..b5b328d96b2737 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -21,8 +21,6 @@
#include <memory>
#include <string>
#include <system_error>
-#include <unordered_map>
-#include <unordered_set>
#include <utility>
namespace llvm {
@@ -33,14 +31,6 @@ class Module;
/// based on the provided summary informations.
class FunctionImporter {
public:
- /// The functions to import from a source module and their import type.
- /// Note we choose unordered_map over (Small)DenseMap. The number of imports
- /// from a source module could be small but DenseMap size grows to 64 quickly
- /// and not memory efficient (see
- /// https://llvm.org/docs/ProgrammersManual.html#llvm-adt-densemap-h)
- using FunctionsToImportTy =
- std::unordered_map<GlobalValue::GUID, GlobalValueSummary::ImportKind>;
-
/// The different reasons selectCallee will chose not to import a
/// candidate.
enum class ImportFailureReason {
@@ -156,6 +146,12 @@ class FunctionImporter {
return std::make_tuple(FromModule, GUID, Kind);
}
+ // The same as lookup above. Useful for map_iterator.
+ std::tuple<StringRef, GlobalValue::GUID, GlobalValueSummary::ImportKind>
+ operator()(ImportIDTable::ImportIDTy ImportID) const {
+ return lookup(ImportID);
+ }
+
private:
// Make a pair of import IDs [Def, Decl] from an index into TheTable.
static std::pair<ImportIDTy, ImportIDTy> makeIDPair(ImportIDTy Index) {
@@ -167,6 +163,9 @@ class FunctionImporter {
MapVector<std::pair<StringRef, GlobalValue::GUID>, ImportIDTy> TheTable;
};
+ // Forward-declare SortedImportList for ImportMapTy.
+ class SortedImportList;
+
/// The map maintains the list of imports. Conceptually, it is a collection
/// of tuples of the form:
///
@@ -179,8 +178,6 @@ class FunctionImporter {
/// path string table).
class ImportMapTy {
public:
- using ImportMapTyImpl = DenseMap<StringRef, FunctionsToImportTy>;
-
enum class AddDefinitionStatus {
// No change was made to the list of imports or whether each import should
// be imported as a declaration or definition.
@@ -192,6 +189,9 @@ class FunctionImporter {
ChangedToDefinition,
};
+ ImportMapTy() = delete;
+ ImportMapTy(ImportIDTable &IDs) : IDs(IDs) {}
+
// Add the given GUID to ImportList as a definition. If the same GUID has
// been added as a declaration previously, that entry is overridden.
AddDefinitionStatus addDefinition(StringRef FromModule,
@@ -215,13 +215,49 @@ class FunctionImporter {
SmallVector<StringRef, 0> getSourceModules() const;
std::optional<GlobalValueSummary::ImportKind>
- getImportType(const FunctionsToImportTy &GUIDToImportType,
- GlobalValue::GUID GUID) const;
+ getImportType(StringRef FromModule, GlobalValue::GUID GUID) const;
+
+ // Iterate over the import list. The caller gets tuples of FromModule,
+ // GUID, and ImportKind instead of import IDs.
+ auto begin() const { return map_iterator(Imports.begin(), IDs); }
+ auto end() const { return map_iterator(Imports.end(), IDs); }
+
+ friend class SortedImportList;
+
+ private:
+ ImportIDTable &IDs;
+ DenseSet<ImportIDTable::ImportIDTy> Imports;
+ };
+
+ // A read-only copy of ImportMapTy with its contents sorted according to the
+ // given comparison function.
+ class SortedImportList {
+ public:
+ SortedImportList(const ImportMapTy &ImportMap,
+ llvm::function_ref<
+ bool(const std::pair<StringRef, GlobalValue::GUID> &,
+ const std::pair<StringRef, GlobalValue::GUID> &)>
+ Comp)
+ : IDs(ImportMap.IDs), Imports(iterator_range(ImportMap.Imports)) {
+ llvm::sort(Imports, [&](ImportIDTable::ImportIDTy L,
+ ImportIDTable::ImportIDTy R) {
+ auto Lookup = [&](ImportIDTable::ImportIDTy Id)
+ -> std::pair<StringRef, GlobalValue::GUID> {
+ auto Tuple = IDs.lookup(Id);
+ return std::make_pair(std::get<0>(Tuple), std::get<1>(Tuple));
+ };
+ return Comp(Lookup(L), Lookup(R));
+ });
+ }
- const ImportMapTyImpl &getImportMap() const { return ImportMap; }
+ // Iterate over the import list. The caller gets tuples of FromModule,
+ // GUID, and ImportKind instead of import IDs.
+ auto begin() const { return map_iterator(Imports.begin(), IDs); }
+ auto end() const { return map_iterator(Imports.end(), IDs); }
private:
- ImportMapTyImpl ImportMap;
+ const ImportIDTable &IDs;
+ SmallVector<ImportIDTable::ImportIDTy, 0> Imports;
};
// A map from destination modules to lists of imports.
@@ -231,7 +267,7 @@ class FunctionImporter {
ImportListsTy(size_t Size) : ListsImpl(Size) {}
ImportMapTy &operator[](StringRef DestMod) {
- return ListsImpl.try_emplace(DestMod).first->second;
+ return ListsImpl.try_emplace(DestMod, ImportIDs).first->second;
}
size_t size() const { return ListsImpl.size(); }
@@ -242,6 +278,7 @@ class FunctionImporter {
private:
DenseMap<StringRef, ImportMapTy> ListsImpl;
+ ImportIDTable ImportIDs;
};
/// The set contains an entry for every global value that the module exports.
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 09dfec03cb0c34..646c6db8e58d36 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -174,51 +174,33 @@ std::string llvm::computeLTOCacheKey(
for (auto GUID : ExportsGUID)
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
- // Include the hash for every module we import functions from. The set of
- // imported symbols for each module may affect code generation and is
- // sensitive to link order, so include that as well.
- using ImportMapIteratorTy =
- FunctionImporter::ImportMapTy::ImportMapTyImpl::const_iterator;
- struct ImportModule {
- ImportMapIteratorTy ModIt;
- const ModuleSummaryIndex::ModuleInfo *ModInfo;
-
- StringRef getIdentifier() const { return ModIt->getFirst(); }
- const FunctionImporter::FunctionsToImportTy &getFunctions() const {
- return ModIt->second;
- }
-
- const ModuleHash &getHash() const { return ModInfo->second; }
- };
-
- std::vector<ImportModule> ImportModulesVector;
- ImportModulesVector.reserve(ImportList.getImportMap().size());
-
- for (ImportMapIteratorTy It = ImportList.getImportMap().begin();
- It != ImportList.getImportMap().end(); ++It) {
- ImportModulesVector.push_back({It, Index.getModule(It->getFirst())});
- }
// Order using module hash, to be both independent of module name and
// module order.
- llvm::sort(ImportModulesVector,
- [](const ImportModule &Lhs, const ImportModule &Rhs) -> bool {
- return Lhs.getHash() < Rhs.getHash();
- });
- std::vector<std::pair<uint64_t, uint8_t>> ImportedGUIDs;
- for (const ImportModule &Entry : ImportModulesVector) {
- auto ModHash = Entry.getHash();
- Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
-
- AddUint64(Entry.getFunctions().size());
-
- ImportedGUIDs.clear();
- for (auto &[Fn, ImportType] : Entry.getFunctions())
- ImportedGUIDs.push_back(std::make_pair(Fn, ImportType));
- llvm::sort(ImportedGUIDs);
- for (auto &[GUID, Type] : ImportedGUIDs) {
- AddUint64(GUID);
- AddUint8(Type);
+ auto Comp = [&](const std::pair<StringRef, GlobalValue::GUID> &L,
+ const std::pair<StringRef, GlobalValue::GUID> &R) {
+ return std::make_pair(Index.getModule(L.first)->second, L.second) <
+ std::make_pair(Index.getModule(R.first)->second, R.second);
+ };
+ FunctionImporter::SortedImportList SortedImportList(ImportList, Comp);
+
+ // Count the number of imports for each source module.
+ DenseMap<StringRef, unsigned> ModuleToNumImports;
+ for (const auto &[FromModule, GUID, Type] : SortedImportList)
+ ++ModuleToNumImports[FromModule];
+
+ std::optional<StringRef> LastModule;
+ for (const auto &[FromModule, GUID, Type] : SortedImportList) {
+ if (LastModule != FromModule) {
+ // Include the hash for every module we import functions from. The set of
+ // imported symbols for each module may affect code generation and is
+ // sensitive to link order, so include that as well.
+ LastModule = FromModule;
+ auto ModHash = Index.getModule(FromModule)->second;
+ Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
+ AddUint64(ModuleToNumImports[FromModule]);
}
+ AddUint64(GUID);
+ AddUint8(Type);
}
// Include the hash for the resolved ODR.
@@ -287,16 +269,14 @@ std::string llvm::computeLTOCacheKey(
// Imported functions may introduce new uses of type identifier resolutions,
// so we need to collect their used resolutions as well.
- for (const ImportModule &ImpM : ImportModulesVector)
- for (auto &[GUID, UnusedImportType] : ImpM.getFunctions()) {
- GlobalValueSummary *S =
- Index.findSummaryInModule(GUID, ImpM.getIdentifier());
- AddUsedThings(S);
- // If this is an alias, we also care about any types/etc. that the aliasee
- // may reference.
- if (auto *AS = dyn_cast_or_null<AliasSummary>(S))
- AddUsedThings(AS->getBaseObject());
- }
+ for (const auto &[FromModule, GUID, Type] : SortedImportList) {
+ GlobalValueSummary *S = Index.findSummaryInModule(GUID, FromModule);
+ AddUsedThings(S);
+ // If this is an alias, we also care about any types/etc. that the aliasee
+ // may reference.
+ if (auto *AS = dyn_cast_or_null<AliasSummary>(S))
+ AddUsedThings(AS->getBaseObject());
+ }
auto AddTypeIdSummary = [&](StringRef TId, const TypeIdSummary &S) {
AddString(TId);
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 7a60ae51f02cb4..3f4f8dbf4c121a 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -337,35 +337,47 @@ using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */>;
FunctionImporter::ImportMapTy::AddDefinitionStatus
FunctionImporter::ImportMapTy::addDefinition(StringRef FromModule,
GlobalValue::GUID GUID) {
- auto [It, Inserted] =
- ImportMap[FromModule].try_emplace(GUID, GlobalValueSummary::Definition);
- if (Inserted)
- return AddDefinitionStatus::Inserted;
- if (It->second == GlobalValueSummary::Definition)
+ auto [Def, Decl] = IDs.createImportIDs(FromModule, GUID);
+ if (!Imports.insert(Def).second)
+ // Already there.
return AddDefinitionStatus::NoChange;
- It->second = GlobalValueSummary::Definition;
- return AddDefinitionStatus::ChangedToDefinition;
+
+ // Remove Decl in case it's there. Note that a definition takes precedence
+ // over a declaration for a given GUID.
+ return Imports.erase(Decl) ? AddDefinitionStatus::ChangedToDefinition
+ : AddDefinitionStatus::Inserted;
}
void FunctionImporter::ImportMapTy::maybeAddDeclaration(
StringRef FromModule, GlobalValue::GUID GUID) {
- ImportMap[FromModule].try_emplace(GUID, GlobalValueSummary::Declaration);
+ auto [Def, Decl] = IDs.createImportIDs(FromModule, GUID);
+ // Insert Decl only if Def is not present. Note that a definition takes
+ // precedence over a declaration for a given GUID.
+ if (!Imports.contains(Def))
+ Imports.insert(Decl);
}
SmallVector<StringRef, 0>
FunctionImporter::ImportMapTy::getSourceModules() const {
- SmallVector<StringRef, 0> Modules(make_first_range(ImportMap));
+ SetVector<StringRef> ModuleSet;
+ for (const auto &[SrcMod, GUID, ImportType] : *this)
+ ModuleSet.insert(SrcMod);
+ SmallVector<StringRef, 0> Modules = ModuleSet.takeVector();
llvm::sort(Modules);
return Modules;
}
std::optional<GlobalValueSummary::ImportKind>
-FunctionImporter::ImportMapTy::getImportType(
- const FunctionsToImportTy &GUIDToImportType, GlobalValue::GUID GUID) const {
- auto Iter = GUIDToImportType.find(GUID);
- if (Iter == GUIDToImportType.end())
- return std::nullopt;
- return Iter->second;
+FunctionImporter::ImportMapTy::getImportType(StringRef FromModule,
+ GlobalValue::GUID GUID) const {
+ if (auto IDPair = IDs.getImportIDs(FromModule, GUID)) {
+ auto [Def, Decl] = *IDPair;
+ if (Imports.contains(Def))
+ return GlobalValueSummary::Definition;
+ if (Imports.contains(Decl))
+ return GlobalValueSummary::Declaration;
+ }
+ return std::nullopt;
}
/// Import globals referenced by a function or other globals that are being
@@ -1103,15 +1115,13 @@ collectImportStatistics(const ModuleSummaryIndex &Index,
const FunctionImporter::ImportMapTy &ImportList) {
DenseMap<StringRef, ImportStatistics> Histogram;
- for (const auto &[FromModule, GUIDs] : ImportList.getImportMap()) {
+ for (const auto &[FromModule, GUID, Type] : ImportList) {
ImportStatistics &Entry = Histogram[FromModule];
- for (const auto &[GUID, Type] : GUIDs) {
- ++Entry.Count;
- if (isGlobalVarSummary(Index, GUID))
- ++Entry.NumGVS;
- else if (Type == GlobalValueSummary::Definition)
- ++Entry.DefinedFS;
- }
+ ++Entry.Count;
+ if (isGlobalVarSummary(Index, GUID))
+ ++Entry.NumGVS;
+ else if (Type == GlobalValueSummary::Definition)
+ ++Entry.DefinedFS;
}
return Histogram;
}
@@ -1125,9 +1135,8 @@ static bool checkVariableImport(
DenseSet<GlobalValue::GUID> FlattenedImports;
for (const auto &ImportPerModule : ImportLists)
- for (const auto &ExportPerModule : ImportPerModule.second.getImportMap())
- for (const auto &[GUID, Type] : ExportPerModule.second)
- FlattenedImports.insert(GUID);
+ for (const auto &[FromModule, GUID, ImportType] : ImportPerModule.second)
+ FlattenedImports.insert(GUID);
// Checks that all GUIDs of read/writeonly vars we see in export lists
// are also in the import lists. Otherwise we my face linker undefs,
@@ -1230,7 +1239,7 @@ void llvm::ComputeCrossModuleImport(
#ifndef NDEBUG
LLVM_DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
<< " modules:\n");
- for (auto &ModuleImports : ImportLists) {
+ for (const auto &ModuleImports : ImportLists) {
auto ModName = ModuleImports.first;
auto &Exports = ExportLists[ModName];
unsigned NumGVS = numGlobalVarSummaries(Index, Exports);
@@ -1528,21 +1537,19 @@ void llvm::gatherImportedSummariesForModule(
};
// Include summaries for imports.
- for (const auto &ILI : ImportList.getImportMap()) {
+ for (const auto &[FromModule, GUID, ImportType] : ImportList) {
auto &SummariesForIndex =
- LookupOrCreate(ModuleToSummariesForIndex, ILI.first);
+ LookupOrCreate(ModuleToSummariesForIndex, FromModule);
const auto &DefinedGVSummaries =
- ModuleToDefinedGVSummaries.lookup(ILI.first);
- for (const auto &[GUID, Type] : ILI.second) {
- const auto &DS = DefinedGVSummaries.find(GUID);
- assert(DS != DefinedGVSummaries.end() &&
- "Expected a defined summary for imported global value");
- if (Type == GlobalValueSummary::Declaration)
- DecSummaries.insert(DS->second);
-
- SummariesForIndex[GUID] = DS->second;
- }
+ ModuleToDefinedGVSummaries.lookup(FromModule);
+ const auto &DS = DefinedGVSummaries.find(GUID);
+ assert(DS != DefinedGVSummaries.end() &&
+ "Expected a defined summary for imported global value");
+ if (ImportType == GlobalValueSummary::Declaration)
+ DecSummaries.insert(DS->second);
+
+ SummariesForIndex[GUID] = DS->second;
}
}
@@ -1812,9 +1819,6 @@ Expected<bool> FunctionImporter::importFunctions(
// Do the actual import of functions now, one Module at a time
for (const auto &Name : ImportList.getSourceModules()) {
// Get the module for the import
- const auto &FunctionsToImportPerModule =
- ImportList.getImportMap().find(Name);
- assert(FunctionsToImportPerModule != ImportList.getImportMap().end());
Expected<std::unique_ptr<Module>> SrcModuleOrErr = ModuleLoader(Name);
if (!SrcModuleOrErr)
return SrcModuleOrErr.takeError();
@@ -1827,15 +1831,13 @@ Expected<bool> FunctionImporter::importFunctions(
if (Error Err = SrcModule->materializeMetadata())
return std::move(Err);
- auto &ImportGUIDs = FunctionsToImportPerModule->second;
-
// Find the globals to import
SetVector<GlobalValue *> GlobalsToImport;
for (Function &F : *SrcModule) {
if (!F.hasName())
continue;
auto GUID = F.getGUID();
- auto MaybeImportType = ImportList.getImportType(ImportGUIDs, GUID);
+ auto MaybeImportType = ImportList.getImportType(Name, GUID);
bool ImportDefinition = MaybeImportType == GlobalValueSummary::Definition;
LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
@@ -1871,7 +1873,7 @@ Expected<bool> FunctionImporter::importFunctions(
if (!GV.hasName())
continue;
auto GUID = GV.getGUID();
- auto MaybeImportType = ImportList.getImportType(ImportGUIDs, GUID);
+ auto MaybeImportType = ImportList.getImportType(Name, GUID);
bool ImportDefinition = MaybeImportType == GlobalValueSummary::Definition;
LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
@@ -1891,7 +1893,7 @@ Expected<bool> FunctionImporter::importFunctions(
if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject()))
continue;
auto GUID = GA.getGUID();
- auto MaybeImportType = ImportList.getImportType(ImportGUIDs, GUID);
+ auto MaybeImportType = ImportList.getImportType(Name, GUID);
bool ImportDefinition = MaybeImportType == GlobalValueSummary::Definition;
LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
@@ -1992,7 +1994,8 @@ static bool doImportingForModuleForTest(
std::unique_ptr<ModuleSummaryIndex> Index = std::move(*IndexPtrOrErr);
// First step is collecting the import list.
- FunctionImporter::ImportMapTy ImportList;
+ FunctionImporter::ImportIDTable ImportIDs;
+ FunctionImporter::ImportMapTy ImportList(ImportIDs);
// If requested, simply import all functions in the index. This is used
// when testing distributed backend handling via the opt tool, when
// we have distributed indexes containing exactly the summaries to import.
diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp
index 54d38a6ddd6f10..317b6e20f64cff 100644
--- a/llvm/tools/llvm-link/llvm-link.cpp
+++ b/llvm/tools/llvm-link/llvm-link.cpp
@@ -325,7 +325,8 @@ static bool importFunctions(const char *argv0, Module &DestModule) {
ExitOnErr(llvm::getModuleSummaryIndexForFile(SummaryIndex));
// Map of Module -> List of globals to import from the Module
- FunctionImporter::ImportMapTy ImportList;
+ FunctionImporter::ImportIDTable ImportIDs;
+ FunctionImporter::ImportMapTy ImportList(ImportIDs);
auto ModuleLoader = [&DestModule](const char *argv0,
const std::string &Identifier) {
``````````
</details>
https://github.com/llvm/llvm-project/pull/106772
More information about the cfe-commits
mailing list