[llvm] af784a5 - [ThinLTO] Use a set rather than a map to track exported ValueInfos. (#97360)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 3 13:15:22 PDT 2024
Author: Mingming Liu
Date: 2024-07-03T13:15:17-07:00
New Revision: af784a5c13328aa4a8ce622260563b459856a8d4
URL: https://github.com/llvm/llvm-project/commit/af784a5c13328aa4a8ce622260563b459856a8d4
DIFF: https://github.com/llvm/llvm-project/commit/af784a5c13328aa4a8ce622260563b459856a8d4.diff
LOG: [ThinLTO] Use a set rather than a map to track exported ValueInfos. (#97360)
https://github.com/llvm/llvm-project/pull/95482 is a reland of
https://github.com/llvm/llvm-project/pull/88024.
https://github.com/llvm/llvm-project/pull/95482 keeps indexing memory
usage reasonable by using unordered_map and doesn't make other changes
to originally reviewed code.
While discussing possible ways to minimize indexing memory usage, Teresa
asked whether I need `ExportSetTy` as a map or a set is sufficient. This
PR implements the idea. It uses a set rather than a map to track exposed
ValueInfos.
Currently, `ExportLists` has two use cases, and neither needs to track a
ValueInfo's import/export status. So using a set is sufficient and
correct.
1) In both in-process and distributed ThinLTO, it's used to decide if a
function or global variable is visible [1] from another module after importing
creates additional cross-module references.
* If a cross-module call edge is seen today, the callee must be visible
to another module without keeping track of its export status already.
For instance, this [2] is how callees of direct calls get exported.
2) For in-process ThinLTO [3], it's used to compute lto cache key.
* The cache key computation already hashes [4] 'ImportList' , and 'ExportList' is
determined by 'ImportList'. So it's fine to not track 'import type' for export list.
[1] https://github.com/llvm/llvm-project/blob/66cd8ec4c08252ebc73c82e4883a8da247ed146b/llvm/lib/LTO/LTO.cpp#L1815-L1819
[2] https://github.com/llvm/llvm-project/blob/66cd8ec4c08252ebc73c82e4883a8da247ed146b/llvm/lib/LTO/LTO.cpp#L1783-L1794
[3] https://github.com/llvm/llvm-project/blob/66cd8ec4c08252ebc73c82e4883a8da247ed146b/llvm/lib/LTO/LTO.cpp#L1494-L1496
[4] https://github.com/llvm/llvm-project/blob/b76100e220591fab2bf0a4917b216439f7aa4b09/llvm/lib/LTO/LTO.cpp#L194-L222
Added:
Modified:
llvm/include/llvm/Transforms/IPO/FunctionImport.h
llvm/lib/LTO/LTO.cpp
llvm/lib/Transforms/IPO/FunctionImport.cpp
llvm/test/ThinLTO/X86/funcimport-stats.ll
llvm/test/Transforms/FunctionImport/funcimport.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index d8c142ec89d82..3b03ba82b9272 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -104,13 +104,10 @@ class FunctionImporter {
/// index's module path string table).
using ImportMapTy = DenseMap<StringRef, FunctionsToImportTy>;
- /// The map contains an entry for every global value the module exports.
- /// The key is ValueInfo, and the value indicates whether the definition
- /// or declaration is visible to another module. If a function's definition is
- /// visible to other modules, the global values this function referenced are
- /// visible and shouldn't be internalized.
- /// TODO: Rename to `ExportMapTy`.
- using ExportSetTy = DenseMap<ValueInfo, GlobalValueSummary::ImportKind>;
+ /// The set contains an entry for every global value that the module exports.
+ /// Depending on the user context, this container is allowed to contain
+ /// definitions, declarations or a mix of both.
+ using ExportSetTy = DenseSet<ValueInfo>;
/// A function of this type is used to load modules referenced by the index.
using ModuleLoaderTy =
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 6bbec535d8e98..5382b1158cb04 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -161,19 +161,17 @@ void llvm::computeLTOCacheKey(
auto ModHash = Index.getModuleHash(ModuleID);
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
- std::vector<std::pair<uint64_t, uint8_t>> ExportsGUID;
+ // TODO: `ExportList` is determined by `ImportList`. Since `ImportList` is
+ // used to compute cache key, we could omit hashing `ExportList` here.
+ std::vector<uint64_t> ExportsGUID;
ExportsGUID.reserve(ExportList.size());
- for (const auto &[VI, ExportType] : ExportList)
- ExportsGUID.push_back(
- std::make_pair(VI.getGUID(), static_cast<uint8_t>(ExportType)));
+ for (const auto &VI : ExportList)
+ ExportsGUID.push_back(VI.getGUID());
// Sort the export list elements GUIDs.
llvm::sort(ExportsGUID);
- for (auto [GUID, ExportType] : ExportsGUID) {
- // The export list can impact the internalization, be conservative here
+ for (auto GUID : ExportsGUID)
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
- AddUint8(ExportType);
- }
// Include the hash for every module we import functions from. The set of
// imported symbols for each module may affect code generation and is
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index ec5294b9512cf..2b0ee46f489ca 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -400,8 +400,7 @@ class GlobalsImporter final {
// later, in ComputeCrossModuleImport, after import decisions are
// complete, which is more efficient than adding them here.
if (ExportLists)
- (*ExportLists)[RefSummary->modulePath()][VI] =
- GlobalValueSummary::Definition;
+ (*ExportLists)[RefSummary->modulePath()].insert(VI);
// If variable is not writeonly we attempt to recursively analyze
// its references in order to import referenced constants.
@@ -582,7 +581,7 @@ class WorkloadImportsManager : public ModuleImportsManager {
GlobalValueSummary::Definition;
GVI.onImportingSummary(*GVS);
if (ExportLists)
- (*ExportLists)[ExportingModule][VI] = GlobalValueSummary::Definition;
+ (*ExportLists)[ExportingModule].insert(VI);
}
LLVM_DEBUG(dbgs() << "[Workload] Done\n");
}
@@ -818,10 +817,8 @@ static void computeImportForFunction(
// Since definition takes precedence over declaration for the same VI,
// try emplace <VI, declaration> pair without checking insert result.
// If insert doesn't happen, there must be an existing entry keyed by
- // VI.
- if (ExportLists)
- (*ExportLists)[DeclSourceModule].try_emplace(
- VI, GlobalValueSummary::Declaration);
+ // VI. Note `ExportLists` only keeps track of exports due to imported
+ // definitions.
ImportList[DeclSourceModule].try_emplace(
VI.getGUID(), GlobalValueSummary::Declaration);
}
@@ -892,7 +889,7 @@ static void computeImportForFunction(
// later, in ComputeCrossModuleImport, after import decisions are
// complete, which is more efficient than adding them here.
if (ExportLists)
- (*ExportLists)[ExportModulePath][VI] = GlobalValueSummary::Definition;
+ (*ExportLists)[ExportModulePath].insert(VI);
}
auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
@@ -998,19 +995,29 @@ static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
return false;
}
-template <class T>
-static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index, T &Cont,
- unsigned &DefinedGVS,
- unsigned &DefinedFS) {
+// Return the number of global variable summaries in ExportSet.
+static unsigned
+numGlobalVarSummaries(const ModuleSummaryIndex &Index,
+ FunctionImporter::ExportSetTy &ExportSet) {
+ unsigned NumGVS = 0;
+ for (auto &VI : ExportSet)
+ if (isGlobalVarSummary(Index, VI.getGUID()))
+ ++NumGVS;
+ return NumGVS;
+}
+
+// Given ImportMap, return the number of global variable summaries and record
+// the number of defined function summaries as output parameter.
+static unsigned
+numGlobalVarSummaries(const ModuleSummaryIndex &Index,
+ FunctionImporter::FunctionsToImportTy &ImportMap,
+ unsigned &DefinedFS) {
unsigned NumGVS = 0;
- DefinedGVS = 0;
DefinedFS = 0;
- for (auto &[GUID, Type] : Cont) {
- if (isGlobalVarSummary(Index, GUID)) {
- if (Type == GlobalValueSummary::Definition)
- ++DefinedGVS;
+ for (auto &[GUID, Type] : ImportMap) {
+ if (isGlobalVarSummary(Index, GUID))
++NumGVS;
- } else if (Type == GlobalValueSummary::Definition)
+ else if (Type == GlobalValueSummary::Definition)
++DefinedFS;
}
return NumGVS;
@@ -1046,7 +1053,7 @@ static bool checkVariableImport(
};
for (auto &ExportPerModule : ExportLists)
- for (auto &[VI, Unused] : ExportPerModule.second)
+ for (auto &VI : ExportPerModule.second)
if (!FlattenedImports.count(VI.getGUID()) &&
IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
return false;
@@ -1079,14 +1086,12 @@ void llvm::ComputeCrossModuleImport(
// since we may import the same values multiple times into
diff erent modules
// during the import computation.
for (auto &ELI : ExportLists) {
+ // `NewExports` tracks the VI that gets exported because the full definition
+ // of its user/referencer gets exported.
FunctionImporter::ExportSetTy NewExports;
const auto &DefinedGVSummaries =
ModuleToDefinedGVSummaries.lookup(ELI.first);
- for (auto &[EI, Type] : ELI.second) {
- // If a variable is exported as a declaration, its 'refs' and 'calls' are
- // not further exported.
- if (Type == GlobalValueSummary::Declaration)
- continue;
+ for (auto &EI : ELI.second) {
// Find the copy defined in the exporting module so that we can mark the
// values it references in that specific definition as exported.
// Below we will add all references and called values, without regard to
@@ -1105,23 +1110,14 @@ void llvm::ComputeCrossModuleImport(
// we convert such variables initializers to "zeroinitializer".
// See processGlobalForThinLTO.
if (!Index.isWriteOnly(GVS))
- for (const auto &VI : GVS->refs()) {
- // Try to emplace the declaration entry. If a definition entry
- // already exists for key `VI`, this is a no-op.
- NewExports.try_emplace(VI, GlobalValueSummary::Declaration);
- }
+ for (const auto &VI : GVS->refs())
+ NewExports.insert(VI);
} else {
auto *FS = cast<FunctionSummary>(S);
- for (const auto &Edge : FS->calls()) {
- // Try to emplace the declaration entry. If a definition entry
- // already exists for key `VI`, this is a no-op.
- NewExports.try_emplace(Edge.first, GlobalValueSummary::Declaration);
- }
- for (const auto &Ref : FS->refs()) {
- // Try to emplace the declaration entry. If a definition entry
- // already exists for key `VI`, this is a no-op.
- NewExports.try_emplace(Ref, GlobalValueSummary::Declaration);
- }
+ for (const auto &Edge : FS->calls())
+ NewExports.insert(Edge.first);
+ for (const auto &Ref : FS->refs())
+ NewExports.insert(Ref);
}
}
// Prune list computed above to only include values defined in the
@@ -1129,7 +1125,7 @@ void llvm::ComputeCrossModuleImport(
// the same ref/call target multiple times in above loop, and it is more
// efficient to avoid a set lookup each time.
for (auto EI = NewExports.begin(); EI != NewExports.end();) {
- if (!DefinedGVSummaries.count(EI->first.getGUID()))
+ if (!DefinedGVSummaries.count(EI->getGUID()))
NewExports.erase(EI++);
else
++EI;
@@ -1144,29 +1140,22 @@ void llvm::ComputeCrossModuleImport(
for (auto &ModuleImports : ImportLists) {
auto ModName = ModuleImports.first;
auto &Exports = ExportLists[ModName];
- unsigned DefinedGVS = 0, DefinedFS = 0;
- unsigned NumGVS =
- numGlobalVarSummaries(Index, Exports, DefinedGVS, DefinedFS);
- LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports " << DefinedFS
- << " function as definitions, "
- << Exports.size() - NumGVS - DefinedFS
- << " functions as declarations, " << DefinedGVS
- << " var definitions and " << NumGVS - DefinedGVS
- << " var declarations. Imports from "
- << ModuleImports.second.size() << " modules.\n");
+ unsigned NumGVS = numGlobalVarSummaries(Index, Exports);
+ LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports "
+ << Exports.size() - NumGVS << " functions and " << NumGVS
+ << " vars. Imports from " << ModuleImports.second.size()
+ << " modules.\n");
for (auto &Src : ModuleImports.second) {
auto SrcModName = Src.first;
- unsigned DefinedGVS = 0, DefinedFS = 0;
+ unsigned DefinedFS = 0;
unsigned NumGVSPerMod =
- numGlobalVarSummaries(Index, Src.second, DefinedGVS, DefinedFS);
+ numGlobalVarSummaries(Index, Src.second, DefinedFS);
LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
<< Src.second.size() - NumGVSPerMod - DefinedFS
<< " function declarations imported from " << SrcModName
<< "\n");
- LLVM_DEBUG(dbgs() << " - " << DefinedGVS << " global vars definition and "
- << NumGVSPerMod - DefinedGVS
- << " global vars declaration imported from "
- << SrcModName << "\n");
+ LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod
+ << " global vars imported from " << SrcModName << "\n");
}
}
#endif
@@ -1180,17 +1169,14 @@ static void dumpImportListForModule(const ModuleSummaryIndex &Index,
<< ImportList.size() << " modules.\n");
for (auto &Src : ImportList) {
auto SrcModName = Src.first;
- unsigned DefinedGVS = 0, DefinedFS = 0;
- unsigned NumGVSPerMod =
- numGlobalVarSummaries(Index, Src.second, DefinedGVS, DefinedFS);
+ unsigned DefinedFS = 0;
+ unsigned NumGVSPerMod = numGlobalVarSummaries(Index, Src.second, DefinedFS);
LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
<< Src.second.size() - DefinedFS - NumGVSPerMod
<< " function declarations imported from " << SrcModName
<< "\n");
- LLVM_DEBUG(dbgs() << " - " << DefinedGVS << " var definitions and "
- << NumGVSPerMod - DefinedGVS
- << " var declarations imported from " << SrcModName
- << "\n");
+ LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod << " vars imported from "
+ << SrcModName << "\n");
}
}
#endif
diff --git a/llvm/test/ThinLTO/X86/funcimport-stats.ll b/llvm/test/ThinLTO/X86/funcimport-stats.ll
index 7fcd33855fe1a..1c2fd092ccb49 100644
--- a/llvm/test/ThinLTO/X86/funcimport-stats.ll
+++ b/llvm/test/ThinLTO/X86/funcimport-stats.ll
@@ -10,7 +10,7 @@
; RUN: cat %t4 | FileCheck %s
; CHECK: - [[NUM_FUNCS:[0-9]+]] function definitions and 0 function declarations imported from
-; CHECK-NEXT: - [[NUM_VARS:[0-9]+]] global vars definition and 0 global vars declaration imported from
+; CHECK-NEXT: - [[NUM_VARS:[0-9]+]] global vars imported from
; CHECK: [[NUM_FUNCS]] function-import - Number of functions imported in backend
; CHECK-NEXT: [[NUM_FUNCS]] function-import - Number of functions thin link decided to import
diff --git a/llvm/test/Transforms/FunctionImport/funcimport.ll b/llvm/test/Transforms/FunctionImport/funcimport.ll
index 635750b33fff0..8f7e8340d4909 100644
--- a/llvm/test/Transforms/FunctionImport/funcimport.ll
+++ b/llvm/test/Transforms/FunctionImport/funcimport.ll
@@ -167,7 +167,7 @@ declare void @variadic_va_start(...)
; DUMP: Module [[M1:.*]] imports from 1 module
; DUMP-NEXT: 15 function definitions and 0 function declarations imported from [[M2:.*]]
-; DUMP-NEXT: 4 var definitions and 0 var declarations imported from [[M2]]
+; DUMP-NEXT: 4 vars imported from [[M2]]
; DUMP: Imported 15 functions for Module [[M1]]
; DUMP-NEXT: Imported 4 global variables for Module [[M1]]
More information about the llvm-commits
mailing list