[llvm-branch-commits] [llvm] [nfc][ThinLTO] Generate import status in per-module combined summary (PR #88024)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon May 6 16:09:07 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lto
Author: Mingming Liu (minglotus-6)
<details>
<summary>Changes</summary>
The goal is to generate `declaration` import status in the per-module combined summaries, when the function's definition is not imported.
Implementation notes:
* ThinLink
1. Adds `-import-declaration` (default false). `selectCallee` stores the summary corresponding to {noinline,too-large} callees as output parameter, and the result is used to populate `{Import,Export}List` only if `import-declaration` is true.
2. When function `foo` of a module needs the declaration of `callee` while function `bar` in the same module needs the definition of `callee`, definition will take precedence over declaration.
* Postlink
- With this patch, the postlink pipeline is updated to keep its current behavior (import definitions), but not updated to handle `declaration` summaries. Two use cases ([better call-graph sort](https://discourse.llvm.org/t/rfc-for-better-call-graph-sort-build-a-more-complete-call-graph-by-adding-more-indirect-call-edges/74029#support-cross-module-function-declaration-import-5) or [cross-module auto-init](https://github.com/llvm/llvm-project/pull/87597#discussion_r1556067195)) would use this bit differently.
---
Patch is 43.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88024.diff
12 Files Affected:
- (modified) llvm/include/llvm/IR/ModuleSummaryIndex.h (+5-1)
- (modified) llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h (+2-1)
- (modified) llvm/include/llvm/Transforms/IPO/FunctionImport.h (+15-6)
- (modified) llvm/lib/LTO/LTO.cpp (+11-7)
- (modified) llvm/lib/LTO/LTOBackend.cpp (+8-1)
- (modified) llvm/lib/LTO/ThinLTOCodeGenerator.cpp (+10-3)
- (modified) llvm/lib/Transforms/IPO/FunctionImport.cpp (+207-70)
- (modified) llvm/test/ThinLTO/X86/funcimport-stats.ll (+2-2)
- (added) llvm/test/ThinLTO/X86/import_callee_declaration.ll (+165)
- (modified) llvm/test/Transforms/FunctionImport/funcimport.ll (+6-2)
- (modified) llvm/tools/llvm-link/llvm-link.cpp (+5-1)
- (modified) llvm/tools/llvm-lto/llvm-lto.cpp (+5-2)
``````````diff
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 66209b8cf3ea2d..29d13cf4f4fae6 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -585,7 +585,11 @@ class GlobalValueSummary {
return Flags.ImportType == GlobalValueSummary::ImportKind::Declaration;
}
- void setImportKind(ImportKind IK) { Flags.ImportType = IK; }
+ GlobalValueSummary::ImportKind importType() const {
+ return static_cast<ImportKind>(Flags.ImportType);
+ }
+
+ void setImportType(ImportKind IK) { Flags.ImportType = IK; }
GlobalValue::VisibilityTypes getVisibility() const {
return (GlobalValue::VisibilityTypes)Flags.Visibility;
diff --git a/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
index c450acda82ad06..6e197ae2f279c9 100644
--- a/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
+++ b/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
@@ -276,7 +276,8 @@ class ThinLTOCodeGenerator {
void gatherImportedSummariesForModule(
Module &Module, ModuleSummaryIndex &Index,
std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
- const lto::InputFile &File);
+ const lto::InputFile &File,
+ ModuleToGVSummaryPtrSet &ModuleToDecSummaries);
/**
* Perform internalization. Index is updated to reflect linkage changes.
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index c4d19e8641eca2..fb38c68b56df0e 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -31,9 +31,9 @@ class Module;
/// based on the provided summary informations.
class FunctionImporter {
public:
- /// Set of functions to import from a source module. Each entry is a set
- /// containing all the GUIDs of all functions to import for a source module.
- using FunctionsToImportTy = std::unordered_set<GlobalValue::GUID>;
+ /// The functions to import from a source module and their import type.
+ using FunctionsToImportTy =
+ DenseMap<GlobalValue::GUID, GlobalValueSummary::ImportKind>;
/// The different reasons selectCallee will chose not to import a
/// candidate.
@@ -99,8 +99,12 @@ class FunctionImporter {
/// index's module path string table).
using ImportMapTy = DenseMap<StringRef, FunctionsToImportTy>;
- /// The set contains an entry for every global value the module exports.
- using ExportSetTy = DenseSet<ValueInfo>;
+ /// The map contains an entry for every global value the module exports.
+ /// The key is ValueInfo, and the value indicates whether the definition
+ /// or declaration is visible to another module. If a function's definition is
+ /// visible to other modules, the global values this function referenced are
+ /// visible and shouldn't be internalized.
+ using ExportSetTy = DenseMap<ValueInfo, GlobalValueSummary::ImportKind>;
/// A function of this type is used to load modules referenced by the index.
using ModuleLoaderTy =
@@ -207,11 +211,16 @@ bool convertToDeclaration(GlobalValue &GV);
/// \p ModuleToSummariesForIndex will be populated with the needed summaries
/// from each required module path. Use a std::map instead of StringMap to get
/// stable order for bitcode emission.
+///
+/// \p ModuleToDecSummaries will be populated with the set of declarations \p
+/// ModulePath need from other modules. They key is module path, and the value
+/// is a set of summary pointers.
void gatherImportedSummariesForModule(
StringRef ModulePath,
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
const FunctionImporter::ImportMapTy &ImportList,
- std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex);
+ std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
+ ModuleToGVSummaryPtrSet &ModuleToDecSummaries);
/// Emit into \p OutputFilename the files module \p ModulePath will import from.
std::error_code EmitImportsFiles(
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 53060df7f503e0..e7e5a6ab392774 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -158,7 +158,7 @@ void llvm::computeLTOCacheKey(
std::vector<uint64_t> ExportsGUID;
ExportsGUID.reserve(ExportList.size());
- for (const auto &VI : ExportList) {
+ for (const auto &[VI, UnusedImportType] : ExportList) {
auto GUID = VI.getGUID();
ExportsGUID.push_back(GUID);
}
@@ -204,8 +204,8 @@ void llvm::computeLTOCacheKey(
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
AddUint64(Entry.getFunctions().size());
- for (auto &Fn : Entry.getFunctions())
- AddUint64(Fn);
+ for (auto &[GUID, UnusedImportType] : Entry.getFunctions())
+ AddUint64(GUID);
}
// Include the hash for the resolved ODR.
@@ -275,9 +275,9 @@ void llvm::computeLTOCacheKey(
// Imported functions may introduce new uses of type identifier resolutions,
// so we need to collect their used resolutions as well.
for (const ImportModule &ImpM : ImportModulesVector)
- for (auto &ImpF : ImpM.getFunctions()) {
+ for (auto &[GUID, UnusedImportType] : ImpM.getFunctions()) {
GlobalValueSummary *S =
- Index.findSummaryInModule(ImpF, ImpM.getIdentifier());
+ Index.findSummaryInModule(GUID, ImpM.getIdentifier());
AddUsedThings(S);
// If this is an alias, we also care about any types/etc. that the aliasee
// may reference.
@@ -1389,15 +1389,19 @@ class lto::ThinBackendProc {
llvm::StringRef ModulePath,
const std::string &NewModulePath) {
std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
+ ModuleToGVSummaryPtrSet ModuleToDeclarationSummaries;
std::error_code EC;
gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
- ImportList, ModuleToSummariesForIndex);
+ ImportList, ModuleToSummariesForIndex,
+ ModuleToDeclarationSummaries);
raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC,
sys::fs::OpenFlags::OF_None);
if (EC)
return errorCodeToError(EC);
- writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex);
+
+ writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex,
+ &ModuleToDeclarationSummaries);
if (ShouldEmitImportsFiles) {
EC = EmitImportsFiles(ModulePath, NewModulePath + ".imports",
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 71e8849dc3cc91..a2cd672e0f0a18 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -716,7 +716,14 @@ bool lto::initImportList(const Module &M,
if (Summary->modulePath() == M.getModuleIdentifier())
continue;
// Add an entry to provoke importing by thinBackend.
- ImportList[Summary->modulePath()].insert(GUID);
+ // Try emplace the entry first. If an entry with the same key already
+ // exists, set the value to 'std::min(existing-value, new-value)' to make
+ // sure a definition takes precedence over a declaration.
+ auto [Iter, Inserted] = ImportList[Summary->modulePath()].try_emplace(
+ GUID, Summary->importType());
+
+ if (!Inserted)
+ Iter->second = std::min(Iter->second, Summary->importType());
}
}
return true;
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 8f517eb50dc76f..1227f26cc42805 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -766,7 +766,7 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
Module &TheModule, ModuleSummaryIndex &Index,
std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
- const lto::InputFile &File) {
+ const lto::InputFile &File, ModuleToGVSummaryPtrSet &ModuleToDecSummaries) {
auto ModuleCount = Index.modulePaths().size();
auto ModuleIdentifier = TheModule.getModuleIdentifier();
@@ -796,7 +796,8 @@ void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
llvm::gatherImportedSummariesForModule(
ModuleIdentifier, ModuleToDefinedGVSummaries,
- ImportLists[ModuleIdentifier], ModuleToSummariesForIndex);
+ ImportLists[ModuleIdentifier], ModuleToSummariesForIndex,
+ ModuleToDecSummaries);
}
/**
@@ -833,9 +834,15 @@ void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
ExportLists);
std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
+ // 'EmitImportsFiles' emits the list of modules from which to import from, and
+ // the set of keys in `ModuleToSummariesForIndex` should be a superset of keys
+ // in `ModuleToDecSummaries`, so no need to use `ModuleToDecSummaries` in
+ // `EmitImportFiles`.
+ ModuleToGVSummaryPtrSet ModuleToDecSummaries;
llvm::gatherImportedSummariesForModule(
ModuleIdentifier, ModuleToDefinedGVSummaries,
- ImportLists[ModuleIdentifier], ModuleToSummariesForIndex);
+ ImportLists[ModuleIdentifier], ModuleToSummariesForIndex,
+ ModuleToDecSummaries);
std::error_code EC;
if ((EC = EmitImportsFiles(ModuleIdentifier, OutputName,
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 68f9799616ae6d..77d644dc97ab7d 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -140,6 +140,17 @@ static cl::opt<bool>
ImportAllIndex("import-all-index",
cl::desc("Import all external functions in index."));
+/// This is a test-only option.
+/// If this option is enabled, the ThinLTO indexing step will import each
+/// function declaration as a fallback. In a real build this may increase ram
+/// usage of the indexing step unnecessarily.
+/// TODO: Implement selective import (based on combined summary analysis) to
+/// ensure the imported function has a use case in the postlink pipeline.
+static cl::opt<bool> ImportDeclaration(
+ "import-declaration", cl::init(false), cl::Hidden,
+ cl::desc("If true, import function declaration as fallback if the function "
+ "definition is not imported."));
+
/// Pass a workload description file - an example of workload would be the
/// functions executed to satisfy a RPC request. A workload is defined by a root
/// function and the list of functions that are (frequently) needed to satisfy
@@ -245,8 +256,10 @@ static auto qualifyCalleeCandidates(
}
/// Given a list of possible callee implementation for a call site, select one
-/// that fits the \p Threshold. If none are found, the Reason will give the last
-/// reason for the failure (last, in the order of CalleeSummaryList entries).
+/// that fits the \p Threshold for function definition import. If none are
+/// found, the Reason will give the last reason for the failure (last, in the
+/// order of CalleeSummaryList entries). If caller wants to select eligible
+/// summary
///
/// FIXME: select "best" instead of first that fits. But what is "best"?
/// - The smallest: more likely to be inlined.
@@ -259,24 +272,32 @@ static const GlobalValueSummary *
selectCallee(const ModuleSummaryIndex &Index,
ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
unsigned Threshold, StringRef CallerModulePath,
+ const GlobalValueSummary *&TooLargeOrNoInlineSummary,
FunctionImporter::ImportFailureReason &Reason) {
+ // Records the last summary with reason noinline or too-large.
+ TooLargeOrNoInlineSummary = nullptr;
auto QualifiedCandidates =
qualifyCalleeCandidates(Index, CalleeSummaryList, CallerModulePath);
for (auto QualifiedValue : QualifiedCandidates) {
Reason = QualifiedValue.first;
+ // Skip a summary if its import is not (proved to be) legal.
if (Reason != FunctionImporter::ImportFailureReason::None)
continue;
auto *Summary =
cast<FunctionSummary>(QualifiedValue.second->getBaseObject());
+ // Don't bother importing the definition if the chance of inlining it is
+ // not high enough (except under `--force-import-all`).
if ((Summary->instCount() > Threshold) && !Summary->fflags().AlwaysInline &&
!ForceImportAll) {
+ TooLargeOrNoInlineSummary = Summary;
Reason = FunctionImporter::ImportFailureReason::TooLarge;
continue;
}
- // Don't bother importing if we can't inline it anyway.
+ // Don't bother importing the definition if we can't inline it anyway.
if (Summary->fflags().NoInline && !ForceImportAll) {
+ TooLargeOrNoInlineSummary = Summary;
Reason = FunctionImporter::ImportFailureReason::NoInline;
continue;
}
@@ -358,17 +379,28 @@ class GlobalsImporter final {
if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
LocalNotInModule(GVS))
continue;
- auto ILI = ImportList[RefSummary->modulePath()].insert(VI.getGUID());
+
+ // If there isn't an entry for GUID, insert <GUID, Definition> pair.
+ // Otherwise, definition should take precedence over declaration.
+ auto [Iter, Inserted] =
+ ImportList[RefSummary->modulePath()].try_emplace(
+ VI.getGUID(), GlobalValueSummary::Definition);
// Only update stat and exports if we haven't already imported this
// variable.
- if (!ILI.second)
+ if (!Inserted) {
+ // FIXME: Introduce a wrapper struct around ImportType, and provide
+ // an `updateType` method for better readability, just like how we
+ // update the hotness of a call edge.
+ Iter->second = std::min(GlobalValueSummary::Definition, Iter->second);
break;
+ }
NumImportedGlobalVarsThinLink++;
// Any references made by this variable will be marked exported
// later, in ComputeCrossModuleImport, after import decisions are
// complete, which is more efficient than adding them here.
if (ExportLists)
- (*ExportLists)[RefSummary->modulePath()].insert(VI);
+ (*ExportLists)[RefSummary->modulePath()][VI] =
+ GlobalValueSummary::Definition;
// If variable is not writeonly we attempt to recursively analyze
// its references in order to import referenced constants.
@@ -545,10 +577,11 @@ class WorkloadImportsManager : public ModuleImportsManager {
LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
<< ExportingModule << " : "
<< Function::getGUID(VI.name()) << "\n");
- ImportList[ExportingModule].insert(VI.getGUID());
+ ImportList[ExportingModule][VI.getGUID()] =
+ GlobalValueSummary::Definition;
GVI.onImportingSummary(*GVS);
if (ExportLists)
- (*ExportLists)[ExportingModule].insert(VI);
+ (*ExportLists)[ExportingModule][VI] = GlobalValueSummary::Definition;
}
LLVM_DEBUG(dbgs() << "[Workload] Done\n");
}
@@ -769,9 +802,28 @@ static void computeImportForFunction(
}
FunctionImporter::ImportFailureReason Reason{};
- CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold,
- Summary.modulePath(), Reason);
+
+ // `SummaryForDeclImport` is an summary eligible for declaration import.
+ const GlobalValueSummary *SummaryForDeclImport = nullptr;
+ CalleeSummary =
+ selectCallee(Index, VI.getSummaryList(), NewThreshold,
+ Summary.modulePath(), SummaryForDeclImport, Reason);
if (!CalleeSummary) {
+ // There isn't a callee for definition import but one for declaration
+ // import.
+ if (ImportDeclaration && SummaryForDeclImport) {
+ StringRef DeclSourceModule = SummaryForDeclImport->modulePath();
+
+ // Since definition takes precedence over declaration for the same VI,
+ // try emplace <VI, declaration> pair without checking insert result.
+ // If insert doesn't happen, there must be an existing entry keyed by
+ // VI.
+ if (ExportLists)
+ (*ExportLists)[DeclSourceModule].try_emplace(
+ VI, GlobalValueSummary::Declaration);
+ ImportList[DeclSourceModule].try_emplace(
+ VI.getGUID(), GlobalValueSummary::Declaration);
+ }
// Update with new larger threshold if this was a retry (otherwise
// we would have already inserted with NewThreshold above). Also
// update failure info if requested.
@@ -791,6 +843,7 @@ static void computeImportForFunction(
FailureInfo = std::make_unique<FunctionImporter::ImportFailureInfo>(
VI, Edge.second.getHotness(), Reason, 1);
}
+
if (ForceImportAll) {
std::string Msg = std::string("Failed to import function ") +
VI.name().str() + " due to " +
@@ -816,11 +869,15 @@ static void computeImportForFunction(
"selectCallee() didn't honor the threshold");
auto ExportModulePath = ResolvedCalleeSummary->modulePath();
- auto ILI = ImportList[ExportModulePath].insert(VI.getGUID());
+
+ // Try emplace the definition entry, and update stats based on insertion
+ // status.
+ auto [Iter, Inserted] = ImportList[ExportModulePath].try_emplace(
+ VI.getGUID(), GlobalValueSummary::Definition);
+
// We previously decided to import this GUID definition if it was already
// inserted in the set of imports from the exporting module.
- bool PreviouslyImported = !ILI.second;
- if (!PreviouslyImported) {
+ if (Inserted || Iter->second == GlobalValueSummary::Declaration) {
NumImportedFunctionsThinLink++;
if (IsHotCallsite)
NumImportedHotFunctionsThinLink++;
@@ -828,11 +885,14 @@ static void computeImportForFunction(
NumImportedCriticalFunctionsThinLink++;
}
+ if (Iter->second == GlobalValueSummary::Declaration)
+ Iter->second = GlobalValueSummary::Definition;
+
// Any calls/references made by this function will be marked exported
// later, in ComputeCrossModuleImport, after import decisions are
// complete, which is more efficient than adding them here.
if (ExportLists)
- (*ExportLists)[ExportModulePath].insert(VI);
+ (*ExportLists)[ExportModulePath][VI] = GlobalValueSummary::Definition;
}
auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
@@ -939,12 +999,20 @@ static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
}
template <class T>
-static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index,
- T &Cont) {
+static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index, T &Cont,
+ unsigned &DefinedGVS,
+ unsigned &DefinedFS) {
unsigned NumGVS = 0;
- for (auto &V : Cont)
- if (isGlobalVarSummary(Index, V))
+ DefinedGVS = 0;
+ DefinedFS = 0;
+ for (auto &[GUID, Type] : Cont) {
+ if (isGlobalVarSummary(Index, GUID)) {
+ if (Type == GlobalValueSummary::Definition)
+ ++DefinedGVS;
++NumGVS;
+ } else if (Type == GlobalValueSummary::Definition)
+ ++DefinedFS;
+ }
return NumGVS;
}
#endif
@@ -954,13 +1022,12 @@ static bool checkVariableImport(
const ModuleSummaryIndex &Index,
DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
-
DenseSet<GlobalValue::GUID> FlattenedImports;
for (auto &ImportPerModule : ImportLists)
for (auto &ExportPerModule : ImportPerModule.second)
- FlattenedImports.insert(ExportPerModule.second.begin(),
- ExportPerModule.second.end());
+ for (auto &[GUID, Type] : ExportPerModule.second)
+ FlattenedImports.insert(GUID);
// Checks that all GUIDs of read/writeonly var...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/88024
More information about the llvm-branch-commits
mailing list