[clang] [clang][modules] Optimize construction and usage of the submodule index (PR #113391)
Jan Svoboda via cfe-commits
cfe-commits at lists.llvm.org
Mon Oct 28 11:46:08 PDT 2024
https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/113391
>From c6ff124355209de31c86096eb2ede14d598aa5cd Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Tue, 22 Oct 2024 15:46:07 -0700
Subject: [PATCH 1/2] [clang][modules] Optimize construction and usage of the
submodule index
This patch avoids eagerly populating the submodule index on `Module` construction. The `StringMap` allocation shows up in my profiles of `clang-scan-deps`, while the index is not necessary most of the time. We still construct it on-demand.
Moreover, this patch avoids performing qualified submodule lookup in `ASTReader` whenever we're deserializing a module graph whose top-level module is unknown. This is pointless, since that's guaranteed to never find any existing submodules anyway.
This speeds up `clang-scan-deps` by ~0.5% on my workload.
---
clang/include/clang/Basic/Module.h | 3 +--
clang/include/clang/Lex/ModuleMap.h | 10 ++++-----
clang/lib/Basic/Module.cpp | 12 ++++++-----
clang/lib/Lex/ModuleMap.cpp | 29 ++++++++++++++++-----------
clang/lib/Serialization/ASTReader.cpp | 14 ++++++++-----
5 files changed, 39 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index 1ab3b5e5f81567..dd384c1d76c5fd 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -227,7 +227,7 @@ class alignas(8) Module {
/// A mapping from the submodule name to the index into the
/// \c SubModules vector at which that submodule resides.
- llvm::StringMap<unsigned> SubModuleIndex;
+ mutable llvm::StringMap<unsigned> SubModuleIndex;
/// The AST file if this is a top-level module which has a
/// corresponding serialized AST file, or null otherwise.
@@ -612,7 +612,6 @@ class alignas(8) Module {
void setParent(Module *M) {
assert(!Parent);
Parent = M;
- Parent->SubModuleIndex[Name] = Parent->SubModules.size();
Parent->SubModules.push_back(this);
}
diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h
index 75b567a347cb6c..b4a8e0e358ffbe 100644
--- a/clang/include/clang/Lex/ModuleMap.h
+++ b/clang/include/clang/Lex/ModuleMap.h
@@ -541,11 +541,11 @@ class ModuleMap {
///
/// \param IsExplicit Whether this is an explicit submodule.
///
- /// \returns The found or newly-created module, along with a boolean value
- /// that will be true if the module is newly-created.
- std::pair<Module *, bool> findOrCreateModule(StringRef Name, Module *Parent,
- bool IsFramework,
- bool IsExplicit);
+ /// \returns The found or newly-created module.
+ Module *findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework,
+ bool IsExplicit);
+ Module *createModule(StringRef Name, Module *Parent, bool IsFramework,
+ bool IsExplicit);
/// Create a global module fragment for a C++ module unit.
///
diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp
index a7a3f6b37efef1..330108d5b3e47f 100644
--- a/clang/lib/Basic/Module.cpp
+++ b/clang/lib/Basic/Module.cpp
@@ -54,7 +54,6 @@ Module::Module(ModuleConstructorTag, StringRef Name,
NoUndeclaredIncludes = Parent->NoUndeclaredIncludes;
ModuleMapIsPrivate = Parent->ModuleMapIsPrivate;
- Parent->SubModuleIndex[Name] = Parent->SubModules.size();
Parent->SubModules.push_back(this);
}
}
@@ -351,11 +350,14 @@ void Module::markUnavailable(bool Unimportable) {
}
Module *Module::findSubmodule(StringRef Name) const {
- llvm::StringMap<unsigned>::const_iterator Pos = SubModuleIndex.find(Name);
- if (Pos == SubModuleIndex.end())
- return nullptr;
+ // Add new submodules into the index.
+ for (unsigned I = SubModuleIndex.size(), E = SubModules.size(); I != E; ++I)
+ SubModuleIndex[SubModules[I]->Name] = I;
- return SubModules[Pos->getValue()];
+ if (auto It = SubModuleIndex.find(Name); It != SubModuleIndex.end())
+ return SubModules[It->second];
+
+ return nullptr;
}
Module *Module::getGlobalModuleFragment() const {
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index 201ab91cf68ca1..10774429a2177b 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -655,8 +655,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) {
SmallString<32> NameBuf;
StringRef Name = sanitizeFilenameAsIdentifier(
llvm::sys::path::stem(SkippedDir.getName()), NameBuf);
- Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
- Explicit).first;
+ Result =
+ findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit);
setInferredModuleAllowedBy(Result, UmbrellaModuleMap);
// Associate the module and the directory.
@@ -672,8 +672,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) {
SmallString<32> NameBuf;
StringRef Name = sanitizeFilenameAsIdentifier(
llvm::sys::path::stem(File.getName()), NameBuf);
- Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
- Explicit).first;
+ Result =
+ findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit);
setInferredModuleAllowedBy(Result, UmbrellaModuleMap);
Result->addTopHeader(File);
@@ -857,15 +857,21 @@ Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{
return Context->findSubmodule(Name);
}
-std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
- Module *Parent,
- bool IsFramework,
- bool IsExplicit) {
+Module *ModuleMap::findOrCreateModule(StringRef Name, Module *Parent,
+ bool IsFramework, bool IsExplicit) {
// Try to find an existing module with this name.
if (Module *Sub = lookupModuleQualified(Name, Parent))
- return std::make_pair(Sub, false);
+ return Sub;
// Create a new module with this name.
+ return createModule(Name, Parent, IsFramework, IsExplicit);
+}
+
+Module *ModuleMap::createModule(StringRef Name, Module *Parent,
+ bool IsFramework, bool IsExplicit) {
+ assert(lookupModuleQualified(Name, Parent) == nullptr &&
+ "Creating duplicate submodule");
+
Module *Result = new (ModulesAlloc.Allocate())
Module(ModuleConstructorTag{}, Name, SourceLocation(), Parent,
IsFramework, IsExplicit, NumCreatedModules++);
@@ -875,7 +881,7 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
Modules[Name] = Result;
ModuleScopeIDs[Result] = CurrentModuleScopeID;
}
- return std::make_pair(Result, true);
+ return Result;
}
Module *ModuleMap::createGlobalModuleFragmentForModuleUnit(SourceLocation Loc,
@@ -2124,8 +2130,7 @@ void ModuleMapParser::parseModuleDecl() {
Map.createShadowedModule(ModuleName, Framework, ShadowingModule);
} else {
ActiveModule =
- Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit)
- .first;
+ Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit);
}
ActiveModule->DefinitionLoc = ModuleNameLoc;
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 2419ed84e68acf..74a79ac54bb4eb 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -5756,6 +5756,13 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
return Err;
ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
+ bool KnowsTopLevelModule = ModMap.findModule(F.ModuleName) != nullptr;
+ // If we don't know the top-level module, there's no point in doing qualified
+ // lookup of its submodules; it won't find anything anywhere within this tree.
+ // Let's skip that and avoid some string lookups.
+ auto CreateModule = !KnowsTopLevelModule ? &ModuleMap::createModule
+ : &ModuleMap::findOrCreateModule;
+
bool First = true;
Module *CurrentModule = nullptr;
RecordData Record;
@@ -5829,11 +5836,8 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
if (Parent)
ParentModule = getSubmodule(Parent);
- // Retrieve this (sub)module from the module map, creating it if
- // necessary.
- CurrentModule =
- ModMap.findOrCreateModule(Name, ParentModule, IsFramework, IsExplicit)
- .first;
+ CurrentModule = std::invoke(CreateModule, &ModMap, Name, ParentModule,
+ IsFramework, IsExplicit);
SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS;
if (GlobalIndex >= SubmodulesLoaded.size() ||
>From 21295bc48b1f0392a71f0768f41f38d375e0c195 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Mon, 28 Oct 2024 11:28:00 -0700
Subject: [PATCH 2/2] Document new function
---
clang/include/clang/Lex/ModuleMap.h | 3 +++
1 file changed, 3 insertions(+)
diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h
index b4a8e0e358ffbe..5ee152e4213abf 100644
--- a/clang/include/clang/Lex/ModuleMap.h
+++ b/clang/include/clang/Lex/ModuleMap.h
@@ -544,6 +544,9 @@ class ModuleMap {
/// \returns The found or newly-created module.
Module *findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework,
bool IsExplicit);
+ /// Create a new submodule, assuming it does not exist. This function can only
+ /// be called when it is guaranteed that this submodule does not exist yet.
+ /// Parameters have the same meaning as in \c ModuleMap::findOrCreateModule.
Module *createModule(StringRef Name, Module *Parent, bool IsFramework,
bool IsExplicit);
More information about the cfe-commits
mailing list