[clang] [clang][modules] Optimize construction and usage of the submodule index (PR #113391)

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 28 11:46:08 PDT 2024


https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/113391

>From c6ff124355209de31c86096eb2ede14d598aa5cd Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Tue, 22 Oct 2024 15:46:07 -0700
Subject: [PATCH 1/2] [clang][modules] Optimize construction and usage of the
 submodule index

This patch avoids eagerly populating the submodule index on `Module` construction. The `StringMap` allocation shows up in my profiles of `clang-scan-deps`, while the index is not necessary most of the time. We still construct it on-demand.

Moreover, this patch avoids performing qualified submodule lookup in `ASTReader` whenever we're serializing a module graph whose top-level module is unknown. This is pointless, since that's guaranteed to never find any existing submodules anyway.

This speeds up `clang-scan-deps` by ~0.5% on my workload.
---
 clang/include/clang/Basic/Module.h    |  3 +--
 clang/include/clang/Lex/ModuleMap.h   | 10 ++++-----
 clang/lib/Basic/Module.cpp            | 12 ++++++-----
 clang/lib/Lex/ModuleMap.cpp           | 29 ++++++++++++++++-----------
 clang/lib/Serialization/ASTReader.cpp | 14 ++++++++-----
 5 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index 1ab3b5e5f81567..dd384c1d76c5fd 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -227,7 +227,7 @@ class alignas(8) Module {
 
   /// A mapping from the submodule name to the index into the
   /// \c SubModules vector at which that submodule resides.
-  llvm::StringMap<unsigned> SubModuleIndex;
+  mutable llvm::StringMap<unsigned> SubModuleIndex;
 
   /// The AST file if this is a top-level module which has a
   /// corresponding serialized AST file, or null otherwise.
@@ -612,7 +612,6 @@ class alignas(8) Module {
   void setParent(Module *M) {
     assert(!Parent);
     Parent = M;
-    Parent->SubModuleIndex[Name] = Parent->SubModules.size();
     Parent->SubModules.push_back(this);
   }
 
diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h
index 75b567a347cb6c..b4a8e0e358ffbe 100644
--- a/clang/include/clang/Lex/ModuleMap.h
+++ b/clang/include/clang/Lex/ModuleMap.h
@@ -541,11 +541,11 @@ class ModuleMap {
   ///
   /// \param IsExplicit Whether this is an explicit submodule.
   ///
-  /// \returns The found or newly-created module, along with a boolean value
-  /// that will be true if the module is newly-created.
-  std::pair<Module *, bool> findOrCreateModule(StringRef Name, Module *Parent,
-                                               bool IsFramework,
-                                               bool IsExplicit);
+  /// \returns The found or newly-created module.
+  Module *findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework,
+                             bool IsExplicit);
+  Module *createModule(StringRef Name, Module *Parent, bool IsFramework,
+                       bool IsExplicit);
 
   /// Create a global module fragment for a C++ module unit.
   ///
diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp
index a7a3f6b37efef1..330108d5b3e47f 100644
--- a/clang/lib/Basic/Module.cpp
+++ b/clang/lib/Basic/Module.cpp
@@ -54,7 +54,6 @@ Module::Module(ModuleConstructorTag, StringRef Name,
     NoUndeclaredIncludes = Parent->NoUndeclaredIncludes;
     ModuleMapIsPrivate = Parent->ModuleMapIsPrivate;
 
-    Parent->SubModuleIndex[Name] = Parent->SubModules.size();
     Parent->SubModules.push_back(this);
   }
 }
@@ -351,11 +350,14 @@ void Module::markUnavailable(bool Unimportable) {
 }
 
 Module *Module::findSubmodule(StringRef Name) const {
-  llvm::StringMap<unsigned>::const_iterator Pos = SubModuleIndex.find(Name);
-  if (Pos == SubModuleIndex.end())
-    return nullptr;
+  // Add new submodules into the index.
+  for (unsigned I = SubModuleIndex.size(), E = SubModules.size(); I != E; ++I)
+    SubModuleIndex[SubModules[I]->Name] = I;
 
-  return SubModules[Pos->getValue()];
+  if (auto It = SubModuleIndex.find(Name); It != SubModuleIndex.end())
+    return SubModules[It->second];
+
+  return nullptr;
 }
 
 Module *Module::getGlobalModuleFragment() const {
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index 201ab91cf68ca1..10774429a2177b 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -655,8 +655,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) {
         SmallString<32> NameBuf;
         StringRef Name = sanitizeFilenameAsIdentifier(
             llvm::sys::path::stem(SkippedDir.getName()), NameBuf);
-        Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
-                                    Explicit).first;
+        Result =
+            findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit);
         setInferredModuleAllowedBy(Result, UmbrellaModuleMap);
 
         // Associate the module and the directory.
@@ -672,8 +672,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) {
       SmallString<32> NameBuf;
       StringRef Name = sanitizeFilenameAsIdentifier(
                          llvm::sys::path::stem(File.getName()), NameBuf);
-      Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
-                                  Explicit).first;
+      Result =
+          findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit);
       setInferredModuleAllowedBy(Result, UmbrellaModuleMap);
       Result->addTopHeader(File);
 
@@ -857,15 +857,21 @@ Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{
   return Context->findSubmodule(Name);
 }
 
-std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
-                                                        Module *Parent,
-                                                        bool IsFramework,
-                                                        bool IsExplicit) {
+Module *ModuleMap::findOrCreateModule(StringRef Name, Module *Parent,
+                                      bool IsFramework, bool IsExplicit) {
   // Try to find an existing module with this name.
   if (Module *Sub = lookupModuleQualified(Name, Parent))
-    return std::make_pair(Sub, false);
+    return Sub;
 
   // Create a new module with this name.
+  return createModule(Name, Parent, IsFramework, IsExplicit);
+}
+
+Module *ModuleMap::createModule(StringRef Name, Module *Parent,
+                                bool IsFramework, bool IsExplicit) {
+  assert(lookupModuleQualified(Name, Parent) == nullptr &&
+         "Creating duplicate submodule");
+
   Module *Result = new (ModulesAlloc.Allocate())
       Module(ModuleConstructorTag{}, Name, SourceLocation(), Parent,
              IsFramework, IsExplicit, NumCreatedModules++);
@@ -875,7 +881,7 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
     Modules[Name] = Result;
     ModuleScopeIDs[Result] = CurrentModuleScopeID;
   }
-  return std::make_pair(Result, true);
+  return Result;
 }
 
 Module *ModuleMap::createGlobalModuleFragmentForModuleUnit(SourceLocation Loc,
@@ -2124,8 +2130,7 @@ void ModuleMapParser::parseModuleDecl() {
         Map.createShadowedModule(ModuleName, Framework, ShadowingModule);
   } else {
     ActiveModule =
-        Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit)
-            .first;
+        Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit);
   }
 
   ActiveModule->DefinitionLoc = ModuleNameLoc;
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 2419ed84e68acf..74a79ac54bb4eb 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -5756,6 +5756,13 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
     return Err;
 
   ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
+  bool KnowsTopLevelModule = ModMap.findModule(F.ModuleName) != nullptr;
+  // If we don't know the top-level module, there's no point in doing qualified
+  // lookup of its submodules; it won't find anything anywhere within this tree.
+  // Let's skip that and avoid some string lookups.
+  auto CreateModule = !KnowsTopLevelModule ? &ModuleMap::createModule
+                                           : &ModuleMap::findOrCreateModule;
+
   bool First = true;
   Module *CurrentModule = nullptr;
   RecordData Record;
@@ -5829,11 +5836,8 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
       if (Parent)
         ParentModule = getSubmodule(Parent);
 
-      // Retrieve this (sub)module from the module map, creating it if
-      // necessary.
-      CurrentModule =
-          ModMap.findOrCreateModule(Name, ParentModule, IsFramework, IsExplicit)
-              .first;
+      CurrentModule = std::invoke(CreateModule, &ModMap, Name, ParentModule,
+                                  IsFramework, IsExplicit);
 
       SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS;
       if (GlobalIndex >= SubmodulesLoaded.size() ||

>From 21295bc48b1f0392a71f0768f41f38d375e0c195 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Mon, 28 Oct 2024 11:28:00 -0700
Subject: [PATCH 2/2] Document new function

---
 clang/include/clang/Lex/ModuleMap.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h
index b4a8e0e358ffbe..5ee152e4213abf 100644
--- a/clang/include/clang/Lex/ModuleMap.h
+++ b/clang/include/clang/Lex/ModuleMap.h
@@ -544,6 +544,9 @@ class ModuleMap {
   /// \returns The found or newly-created module.
   Module *findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework,
                              bool IsExplicit);
+  /// Create new submodule, assuming it does not exist. This function can only
+  /// be called when it is guaranteed that this submodule does not exist yet.
+  /// The parameters have same semantics as \c ModuleMap::findOrCreateModule.
   Module *createModule(StringRef Name, Module *Parent, bool IsFramework,
                        bool IsExplicit);
 



More information about the cfe-commits mailing list