[clang] [clang][modules] Avoid calling expensive `SourceManager::translateFile()` (PR #86216)

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 21 16:29:29 PDT 2024


https://github.com/jansvoboda11 created https://github.com/llvm/llvm-project/pull/86216

The `ASTWriter` algorithm for computing affecting module maps uses `SourceManager::translateFile()` to get a `FileID` from a `FileEntry`. This is slow (O(n)) since the function performs a linear walk over `SLocEntries` until it finds one with a matching `FileEntry`.

This patch removes this use of `SourceManager::translateFile()` by tracking `FileID` instead of `FileEntry` in couple of places in `ModuleMap`, giving `ASTWriter` the desired `FileID` directly. There are no changes required for clients that still want a `FileEntry` from `ModuleMap`: the existing APIs internally use `SourceManager` to perform the reverse `FileID` to `FileEntry` conversion in O(1).

>From 4dd48b40a034edf0b124ab08055a334ad7abd5ba Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Thu, 21 Mar 2024 14:40:02 -0700
Subject: [PATCH] [clang][modules] Avoid calling expensive
 `SourceManager::translateFile()`

---
 clang/include/clang/Lex/ModuleMap.h   | 15 +++---
 clang/lib/Frontend/FrontendAction.cpp |  7 ++-
 clang/lib/Lex/ModuleMap.cpp           | 66 +++++++++++++++------------
 clang/lib/Serialization/ASTWriter.cpp | 23 +++++-----
 4 files changed, 61 insertions(+), 50 deletions(-)

diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h
index 867cb6eab42f2d..2e28ff6823cb2a 100644
--- a/clang/include/clang/Lex/ModuleMap.h
+++ b/clang/include/clang/Lex/ModuleMap.h
@@ -263,8 +263,8 @@ class ModuleMap {
     Attributes Attrs;
 
     /// If \c InferModules is non-zero, the module map file that allowed
-    /// inferred modules.  Otherwise, nullopt.
-    OptionalFileEntryRef ModuleMapFile;
+    /// inferred modules.  Otherwise, invalid.
+    FileID ModuleMapFID;
 
     /// The names of modules that cannot be inferred within this
     /// directory.
@@ -279,8 +279,7 @@ class ModuleMap {
 
   /// A mapping from an inferred module to the module map that allowed the
   /// inference.
-  // FIXME: Consider making the values non-optional.
-  llvm::DenseMap<const Module *, OptionalFileEntryRef> InferredModuleAllowedBy;
+  llvm::DenseMap<const Module *, FileID> InferredModuleAllowedBy;
 
   llvm::DenseMap<const Module *, AdditionalModMapsSet> AdditionalModMaps;
 
@@ -618,8 +617,9 @@ class ModuleMap {
   ///
   /// \param Module The module whose module map file will be returned, if known.
   ///
-  /// \returns The file entry for the module map file containing the given
-  /// module, or nullptr if the module definition was inferred.
+  /// \returns The FileID for the module map file containing the given module,
+  /// invalid if the module definition was inferred.
+  FileID getContainingModuleMapFileID(const Module *Module) const;
   OptionalFileEntryRef getContainingModuleMapFile(const Module *Module) const;
 
   /// Get the module map file that (along with the module name) uniquely
@@ -631,9 +631,10 @@ class ModuleMap {
   /// of inferred modules, returns the module map that allowed the inference
   /// (e.g. contained 'module *'). Otherwise, returns
   /// getContainingModuleMapFile().
+  FileID getModuleMapFileIDForUniquing(const Module *M) const;
   OptionalFileEntryRef getModuleMapFileForUniquing(const Module *M) const;
 
-  void setInferredModuleAllowedBy(Module *M, OptionalFileEntryRef ModMap);
+  void setInferredModuleAllowedBy(Module *M, FileID ModMapFID);
 
   /// Canonicalize \p Path in a manner suitable for a module map file. In
   /// particular, this canonicalizes the parent directory separately from the
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index b9fd9b8897b7e7..bc4c901325c7e1 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -535,8 +535,13 @@ static Module *prepareToBuildModule(CompilerInstance &CI,
     if (*OriginalModuleMap != CI.getSourceManager().getFileEntryRefForID(
                                   CI.getSourceManager().getMainFileID())) {
       M->IsInferred = true;
+      bool IsSystem = false; // TODO: Propagate the real thing here.
+      auto FileCharacter =
+          IsSystem ? SrcMgr::C_System_ModuleMap : SrcMgr::C_User_ModuleMap;
+      FileID OriginalModuleMapFID = CI.getSourceManager().getOrCreateFileID(
+          *OriginalModuleMap, FileCharacter);
       CI.getPreprocessor().getHeaderSearchInfo().getModuleMap()
-        .setInferredModuleAllowedBy(M, *OriginalModuleMap);
+        .setInferredModuleAllowedBy(M, OriginalModuleMapFID);
     }
   }
 
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index 10c475f617d485..eed7eca2e73562 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -648,8 +648,7 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) {
       UmbrellaModule = UmbrellaModule->Parent;
 
     if (UmbrellaModule->InferSubmodules) {
-      OptionalFileEntryRef UmbrellaModuleMap =
-          getModuleMapFileForUniquing(UmbrellaModule);
+      FileID UmbrellaModuleMap = getModuleMapFileIDForUniquing(UmbrellaModule);
 
       // Infer submodules for each of the directories we found between
       // the directory of the umbrella header and the directory where
@@ -1021,7 +1020,7 @@ Module *ModuleMap::inferFrameworkModule(DirectoryEntryRef FrameworkDir,
 
   // If the framework has a parent path from which we're allowed to infer
   // a framework module, do so.
-  OptionalFileEntryRef ModuleMapFile;
+  FileID ModuleMapFID;
   if (!Parent) {
     // Determine whether we're allowed to infer a module map.
     bool canInfer = false;
@@ -1060,7 +1059,7 @@ Module *ModuleMap::inferFrameworkModule(DirectoryEntryRef FrameworkDir,
           Attrs.IsExhaustive |= inferred->second.Attrs.IsExhaustive;
           Attrs.NoUndeclaredIncludes |=
               inferred->second.Attrs.NoUndeclaredIncludes;
-          ModuleMapFile = inferred->second.ModuleMapFile;
+          ModuleMapFID = inferred->second.ModuleMapFID;
         }
       }
     }
@@ -1069,7 +1068,7 @@ Module *ModuleMap::inferFrameworkModule(DirectoryEntryRef FrameworkDir,
     if (!canInfer)
       return nullptr;
   } else {
-    ModuleMapFile = getModuleMapFileForUniquing(Parent);
+    ModuleMapFID = getModuleMapFileIDForUniquing(Parent);
   }
 
   // Look for an umbrella header.
@@ -1086,7 +1085,7 @@ Module *ModuleMap::inferFrameworkModule(DirectoryEntryRef FrameworkDir,
   Module *Result = new Module(ModuleName, SourceLocation(), Parent,
                               /*IsFramework=*/true, /*IsExplicit=*/false,
                               NumCreatedModules++);
-  InferredModuleAllowedBy[Result] = ModuleMapFile;
+  InferredModuleAllowedBy[Result] = ModuleMapFID;
   Result->IsInferred = true;
   if (!Parent) {
     if (LangOpts.CurrentModule == ModuleName)
@@ -1307,28 +1306,34 @@ void ModuleMap::addHeader(Module *Mod, Module::Header Header,
     Cb->moduleMapAddHeader(Header.Entry.getName());
 }
 
-OptionalFileEntryRef
-ModuleMap::getContainingModuleMapFile(const Module *Module) const {
+FileID ModuleMap::getContainingModuleMapFileID(const Module *Module) const {
   if (Module->DefinitionLoc.isInvalid())
-    return std::nullopt;
+    return {};
 
-  return SourceMgr.getFileEntryRefForID(
-      SourceMgr.getFileID(Module->DefinitionLoc));
+  return SourceMgr.getFileID(Module->DefinitionLoc);
 }
 
 OptionalFileEntryRef
-ModuleMap::getModuleMapFileForUniquing(const Module *M) const {
+ModuleMap::getContainingModuleMapFile(const Module *Module) const {
+  return SourceMgr.getFileEntryRefForID(getContainingModuleMapFileID(Module));
+}
+
+FileID ModuleMap::getModuleMapFileIDForUniquing(const Module *M) const {
   if (M->IsInferred) {
     assert(InferredModuleAllowedBy.count(M) && "missing inferred module map");
     return InferredModuleAllowedBy.find(M)->second;
   }
-  return getContainingModuleMapFile(M);
+  return getContainingModuleMapFileID(M);
+}
+
+OptionalFileEntryRef
+ModuleMap::getModuleMapFileForUniquing(const Module *M) const {
+  return SourceMgr.getFileEntryRefForID(getModuleMapFileIDForUniquing(M));
 }
 
-void ModuleMap::setInferredModuleAllowedBy(Module *M,
-                                           OptionalFileEntryRef ModMap) {
+void ModuleMap::setInferredModuleAllowedBy(Module *M, FileID ModMapFID) {
   assert(M->IsInferred && "module not inferred");
-  InferredModuleAllowedBy[M] = ModMap;
+  InferredModuleAllowedBy[M] = ModMapFID;
 }
 
 std::error_code
@@ -1517,7 +1522,7 @@ namespace clang {
     ModuleMap ⤅
 
     /// The current module map file.
-    FileEntryRef ModuleMapFile;
+    FileID ModuleMapFID;
 
     /// Source location of most recent parsed module declaration
     SourceLocation CurrModuleDeclLoc;
@@ -1585,13 +1590,12 @@ namespace clang {
     bool parseOptionalAttributes(Attributes &Attrs);
 
   public:
-    explicit ModuleMapParser(Lexer &L, SourceManager &SourceMgr,
-                             const TargetInfo *Target, DiagnosticsEngine &Diags,
-                             ModuleMap &Map, FileEntryRef ModuleMapFile,
-                             DirectoryEntryRef Directory, bool IsSystem)
+    ModuleMapParser(Lexer &L, SourceManager &SourceMgr,
+                    const TargetInfo *Target, DiagnosticsEngine &Diags,
+                    ModuleMap &Map, FileID ModuleMapFID,
+                    DirectoryEntryRef Directory, bool IsSystem)
         : L(L), SourceMgr(SourceMgr), Target(Target), Diags(Diags), Map(Map),
-          ModuleMapFile(ModuleMapFile), Directory(Directory),
-          IsSystem(IsSystem) {
+          ModuleMapFID(ModuleMapFID), Directory(Directory), IsSystem(IsSystem) {
       Tok.clear();
       consumeToken();
     }
@@ -2011,11 +2015,13 @@ void ModuleMapParser::parseModuleDecl() {
     }
 
     if (TopLevelModule &&
-        ModuleMapFile != Map.getContainingModuleMapFile(TopLevelModule)) {
-      assert(ModuleMapFile != Map.getModuleMapFileForUniquing(TopLevelModule) &&
+        ModuleMapFID != Map.getContainingModuleMapFileID(TopLevelModule)) {
+      assert(ModuleMapFID !=
+                 Map.getModuleMapFileIDForUniquing(TopLevelModule) &&
              "submodule defined in same file as 'module *' that allowed its "
              "top-level module");
-      Map.addAdditionalModuleMapFile(TopLevelModule, ModuleMapFile);
+      Map.addAdditionalModuleMapFile(
+          TopLevelModule, *SourceMgr.getFileEntryRefForID(ModuleMapFID));
     }
   }
 
@@ -2120,7 +2126,8 @@ void ModuleMapParser::parseModuleDecl() {
     ActiveModule->NoUndeclaredIncludes = true;
   ActiveModule->Directory = Directory;
 
-  StringRef MapFileName(ModuleMapFile.getName());
+  StringRef MapFileName(
+      SourceMgr.getFileEntryRefForID(ModuleMapFID)->getName());
   if (MapFileName.ends_with("module.private.modulemap") ||
       MapFileName.ends_with("module_private.map")) {
     ActiveModule->ModuleMapIsPrivate = true;
@@ -2906,7 +2913,7 @@ void ModuleMapParser::parseInferredModuleDecl(bool Framework, bool Explicit) {
     // We'll be inferring framework modules for this directory.
     Map.InferredDirectories[Directory].InferModules = true;
     Map.InferredDirectories[Directory].Attrs = Attrs;
-    Map.InferredDirectories[Directory].ModuleMapFile = ModuleMapFile;
+    Map.InferredDirectories[Directory].ModuleMapFID = ModuleMapFID;
     // FIXME: Handle the 'framework' keyword.
   }
 
@@ -3139,8 +3146,7 @@ bool ModuleMap::parseModuleMapFile(FileEntryRef File, bool IsSystem,
           Buffer->getBufferStart() + (Offset ? *Offset : 0),
           Buffer->getBufferEnd());
   SourceLocation Start = L.getSourceLocation();
-  ModuleMapParser Parser(L, SourceMgr, Target, Diags, *this, File, Dir,
-                         IsSystem);
+  ModuleMapParser Parser(L, SourceMgr, Target, Diags, *this, ID, Dir, IsSystem);
   bool Result = Parser.parseModuleMapFile();
   ParsedModuleMap[File] = Result;
 
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 221409d011a38c..525f431da5d0da 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -161,8 +161,8 @@ static TypeCode getTypeCodeForTypeClass(Type::TypeClass id) {
 
 namespace {
 
-std::set<const FileEntry *> GetAffectingModuleMaps(const Preprocessor &PP,
-                                                   Module *RootModule) {
+llvm::DenseSet<FileEntryRef> GetAffectingModuleMaps(const Preprocessor &PP,
+                                                    Module *RootModule) {
   SmallVector<const Module *> ModulesToProcess{RootModule};
 
   const HeaderSearch &HS = PP.getHeaderSearchInfo();
@@ -193,17 +193,17 @@ std::set<const FileEntry *> GetAffectingModuleMaps(const Preprocessor &PP,
   const ModuleMap &MM = HS.getModuleMap();
   SourceManager &SourceMgr = PP.getSourceManager();
 
-  std::set<const FileEntry *> ModuleMaps{};
-  auto CollectIncludingModuleMaps = [&](FileEntryRef F) {
-    if (!ModuleMaps.insert(F).second)
+  llvm::DenseSet<FileEntryRef> ModuleMaps;
+  auto CollectIncludingModuleMaps = [&](FileID FID, FileEntryRef FE) {
+    if (!ModuleMaps.insert(FE).second)
       return;
-    FileID FID = SourceMgr.translateFile(F);
     SourceLocation Loc = SourceMgr.getIncludeLoc(FID);
     // The include location of inferred module maps can point into the header
     // file that triggered the inferring. Cut off the walk if that's the case.
     while (Loc.isValid() && isModuleMap(SourceMgr.getFileCharacteristic(Loc))) {
       FID = SourceMgr.getFileID(Loc);
-      if (!ModuleMaps.insert(*SourceMgr.getFileEntryRefForID(FID)).second)
+      FE = *SourceMgr.getFileEntryRefForID(FID);
+      if (!ModuleMaps.insert(FE).second)
         break;
       Loc = SourceMgr.getIncludeLoc(FID);
     }
@@ -216,13 +216,13 @@ std::set<const FileEntry *> GetAffectingModuleMaps(const Preprocessor &PP,
         break;
       // The containing module map is affecting, because it's being pointed
       // into by Module::DefinitionLoc.
-      if (auto ModuleMapFile = MM.getContainingModuleMapFile(Mod))
-        CollectIncludingModuleMaps(*ModuleMapFile);
+      if (FileID FID = MM.getContainingModuleMapFileID(Mod); FID.isValid())
+        CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID));
       // For inferred modules, the module map that allowed inferring is not in
       // the include chain of the virtual containing module map file. It did
       // affect the compilation, though.
-      if (auto ModuleMapFile = MM.getModuleMapFileForUniquing(Mod))
-        CollectIncludingModuleMaps(*ModuleMapFile);
+      if (FileID FID = MM.getModuleMapFileIDForUniquing(Mod); FID.isValid())
+        CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID));
     }
   };
 
@@ -4728,7 +4728,6 @@ void ASTWriter::computeNonAffectingInputFiles() {
       continue;
 
     if (!isModuleMap(File.getFileCharacteristic()) ||
-        AffectingModuleMaps.empty() ||
         llvm::is_contained(AffectingModuleMaps, *Cache->OrigEntry))
       continue;
 



More information about the cfe-commits mailing list