r177750 - <rdar://problem/13479539> Simplify ModuleManager/GlobalModuleIndex interaction to eliminate a pile of extraneous stats().

Douglas Gregor dgregor at apple.com
Fri Mar 22 11:50:14 PDT 2013


Author: dgregor
Date: Fri Mar 22 13:50:14 2013
New Revision: 177750

URL: http://llvm.org/viewvc/llvm-project?rev=177750&view=rev
Log:
<rdar://problem/13479539> Simplify ModuleManager/GlobalModuleIndex interaction to eliminate a pile of extraneous stats().

The refactoring in r177367 introduced a serious performance bug where
the "lazy" resolution of module file names in the global module index
to actual module file entries in the module manager would perform
repeated negative stats(). The new interaction requires the module
manager to inform the global module index when a module file has been
loaded, eliminating the extraneous stat()s and a bunch of bookkeeping
on both sides.


Modified:
    cfe/trunk/include/clang/Serialization/GlobalModuleIndex.h
    cfe/trunk/include/clang/Serialization/ModuleManager.h
    cfe/trunk/lib/Serialization/ASTReader.cpp
    cfe/trunk/lib/Serialization/GlobalModuleIndex.cpp
    cfe/trunk/lib/Serialization/ModuleManager.cpp

Modified: cfe/trunk/include/clang/Serialization/GlobalModuleIndex.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/GlobalModuleIndex.h?rev=177750&r1=177749&r2=177750&view=diff
==============================================================================
--- cfe/trunk/include/clang/Serialization/GlobalModuleIndex.h (original)
+++ cfe/trunk/include/clang/Serialization/GlobalModuleIndex.h Fri Mar 22 13:50:14 2013
@@ -20,6 +20,7 @@
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include <utility>
 
@@ -43,36 +44,6 @@ using llvm::SmallVectorImpl;
 using llvm::StringRef;
 using serialization::ModuleFile;
 
-/// \brief Abstract class that resolves a module file name to a ModuleFile
-/// pointer, which is used to uniquely describe a module file.
-class ModuleFileNameResolver {
-public:
-  virtual ~ModuleFileNameResolver();
-
-  /// \brief Attempt to resolve the given module file name to a specific,
-  /// already-loaded module.
-  ///
-  /// \param FileName The name of the module file.
-  ///
-  /// \param ExpectedSize The size that the module file is expected to have.
-  /// If the actual size differs, the resolver should return \c true.
-  ///
-  /// \param ExpectedModTime The modification time that the module file is
-  /// expected to have. If the actual modification time differs, the resolver
-  /// should return \c true.
-  ///
-  /// \param File Will be set to the module file if there is one, or null
-  /// otherwise.
-  ///
-  /// \returns True if a module file exists but does not meet the size/
-  /// modification time criteria, false if the module file is available or has
-  /// not yet been loaded.
-  virtual bool resolveModuleFileName(StringRef FileName,
-                                     off_t ExpectedSize,
-                                     time_t ExpectedModTime,
-                                     ModuleFile *&File) = 0;
-};
-
 /// \brief A global index for a set of module files, providing information about
 /// the identifiers within those module files.
 ///
@@ -89,9 +60,6 @@ class GlobalModuleIndex {
   /// as the global module index is live.
   llvm::OwningPtr<llvm::MemoryBuffer> Buffer;
 
-  /// \brief The module file name resolver.
-  ModuleFileNameResolver *Resolver;
-
   /// \brief The hash table.
   ///
   /// This pointer actually points to a IdentifierIndexTable object,
@@ -103,7 +71,7 @@ class GlobalModuleIndex {
   struct ModuleInfo {
     ModuleInfo() : File(), Size(), ModTime() { }
 
-    /// \brief The module file, if it is known.
+    /// \brief The module file, once it has been resolved.
     ModuleFile *File;
 
     /// \brief The module file name.
@@ -119,9 +87,6 @@ class GlobalModuleIndex {
     /// \brief The module IDs on which this module directly depends.
     /// FIXME: We don't really need a vector here.
     llvm::SmallVector<unsigned, 4> Dependencies;
-
-    /// \brief The module IDs that directly depend on this module.
-    llvm::SmallVector<unsigned, 4> ImportedBy;
   };
 
   /// \brief A mapping from module IDs to information about each module.
@@ -135,13 +100,19 @@ class GlobalModuleIndex {
   /// corresponding index into the \c Modules vector.
   llvm::DenseMap<ModuleFile *, unsigned> ModulesByFile;
 
+  /// \brief The set of modules that have not yet been resolved.
+  ///
+  /// The string is just the name of the module itself, which maps to the
+  /// module ID.
+  llvm::StringMap<unsigned> UnresolvedModules;
+
   /// \brief The number of identifier lookups we performed.
   unsigned NumIdentifierLookups;
 
   /// \brief The number of identifier lookup hits, where we recognize the
   /// identifier.
   unsigned NumIdentifierLookupHits;
-
+  
   /// \brief Internal constructor. Use \c readIndex() to read an index.
   explicit GlobalModuleIndex(llvm::MemoryBuffer *Buffer,
                              llvm::BitstreamCursor Cursor);
@@ -200,19 +171,11 @@ public:
   /// \returns true if the identifier is known to the index, false otherwise.
   bool lookupIdentifier(StringRef Name, HitSet &Hits);
 
-  /// \brief Set the module file name resolver.
-  void setResolver(ModuleFileNameResolver *Resolver) {
-    this->Resolver = Resolver;
-  }
-
-  /// \brief Note that additional modules have been loaded, which invalidates
-  /// the module file -> module cache.
-  void noteAdditionalModulesLoaded() {
-    ModulesByFile.clear();
-  }
-
-  /// \brief Resolve the module file for the module with the given ID.
-  ModuleFile *resolveModuleFile(unsigned ID);
+  /// \brief Note that the given module file has been loaded.
+  ///
+  /// \returns false if the global module index has information about this
+  /// module file, and true otherwise.
+  bool loadedModuleFile(ModuleFile *File);
 
   /// \brief Print statistics to standard error.
   void printStats();

Modified: cfe/trunk/include/clang/Serialization/ModuleManager.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/ModuleManager.h?rev=177750&r1=177749&r2=177750&view=diff
==============================================================================
--- cfe/trunk/include/clang/Serialization/ModuleManager.h (original)
+++ cfe/trunk/include/clang/Serialization/ModuleManager.h Fri Mar 22 13:50:14 2013
@@ -16,18 +16,18 @@
 #define LLVM_CLANG_SERIALIZATION_MODULE_MANAGER_H
 
 #include "clang/Basic/FileManager.h"
-#include "clang/Serialization/GlobalModuleIndex.h"
 #include "clang/Serialization/Module.h"
 #include "llvm/ADT/DenseMap.h"
 
 namespace clang { 
 
+class GlobalModuleIndex;
 class ModuleMap;
 
 namespace serialization {
-  
+
 /// \brief Manages the set of modules loaded by an AST reader.
-class ModuleManager : public ModuleFileNameResolver {
+class ModuleManager {
   /// \brief The chain of AST files. The first entry is the one named by the
   /// user, the last one is the one that doesn't depend on anything further.
   SmallVector<ModuleFile *, 2> Chain;
@@ -61,9 +61,6 @@ class ModuleManager : public ModuleFileN
   /// just an non-owning pointer.
   GlobalModuleIndex *GlobalIndex;
 
-  /// \brief Update the set of modules files we know about known to the global index.
-  void updateModulesInCommonWithGlobalIndex();
-
   /// \brief State used by the "visit" operation to avoid malloc traffic in
   /// calls to visit().
   struct VisitState {
@@ -202,6 +199,10 @@ public:
   /// \brief Set the global module index.
   void setGlobalIndex(GlobalModuleIndex *Index);
 
+  /// \brief Notification from the AST reader that the given module file
+  /// has been "accepted", and will not (can not) be unloaded.
+  void moduleFileAccepted(ModuleFile *MF);
+
   /// \brief Visit each of the modules.
   ///
   /// This routine visits each of the modules, starting with the
@@ -270,11 +271,6 @@ public:
                         time_t ExpectedModTime,
                         const FileEntry *&File);
 
-  virtual bool resolveModuleFileName(StringRef FileName,
-                                     off_t ExpectedSize,
-                                     time_t ExpectedModTime,
-                                     ModuleFile *&File);
-
   /// \brief View the graphviz representation of the module graph.
   void viewGraph();
 };

Modified: cfe/trunk/lib/Serialization/ASTReader.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReader.cpp?rev=177750&r1=177749&r2=177750&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTReader.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTReader.cpp Fri Mar 22 13:50:14 2013
@@ -2872,11 +2872,16 @@ ASTReader::ASTReadResult ASTReader::Read
     }
   }
 
-  // Setup the import locations.
+  // Setup the import locations and notify the module manager that we've
+  // committed to these module files.
   for (SmallVectorImpl<ImportedModule>::iterator M = Loaded.begin(),
                                               MEnd = Loaded.end();
        M != MEnd; ++M) {
     ModuleFile &F = *M->Mod;
+
+    ModuleMgr.moduleFileAccepted(&F);
+
+    // Set the import location.
     F.DirectImportLoc = ImportLoc;
     if (!M->ImportedBy)
       F.ImportLoc = M->ImportLoc;

Modified: cfe/trunk/lib/Serialization/GlobalModuleIndex.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/GlobalModuleIndex.cpp?rev=177750&r1=177749&r2=177750&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/GlobalModuleIndex.cpp (original)
+++ cfe/trunk/lib/Serialization/GlobalModuleIndex.cpp Fri Mar 22 13:50:14 2013
@@ -16,6 +16,7 @@
 #include "clang/Basic/OnDiskHashTable.h"
 #include "clang/Serialization/ASTBitCodes.h"
 #include "clang/Serialization/GlobalModuleIndex.h"
+#include "clang/Serialization/Module.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallString.h"
@@ -57,8 +58,6 @@ static const char * const IndexFileName
 /// \brief The global index file version.
 static const unsigned CurrentVersion = 1;
 
-ModuleFileNameResolver::~ModuleFileNameResolver() { }
-
 //----------------------------------------------------------------------------//
 // Global module index reader.
 //----------------------------------------------------------------------------//
@@ -121,7 +120,7 @@ typedef OnDiskChainedHashTable<Identifie
 
 GlobalModuleIndex::GlobalModuleIndex(llvm::MemoryBuffer *Buffer,
                                      llvm::BitstreamCursor Cursor)
-  : Buffer(Buffer), Resolver(), IdentifierIndex(),
+  : Buffer(Buffer), IdentifierIndex(),
     NumIdentifierLookups(), NumIdentifierLookupHits()
 {
   // Read the global index.
@@ -201,6 +200,9 @@ GlobalModuleIndex::GlobalModuleIndex(llv
 
       // Make sure we're at the end of the record.
       assert(Idx == Record.size() && "More module info?");
+
+      // Record this module as an unresolved module.
+      UnresolvedModules[llvm::sys::path::stem(Modules[ID].FileName)] = ID;
       break;
     }
 
@@ -215,14 +217,6 @@ GlobalModuleIndex::GlobalModuleIndex(llv
       break;
     }
   }
-
-  // Compute imported-by relation.
-  for (unsigned ID = 0, IDEnd = Modules.size(); ID != IDEnd; ++ID) {
-    for (unsigned D = 0, DEnd = Modules[ID].Dependencies.size();
-         D != DEnd; ++D) {
-      Modules[Modules[ID].Dependencies[D]].ImportedBy.push_back(ID);
-    }
-  }
 }
 
 GlobalModuleIndex::~GlobalModuleIndex() { }
@@ -260,21 +254,14 @@ void
 GlobalModuleIndex::getKnownModules(SmallVectorImpl<ModuleFile *> &ModuleFiles) {
   ModuleFiles.clear();
   for (unsigned I = 0, N = Modules.size(); I != N; ++I) {
-    if (ModuleFile *File = resolveModuleFile(I))
-      ModuleFiles.push_back(File);
+    if (ModuleFile *MF = Modules[I].File)
+      ModuleFiles.push_back(MF);
   }
 }
 
 void GlobalModuleIndex::getModuleDependencies(
        ModuleFile *File,
        SmallVectorImpl<ModuleFile *> &Dependencies) {
-  // If the file -> index mapping is empty, populate it now.
-  if (ModulesByFile.empty()) {
-    for (unsigned I = 0, N = Modules.size(); I != N; ++I) {
-      resolveModuleFile(I);
-    }
-  }
-
   // Look for information about this module file.
   llvm::DenseMap<ModuleFile *, unsigned>::iterator Known
     = ModulesByFile.find(File);
@@ -285,7 +272,7 @@ void GlobalModuleIndex::getModuleDepende
   Dependencies.clear();
   ArrayRef<unsigned> StoredDependencies = Modules[Known->second].Dependencies;
   for (unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) {
-    if (ModuleFile *MF = resolveModuleFile(I))
+    if (ModuleFile *MF = Modules[I].File)
       Dependencies.push_back(MF);
   }
 }
@@ -308,60 +295,39 @@ bool GlobalModuleIndex::lookupIdentifier
 
   SmallVector<unsigned, 2> ModuleIDs = *Known;
   for (unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) {
-    if (ModuleFile *File = resolveModuleFile(ModuleIDs[I]))
-      Hits.insert(File);
+    if (ModuleFile *MF = Modules[ModuleIDs[I]].File)
+      Hits.insert(MF);
   }
 
   ++NumIdentifierLookupHits;
   return true;
 }
 
-ModuleFile *GlobalModuleIndex::resolveModuleFile(unsigned ID) {
-  assert(ID < Modules.size() && "Out-of-bounds module index");
-
-  // If we already have a module file, return it.
-  if (Modules[ID].File)
-    return Modules[ID].File;
-
-  // If we don't have a file name, or if there is no resolver, we can't
-  // resolve this.
-  if (Modules[ID].FileName.empty() || !Resolver)
-    return 0;
-
-  // Try to resolve this module file.
-  ModuleFile *File;
-  if (Resolver->resolveModuleFileName(Modules[ID].FileName, Modules[ID].Size,
-                                      Modules[ID].ModTime, File)) {
-    // Clear out the module files for anything that depends on this module.
-    llvm::SmallVector<unsigned, 8> Stack;
-
-    Stack.push_back(ID);
-    while (!Stack.empty()) {
-      unsigned Victim = Stack.back();
-      Stack.pop_back();
-
-      // Mark this file as ignored.
-      Modules[Victim].File = 0;
-      Modules[Victim].FileName.clear();
-
-      // Push any not-yet-ignored imported modules onto the stack.
-      for (unsigned I = 0, N = Modules[Victim].ImportedBy.size(); I != N; ++I) {
-        unsigned NextVictim = Modules[Victim].ImportedBy[I];
-        if (!Modules[NextVictim].FileName.empty())
-          Stack.push_back(NextVictim);
-      }
-    }
-
-    return 0;
+bool GlobalModuleIndex::loadedModuleFile(ModuleFile *File) {
+  // Look for the module in the global module index based on the module name.
+  StringRef Name = llvm::sys::path::stem(File->FileName);
+  llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name);
+  if (Known == UnresolvedModules.end()) {
+    return true;
   }
 
-  // If we have a module file, record it.
-  if (File) {
-    Modules[ID].File = File;
-    ModulesByFile[File] = ID;
-  }
+  // Rectify this module with the global module index.
+  ModuleInfo &Info = Modules[Known->second];
 
-  return File;
+  //  If the size and modification time match what we expected, record this
+  // module file.
+  bool Failed = true;
+  if (File->File->getSize() == Info.Size &&
+      File->File->getModificationTime() == Info.ModTime) {
+    Info.File = File;
+    ModulesByFile[File] = Known->second;
+
+    Failed = false;
+  }
+
+  // One way or another, we have resolved this module file.
+  UnresolvedModules.erase(Known);
+  return Failed;
 }
 
 void GlobalModuleIndex::printStats() {

Modified: cfe/trunk/lib/Serialization/ModuleManager.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ModuleManager.cpp?rev=177750&r1=177749&r2=177750&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ModuleManager.cpp (original)
+++ cfe/trunk/lib/Serialization/ModuleManager.cpp Fri Mar 22 13:50:14 2013
@@ -166,17 +166,6 @@ void ModuleManager::addInMemoryBuffer(St
   InMemoryBuffers[Entry] = Buffer;
 }
 
-void ModuleManager::updateModulesInCommonWithGlobalIndex() {
-  ModulesInCommonWithGlobalIndex.clear();
-
-  if (!GlobalIndex)
-    return;
-
-  // Collect the set of modules known to the global index.
-  GlobalIndex->noteAdditionalModulesLoaded();
-  GlobalIndex->getKnownModules(ModulesInCommonWithGlobalIndex);
-}
-
 ModuleManager::VisitState *ModuleManager::allocateVisitState() {
   // Fast path: if we have a cached state, use it.
   if (FirstVisitState) {
@@ -198,10 +187,25 @@ void ModuleManager::returnVisitState(Vis
 
 void ModuleManager::setGlobalIndex(GlobalModuleIndex *Index) {
   GlobalIndex = Index;
-  if (GlobalIndex) {
-    GlobalIndex->setResolver(this);
+  if (!GlobalIndex) {
+    ModulesInCommonWithGlobalIndex.clear();
+    return;
   }
-  updateModulesInCommonWithGlobalIndex();
+
+  // Notify the global module index about all of the modules we've already
+  // loaded.
+  for (unsigned I = 0, N = Chain.size(); I != N; ++I) {
+    if (!GlobalIndex->loadedModuleFile(Chain[I])) {
+      ModulesInCommonWithGlobalIndex.push_back(Chain[I]);
+    }
+  }
+}
+
+void ModuleManager::moduleFileAccepted(ModuleFile *MF) {
+  if (!GlobalIndex || GlobalIndex->loadedModuleFile(MF))
+    return;
+
+  ModulesInCommonWithGlobalIndex.push_back(MF);
 }
 
 ModuleManager::ModuleManager(FileManager &FileMgr)
@@ -264,11 +268,6 @@ ModuleManager::visit(bool (*Visitor)(Mod
 
     assert(VisitOrder.size() == N && "Visitation order is wrong?");
 
-    // We may need to update the set of modules we have in common with the
-    // global module index, since modules could have been added to the module
-    // manager since we loaded the global module index.
-    updateModulesInCommonWithGlobalIndex();
-
     delete FirstVisitState;
     FirstVisitState = 0;
   }
@@ -387,34 +386,6 @@ bool ModuleManager::lookupModuleFile(Str
   return false;
 }
 
-bool ModuleManager::resolveModuleFileName(StringRef FileName,
-                                          off_t ExpectedSize,
-                                          time_t ExpectedModTime,
-                                          ModuleFile *&File) {
-  File = 0;
-  
-  // Look for the file entry corresponding to this name.
-  const FileEntry *F;
-  if (lookupModuleFile(FileName, ExpectedSize, ExpectedModTime, F))
-    return true;
-
-  // If there is no file, we've succeeded (trivially).
-  if (!F)
-    return false;
-
-  // Determine whether we have a module file associated with this file entry.
-  llvm::DenseMap<const FileEntry *, ModuleFile *>::iterator Known
-    = Modules.find(F);
-  if (Known == Modules.end()) {
-    // We don't know about this module file; invalidate the cache.
-    FileMgr.invalidateCache(F);
-    return false;
-  }
-
-  File = Known->second;
-  return false;
-}
-
 #ifndef NDEBUG
 namespace llvm {
   template<>





More information about the cfe-commits mailing list