[cfe-commits] r173405 - in /cfe/trunk: include/clang/Serialization/ASTReader.h include/clang/Serialization/GlobalModuleIndex.h lib/Frontend/CompilerInstance.cpp lib/Serialization/ASTReader.cpp lib/Serialization/GlobalModuleIndex.cpp lib/Serialization/ModuleManager.cpp test/Modules/global_index.m

Douglas Gregor dgregor at apple.com
Thu Jan 24 17:03:04 PST 2013


Author: dgregor
Date: Thu Jan 24 19:03:03 2013
New Revision: 173405

URL: http://llvm.org/viewvc/llvm-project?rev=173405&view=rev
Log:
Implement the reader of the global module index and wire it into the
AST reader.

The global module index tracks all of the identifiers known to a set
of module files. Lookup of those identifiers looks first in the global
module index, which returns the set of module files in which that
identifier can be found. The AST reader only needs to look into those
module files and any module files not known to the global index (e.g.,
because they were (re)built after the global index), reducing the
number of on-disk hash tables to visit. For an example source I'm
looking at, we go from 237844 total identifier lookups into on-disk
hash tables down to 126817.

Unfortunately, this does not translate into a performance advantage.
At best, it's a wash once the global module index has been built, but
that ignores the cost of building the global module index (which
is itself fairly large). Profiles show that the global module index
code is far less efficient than it should be; optimizing it might give
enough of an advantage to justify its continued inclusion.


Modified:
    cfe/trunk/include/clang/Serialization/ASTReader.h
    cfe/trunk/include/clang/Serialization/GlobalModuleIndex.h
    cfe/trunk/lib/Frontend/CompilerInstance.cpp
    cfe/trunk/lib/Serialization/ASTReader.cpp
    cfe/trunk/lib/Serialization/GlobalModuleIndex.cpp
    cfe/trunk/lib/Serialization/ModuleManager.cpp
    cfe/trunk/test/Modules/global_index.m

Modified: cfe/trunk/include/clang/Serialization/ASTReader.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/ASTReader.h?rev=173405&r1=173404&r2=173405&view=diff
==============================================================================
--- cfe/trunk/include/clang/Serialization/ASTReader.h (original)
+++ cfe/trunk/include/clang/Serialization/ASTReader.h Thu Jan 24 19:03:03 2013
@@ -68,6 +68,7 @@
 class CXXBaseSpecifier;
 class CXXConstructorDecl;
 class CXXCtorInitializer;
+class GlobalModuleIndex;
 class GotoStmt;
 class MacroDefinition;
 class NamedDecl;
@@ -292,6 +293,9 @@
   /// \brief The module manager which manages modules and their dependencies
   ModuleManager ModuleMgr;
 
+  /// \brief The global module index, if loaded.
+  llvm::OwningPtr<GlobalModuleIndex> GlobalIndex;
+
   /// \brief A map of global bit offsets to the module that stores entities
   /// at those bit offsets.
   ContinuousRangeMap<uint64_t, ModuleFile*, 4> GlobalBitOffsetsMap;
@@ -1157,6 +1161,18 @@
   /// \brief Set the AST deserialization listener.
   void setDeserializationListener(ASTDeserializationListener *Listener);
 
+  /// \brief Determine whether this AST reader has a global index.
+  bool hasGlobalIndex() const { return GlobalIndex; }
+
+  /// \brief Attempts to load the global index.
+  ///
+  /// \returns true if loading the global index has failed for any reason.
+  bool loadGlobalIndex();
+
+  /// \brief Determine whether we tried to load the global index, but failed,
+  /// e.g., because it is out-of-date or does not exist.
+  bool isGlobalIndexUnavailable() const;
+  
   /// \brief Initializes the ASTContext
   void InitializeContext();
 

Modified: cfe/trunk/include/clang/Serialization/GlobalModuleIndex.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/GlobalModuleIndex.h?rev=173405&r1=173404&r2=173405&view=diff
==============================================================================
--- cfe/trunk/include/clang/Serialization/GlobalModuleIndex.h (original)
+++ cfe/trunk/include/clang/Serialization/GlobalModuleIndex.h Thu Jan 24 19:03:03 2013
@@ -18,13 +18,20 @@
 #ifndef LLVM_CLANG_SERIALIZATION_GLOBAL_MODULE_INDEX_H
 #define LLVM_CLANG_SERIALIZATION_GLOBAL_MODULE_INDEX_H
 
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include <utility>
 
+namespace llvm {
+class BitstreamCursor;
+class MemoryBuffer;
+}
+
 namespace clang {
 
-class DeclarationName;
 class DirectoryEntry;
 class FileEntry;
 class FileManager;
@@ -46,20 +53,63 @@
 /// can be queried to determine which modules the currently translation could
 /// or should load to fix a problem.
 class GlobalModuleIndex {
+  /// \brief Buffer containing the index file, which is lazily accessed so long
+  /// as the global module index is live.
+  llvm::OwningPtr<llvm::MemoryBuffer> Buffer;
+
+  /// \brief The hash table.
+  ///
+  /// This pointer actually points to a IdentifierIndexTable object,
+  /// but that type is only accessible within the implementation of
+  /// GlobalModuleIndex.
+  void *IdentifierIndex;
+
+  /// \brief Information about a given module file.
+  struct ModuleInfo {
+    ModuleInfo() : File() { }
+
+    /// \brief The module file entry.
+    const FileEntry *File;
+
+    /// \brief The module files on which this module directly depends.
+    llvm::SmallVector<const FileEntry *, 4> Dependencies;
+  };
+
+  /// \brief A mapping from module IDs to information about each module.
+  ///
+  /// This vector may have gaps, if module files have been removed or have
+  /// been updated since the index was built. A gap is indicated by an empty
+  /// \c File pointer.
+  llvm::SmallVector<ModuleInfo, 16> Modules;
+
+  /// \brief Lazily-populated mapping from module file entries to their
+  /// corresponding index into the \c Modules vector.
+  llvm::DenseMap<const FileEntry *, unsigned> ModulesByFile;
+
+  /// \brief The number of identifier lookups we performed.
+  unsigned NumIdentifierLookups;
+
+  /// \brief The number of identifier lookup hits, where we recognize the
+  /// identifier.
+  unsigned NumIdentifierLookupHits;
+
+  /// \brief The number of modules provided via skip sets.
+  unsigned NumIdentifierModulesSkipped;
+
   /// \brief Internal constructor. Use \c readIndex() to read an index.
-  explicit GlobalModuleIndex(FileManager &FileMgr);
+  explicit GlobalModuleIndex(FileManager &FileMgr, llvm::MemoryBuffer *Buffer,
+                             llvm::BitstreamCursor Cursor);
+
+  GlobalModuleIndex(const GlobalModuleIndex &); // DO NOT IMPLEMENT
+  GlobalModuleIndex &operator=(const GlobalModuleIndex &); // DO NOT IMPLEMENT
 
 public:
+  ~GlobalModuleIndex();
+
   /// \brief An error code returned when trying to read an index.
   enum ErrorCode {
     /// \brief No error occurred.
     EC_None,
-    /// \brief The index found was out-of-date, meaning that some of the
-    /// module files are newer than the index.
-    ///
-    /// This error code is not actually fatal, because if the index is
-    /// up-to-date for any module files, it is 
-    EC_OutOfDate,
     /// \brief No index was found.
     EC_NotFound,
     /// \brief Some other process is currently building the index; it is not
@@ -93,17 +143,33 @@
                              SmallVectorImpl<const FileEntry *> &Dependencies);
 
   /// \brief Look for all of the module files with a namespace-scope binding
-  /// for the given name, e.g., a global function, variable, or type with that
-  /// name, or declare a method with the selector.
+  /// for the given identifier, e.g., a global function, variable, or type with
+  /// that name, or declare a method with the selector.
   ///
-  /// \param Name The name or selector to look for.
+  /// \param Name The identifier to look for.
   ///
-  /// \param DeclaringModuleFiles Will be populated with the list of module
+  /// \param ModuleFiles Will be populated with the list of module
   /// files that declare entities with the given name.
   ///
   /// \returns true if any module files were found, false otherwise.
-  bool lookupName(DeclarationName Name,
-                  SmallVectorImpl<const FileEntry *> &DeclaringModuleFiles);
+  bool lookupIdentifier(StringRef Name,
+                        SmallVectorImpl<const FileEntry *> &ModuleFiles);
+
+  /// \brief A set of module files into which name lookup can be skipped,
+  /// because they are known not to contain any bindings for the given name.
+  typedef llvm::SmallPtrSet<const FileEntry *, 16> SkipSet;
+
+  /// \brief Compute the "skip set", meaning those known modules that do not
+  /// have some particular property.
+  ///
+  /// \param ModuleFiles The set of module files that has some property.
+  ///
+  /// \returns The set of known modules that do not have the property exhibited
+  /// by the files in \p ModuleFiles.
+  SkipSet computeSkipSet(const SmallVectorImpl<const FileEntry *> &ModuleFiles);
+
+  /// \brief Print statistics to standard error.
+  void printStats();
 
   /// \brief Write a global index into the given
   ///

Modified: cfe/trunk/lib/Frontend/CompilerInstance.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/CompilerInstance.cpp?rev=173405&r1=173404&r2=173405&view=diff
==============================================================================
--- cfe/trunk/lib/Frontend/CompilerInstance.cpp (original)
+++ cfe/trunk/lib/Frontend/CompilerInstance.cpp Thu Jan 24 19:03:03 2013
@@ -61,7 +61,9 @@
 }
 
 bool CompilerInstance::shouldBuildGlobalModuleIndex() const {
-  return BuildGlobalModuleIndex && !ModuleBuildFailed;
+  return (BuildGlobalModuleIndex ||
+          (ModuleManager && ModuleManager->isGlobalIndexUnavailable())) &&
+         !ModuleBuildFailed;
 }
 
 void CompilerInstance::setDiagnostics(DiagnosticsEngine *Value) {

Modified: cfe/trunk/lib/Serialization/ASTReader.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReader.cpp?rev=173405&r1=173404&r2=173405&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTReader.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTReader.cpp Thu Jan 24 19:03:03 2013
@@ -40,6 +40,7 @@
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/Sema.h"
 #include "clang/Serialization/ASTDeserializationListener.h"
+#include "clang/Serialization/GlobalModuleIndex.h"
 #include "clang/Serialization/ModuleManager.h"
 #include "clang/Serialization/SerializationDiagnostic.h"
 #include "llvm/ADT/StringExtras.h"
@@ -1377,15 +1378,17 @@
   class IdentifierLookupVisitor {
     StringRef Name;
     unsigned PriorGeneration;
+    GlobalModuleIndex::SkipSet &SkipSet;
     unsigned &NumIdentifierLookups;
     unsigned &NumIdentifierLookupHits;
     IdentifierInfo *Found;
 
   public:
     IdentifierLookupVisitor(StringRef Name, unsigned PriorGeneration,
+                            GlobalModuleIndex::SkipSet &SkipSet,
                             unsigned &NumIdentifierLookups,
                             unsigned &NumIdentifierLookupHits)
-      : Name(Name), PriorGeneration(PriorGeneration),
+      : Name(Name), PriorGeneration(PriorGeneration), SkipSet(SkipSet),
         NumIdentifierLookups(NumIdentifierLookups),
         NumIdentifierLookupHits(NumIdentifierLookupHits),
         Found()
@@ -1399,7 +1402,11 @@
       // If we've already searched this module file, skip it now.
       if (M.Generation <= This->PriorGeneration)
         return true;
-      
+
+      // If this module file is in the skip set, don't bother looking in it.
+      if (This->SkipSet.count(M.File))
+        return false;
+
       ASTIdentifierLookupTable *IdTable
         = (ASTIdentifierLookupTable *)M.IdentifierLookupTable;
       if (!IdTable)
@@ -1433,8 +1440,18 @@
   unsigned PriorGeneration = 0;
   if (getContext().getLangOpts().Modules)
     PriorGeneration = IdentifierGeneration[&II];
-  
-  IdentifierLookupVisitor Visitor(II.getName(), PriorGeneration,
+
+  // If there is a global index, look there first to determine which modules
+  // provably do not have any results for this identifier.
+  GlobalModuleIndex::SkipSet SkipSet;
+  if (!loadGlobalIndex()) {
+    SmallVector<const FileEntry *, 4> ModuleFiles;
+    if (GlobalIndex->lookupIdentifier(II.getName(), ModuleFiles)) {
+      SkipSet = GlobalIndex->computeSkipSet(ModuleFiles);
+    }
+  }
+
+  IdentifierLookupVisitor Visitor(II.getName(), PriorGeneration, SkipSet,
                                   NumIdentifierLookups,
                                   NumIdentifierLookupHits);
   ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor);
@@ -2660,6 +2677,32 @@
   }
 }
 
+bool ASTReader::loadGlobalIndex() {
+  if (GlobalIndex)
+    return false;
+
+  if (TriedLoadingGlobalIndex || !UseGlobalIndex ||
+      !Context.getLangOpts().Modules)
+    return true;
+  
+  // Try to load the global index.
+  TriedLoadingGlobalIndex = true;
+  StringRef ModuleCachePath
+    = getPreprocessor().getHeaderSearchInfo().getModuleCachePath();
+  std::pair<GlobalModuleIndex *, GlobalModuleIndex::ErrorCode> Result
+    = GlobalModuleIndex::readIndex(FileMgr, ModuleCachePath);
+  if (!Result.first)
+    return true;
+
+  GlobalIndex.reset(Result.first);
+  return false;
+}
+
+bool ASTReader::isGlobalIndexUnavailable() const {
+  return Context.getLangOpts().Modules && UseGlobalIndex &&
+         !hasGlobalIndex() && TriedLoadingGlobalIndex;
+}
+
 ASTReader::ASTReadResult ASTReader::ReadAST(const std::string &FileName,
                                             ModuleKind Type,
                                             SourceLocation ImportLoc,
@@ -2678,6 +2721,10 @@
   case ConfigurationMismatch:
   case HadErrors:
     ModuleMgr.removeModules(ModuleMgr.begin() + NumModules, ModuleMgr.end());
+
+    // If we find that any modules are unusable, the global index is going
+    // to be out-of-date. Just remove it.
+    GlobalIndex.reset();
     return ReadResult;
 
   case Success:
@@ -2779,7 +2826,7 @@
                        ObjCClassesLoaded[I],
                        PreviousGeneration);
   }
-  
+
   return Success;
 }
 
@@ -5605,6 +5652,11 @@
                  (double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
   }
 
+  if (GlobalIndex) {
+    std::fprintf(stderr, "\n");
+    GlobalIndex->printStats();
+  }
+  
   std::fprintf(stderr, "\n");
   dump();
   std::fprintf(stderr, "\n");
@@ -5705,9 +5757,18 @@
 IdentifierInfo* ASTReader::get(const char *NameStart, const char *NameEnd) {
   // Note that we are loading an identifier.
   Deserializing AnIdentifier(this);
-  
-  IdentifierLookupVisitor Visitor(StringRef(NameStart, NameEnd - NameStart),
-                                  /*PriorGeneration=*/0,
+  StringRef Name(NameStart, NameEnd - NameStart);
+
+  // If there is a global index, look there first to determine which modules
+  // provably do not have any results for this identifier.
+  GlobalModuleIndex::SkipSet SkipSet;
+  if (!loadGlobalIndex()) {
+    SmallVector<const FileEntry *, 4> ModuleFiles;
+    if (GlobalIndex->lookupIdentifier(Name, ModuleFiles)) {
+      SkipSet = GlobalIndex->computeSkipSet(ModuleFiles);
+    }
+  }
+  IdentifierLookupVisitor Visitor(Name, /*PriorGeneration=*/0, SkipSet,
                                   NumIdentifierLookups,
                                   NumIdentifierLookupHits);
   ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor);

Modified: cfe/trunk/lib/Serialization/GlobalModuleIndex.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/GlobalModuleIndex.cpp?rev=173405&r1=173404&r2=173405&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/GlobalModuleIndex.cpp (original)
+++ cfe/trunk/lib/Serialization/GlobalModuleIndex.cpp Thu Jan 24 19:03:03 2013
@@ -42,7 +42,7 @@
   enum IndexRecordTypes {
     /// \brief Contains version information and potentially other metadata,
     /// used to determine if we can read this global index file.
-    METADATA,
+    INDEX_METADATA,
     /// \brief Describes a module, including its file name and dependencies.
     MODULE,
     /// \brief The index for identifiers.
@@ -57,6 +57,378 @@
 static const unsigned CurrentVersion = 1;
 
 //----------------------------------------------------------------------------//
+// Global module index reader.
+//----------------------------------------------------------------------------//
+
+namespace {
+
+/// \brief Trait used to read the identifier index from the on-disk hash
+/// table.
+class IdentifierIndexReaderTrait {
+public:
+  typedef StringRef external_key_type;
+  typedef StringRef internal_key_type;
+  typedef SmallVector<unsigned, 2> data_type;
+
+  static bool EqualKey(const internal_key_type& a, const internal_key_type& b) {
+    return a == b;
+  }
+
+  static unsigned ComputeHash(const internal_key_type& a) {
+    return llvm::HashString(a);
+  }
+
+  static std::pair<unsigned, unsigned>
+  ReadKeyDataLength(const unsigned char*& d) {
+    using namespace clang::io;
+    unsigned KeyLen = ReadUnalignedLE16(d);
+    unsigned DataLen = ReadUnalignedLE16(d);
+    return std::make_pair(KeyLen, DataLen);
+  }
+
+  static const internal_key_type&
+  GetInternalKey(const external_key_type& x) { return x; }
+
+  static const external_key_type&
+  GetExternalKey(const internal_key_type& x) { return x; }
+
+  static internal_key_type ReadKey(const unsigned char* d, unsigned n) {
+    return StringRef((const char *)d, n);
+  }
+
+  static data_type ReadData(const internal_key_type& k,
+                            const unsigned char* d,
+                            unsigned DataLen) {
+    using namespace clang::io;
+
+    data_type Result;
+    while (DataLen > 0) {
+      unsigned ID = ReadUnalignedLE32(d);
+      Result.push_back(ID);
+      DataLen -= 4;
+    }
+
+    return Result;
+  }
+};
+
+typedef OnDiskChainedHashTable<IdentifierIndexReaderTrait> IdentifierIndexTable;
+
+/// \brief Module information as it was loaded from the index file.
+struct LoadedModuleInfo {
+  const FileEntry *File;
+  SmallVector<unsigned, 2> Dependencies;
+  SmallVector<unsigned, 2> ImportedBy;
+};
+
+}
+
+GlobalModuleIndex::GlobalModuleIndex(FileManager &FileMgr,
+                                     llvm::MemoryBuffer *Buffer,
+                                     llvm::BitstreamCursor Cursor)
+  : Buffer(Buffer), IdentifierIndex()
+{
+  typedef llvm::DenseMap<unsigned, LoadedModuleInfo> LoadedModulesMap;
+  LoadedModulesMap LoadedModules;
+  
+  // Read the global index.
+  unsigned LargestID = 0;
+  bool InGlobalIndexBlock = false;
+  bool Done = false;
+  bool AnyOutOfDate = false;
+  while (!Done) {
+    llvm::BitstreamEntry Entry = Cursor.advance();
+
+    switch (Entry.Kind) {
+    case llvm::BitstreamEntry::Error:
+      return;
+
+    case llvm::BitstreamEntry::EndBlock:
+      if (InGlobalIndexBlock) {
+        InGlobalIndexBlock = false;
+        Done = true;
+        continue;
+      }
+      return;
+
+
+    case llvm::BitstreamEntry::Record:
+      // Entries in the global index block are handled below.
+      if (InGlobalIndexBlock)
+        break;
+
+      return;
+
+    case llvm::BitstreamEntry::SubBlock:
+      if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) {
+        if (Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID))
+          return;
+
+        InGlobalIndexBlock = true;
+      } else if (Cursor.SkipBlock()) {
+        return;
+      }
+      continue;
+    }
+
+    SmallVector<uint64_t, 64> Record;
+    StringRef Blob;
+    switch ((IndexRecordTypes)Cursor.readRecord(Entry.ID, Record, &Blob)) {
+    case INDEX_METADATA:
+      // Make sure that the version matches.
+      if (Record.size() < 1 || Record[0] != CurrentVersion)
+        return;
+      break;
+
+    case MODULE: {
+      unsigned Idx = 0;
+      unsigned ID = Record[Idx++];
+      if (ID > LargestID)
+        LargestID = ID;
+      
+      off_t Size = Record[Idx++];
+      time_t ModTime = Record[Idx++];
+
+      // File name.
+      unsigned NameLen = Record[Idx++];
+      llvm::SmallString<64> FileName(Record.begin() + Idx,
+                                     Record.begin() + Idx + NameLen);
+      Idx += NameLen;
+
+      // Dependencies
+      unsigned NumDeps = Record[Idx++];
+      llvm::SmallVector<unsigned, 2>
+        Dependencies(Record.begin() + Idx, Record.begin() + Idx + NumDeps);
+
+      // Find the file. If we can't find it, ignore it.
+      const FileEntry *File = FileMgr.getFile(FileName);
+      if (!File) {
+        AnyOutOfDate = true;
+        break;
+      }
+
+      // If the module file is newer than the index, ignore it.
+      if (File->getSize() != Size || File->getModificationTime() != ModTime) {
+        AnyOutOfDate = true;
+        break;
+      }
+
+      // Record this module. The dependencies will be resolved later.
+      LoadedModuleInfo &Info = LoadedModules[ID];
+      Info.File = File;
+      Info.Dependencies.swap(Dependencies);
+      break;
+    }
+
+    case IDENTIFIER_INDEX:
+      // Wire up the identifier index.
+      if (Record[0]) {
+        IdentifierIndex = IdentifierIndexTable::Create(
+                            (const unsigned char *)Blob.data() + Record[0],
+                            (const unsigned char *)Blob.data(),
+                            IdentifierIndexReaderTrait());
+      }
+      break;
+    }
+  }
+
+  // If there are any modules that have gone out-of-date, prune out any modules
+  // that depend on them.
+  if (AnyOutOfDate) {
+    // First, build back links in the module dependency graph.
+    SmallVector<unsigned, 4> Stack;
+    for (LoadedModulesMap::iterator LM = LoadedModules.begin(),
+                                    LMEnd = LoadedModules.end();
+         LM != LMEnd; ++LM) {
+      unsigned ID = LM->first;
+
+      // If this module is out-of-date, push it onto the stack.
+      if (LM->second.File == 0)
+        Stack.push_back(ID);
+
+      for (unsigned I = 0, N = LM->second.Dependencies.size(); I != N; ++I) {
+        unsigned DepID = LM->second.Dependencies[I];
+        LoadedModulesMap::iterator Known = LoadedModules.find(DepID);
+        if (Known == LoadedModules.end() || !Known->second.File) {
+          // The dependency was out-of-date, so mark us as out of date.
+          // This is just an optimization.
+          if (LM->second.File)
+            Stack.push_back(ID);
+
+          LM->second.File = 0;
+          continue;
+        }
+
+        // Record this reverse dependency.
+        Known->second.ImportedBy.push_back(ID);
+      }
+    }
+
+    // Second, walk the back links from out-of-date modules to those modules
+    // that depend on them, making those modules out-of-date as well.
+    while (!Stack.empty()) {
+      unsigned ID = Stack.back();
+      Stack.pop_back();
+
+      LoadedModuleInfo &Info = LoadedModules[ID];
+      for (unsigned I = 0, N = Info.ImportedBy.size(); I != N; ++I) {
+        unsigned FromID = Info.ImportedBy[I];
+        if (LoadedModules[FromID].File) {
+          LoadedModules[FromID].File = 0;
+          Stack.push_back(FromID);
+        }
+      }
+    }
+  }
+
+  // Allocate the vector containing information about all of the modules.
+  Modules.resize(LargestID + 1);
+  for (LoadedModulesMap::iterator LM = LoadedModules.begin(),
+                                  LMEnd = LoadedModules.end();
+       LM != LMEnd; ++LM) {
+    if (!LM->second.File)
+      continue;
+    
+    Modules[LM->first].File = LM->second.File;
+
+    // Resolve dependencies. Drop any we can't resolve due to out-of-date
+    // module files.
+    for (unsigned I = 0, N = LM->second.Dependencies.size(); I != N; ++I) {
+      unsigned DepID = LM->second.Dependencies[I];
+      LoadedModulesMap::iterator Known = LoadedModules.find(DepID);
+      if (Known == LoadedModules.end() || !Known->second.File)
+        continue;
+
+      Modules[LM->first].Dependencies.push_back(Known->second.File);
+    }
+  }
+}
+
+GlobalModuleIndex::~GlobalModuleIndex() { }
+
+std::pair<GlobalModuleIndex *, GlobalModuleIndex::ErrorCode>
+GlobalModuleIndex::readIndex(FileManager &FileMgr, StringRef Path) {
+  // Load the index file, if it's there.
+  llvm::SmallString<128> IndexPath;
+  IndexPath += Path;
+  llvm::sys::path::append(IndexPath, IndexFileName);
+
+  llvm::OwningPtr<llvm::MemoryBuffer> Buffer(
+                                        FileMgr.getBufferForFile(IndexPath));
+  if (!Buffer)
+    return std::make_pair((GlobalModuleIndex *)0, EC_NotFound);
+
+  /// \brief The bitstream reader from which we'll read the AST file.
+  llvm::BitstreamReader Reader((const unsigned char *)Buffer->getBufferStart(),
+                               (const unsigned char *)Buffer->getBufferEnd());
+
+  /// \brief The main bitstream cursor for the main block.
+  llvm::BitstreamCursor Cursor(Reader);
+
+  // Sniff for the signature.
+  if (Cursor.Read(8) != 'B' ||
+      Cursor.Read(8) != 'C' ||
+      Cursor.Read(8) != 'G' ||
+      Cursor.Read(8) != 'I') {
+    return std::make_pair((GlobalModuleIndex *)0, EC_IOError);
+  }
+  
+  return std::make_pair(new GlobalModuleIndex(FileMgr, Buffer.take(), Cursor),
+                        EC_None);
+}
+
+void GlobalModuleIndex::getKnownModules(
+       SmallVectorImpl<const FileEntry *> &ModuleFiles) {
+  ModuleFiles.clear();
+  for (unsigned I = 0, N = Modules.size(); I != N; ++I) {
+    if (Modules[I].File)
+      ModuleFiles.push_back(Modules[I].File);
+  }
+}
+
+void GlobalModuleIndex::getModuleDependencies(
+       const clang::FileEntry *ModuleFile,
+       SmallVectorImpl<const clang::FileEntry *> &Dependencies) {
+  // If the file -> index mapping is empty, populate it now.
+  if (ModulesByFile.empty()) {
+    for (unsigned I = 0, N = Modules.size(); I != N; ++I) {
+      if (Modules[I].File)
+        ModulesByFile[Modules[I].File] = I;
+    }
+  }
+
+  // Look for information about this module file.
+  llvm::DenseMap<const FileEntry *, unsigned>::iterator Known
+    = ModulesByFile.find(ModuleFile);
+  if (Known == ModulesByFile.end())
+    return;
+
+  // Record dependencies.
+  Dependencies = Modules[Known->second].Dependencies;
+}
+
+bool GlobalModuleIndex::lookupIdentifier(
+       StringRef Name,
+       SmallVectorImpl<const FileEntry *> &ModuleFiles) {
+  ModuleFiles.clear();
+  
+  // If there's no identifier index, there is nothing we can do.
+  if (!IdentifierIndex)
+    return false;
+
+  // Look into the identifier index.
+  ++NumIdentifierLookups;
+  IdentifierIndexTable &Table
+    = *static_cast<IdentifierIndexTable *>(IdentifierIndex);
+  IdentifierIndexTable::iterator Known = Table.find(Name);
+  if (Known == Table.end()) {
+    return true;
+  }
+
+  SmallVector<unsigned, 2> ModuleIDs = *Known;
+  for (unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) {
+    unsigned ID = ModuleIDs[I];
+    if (ID >= Modules.size() || !Modules[ID].File)
+      continue;
+
+    ModuleFiles.push_back(Modules[ID].File);
+  }
+
+  ++NumIdentifierLookupHits;
+  return true;
+}
+
+GlobalModuleIndex::SkipSet
+GlobalModuleIndex::computeSkipSet(
+  const SmallVectorImpl<const FileEntry *> &ModuleFiles) {
+  llvm::SmallPtrSet<const FileEntry *, 8> Found(ModuleFiles.begin(),
+                                                ModuleFiles.end());
+
+  SkipSet Result;
+  for (unsigned I = 0, N = Modules.size(); I != N; ++I) {
+    if (Modules[I].File && !Found.count(Modules[I].File))
+      Result.insert(Modules[I].File);
+  }
+
+  NumIdentifierModulesSkipped += Result.size();
+  return Result;
+}
+
+void GlobalModuleIndex::printStats() {
+  std::fprintf(stderr, "*** Global Module Index Statistics:\n");
+  if (NumIdentifierLookups) {
+    fprintf(stderr, "  %u / %u identifier lookups succeeded (%f%%)\n",
+            NumIdentifierLookupHits, NumIdentifierLookups,
+            (double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
+  }
+  if (NumIdentifierLookups && NumIdentifierModulesSkipped) {
+    fprintf(stderr, "  %f modules skipped per lookup (on average)\n",
+            (double)NumIdentifierModulesSkipped/NumIdentifierLookups);
+  }
+  std::fprintf(stderr, "\n");
+}
+
+//----------------------------------------------------------------------------//
 // Global module index writer.
 //----------------------------------------------------------------------------//
 
@@ -151,7 +523,7 @@
 #define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record)
 #define RECORD(X) emitRecordID(X, #X, Stream, Record)
   BLOCK(GLOBAL_INDEX_BLOCK);
-  RECORD(METADATA);
+  RECORD(INDEX_METADATA);
   RECORD(MODULE);
   RECORD(IDENTIFIER_INDEX);
 #undef RECORD
@@ -160,7 +532,7 @@
   Stream.ExitBlock();
 }
 
-namespace clang {
+namespace {
   class InterestingASTIdentifierLookupTrait
     : public serialization::reader::ASTIdentifierLookupTraitBase {
 
@@ -209,18 +581,18 @@
   unsigned ID = getModuleFileInfo(File).ID;
 
   // Search for the blocks and records we care about.
-  enum { Outer, ControlBlock, ASTBlock } State = Outer;
+  enum { Other, ControlBlock, ASTBlock } State = Other;
   bool Done = false;
   while (!Done) {
-    const unsigned Flags = llvm::BitstreamCursor::AF_DontPopBlockAtEnd;
-    llvm::BitstreamEntry Entry = InStream.advance(Flags);
+    llvm::BitstreamEntry Entry = InStream.advance();
     switch (Entry.Kind) {
     case llvm::BitstreamEntry::Error:
-      return true;
+      Done = true;
+      continue;
 
     case llvm::BitstreamEntry::Record:
-      // In the outer state, just skip the record. We don't care.
-      if (State == Outer) {
+      // In the 'other' state, just skip the record. We don't care.
+      if (State == Other) {
         InStream.skipRecord(Entry.ID);
         continue;
       }
@@ -229,7 +601,7 @@
       break;
 
     case llvm::BitstreamEntry::SubBlock:
-      if (State == Outer && Entry.ID == CONTROL_BLOCK_ID) {
+      if (Entry.ID == CONTROL_BLOCK_ID) {
         if (InStream.EnterSubBlock(CONTROL_BLOCK_ID))
           return true;
 
@@ -238,14 +610,13 @@
         continue;
       }
 
-      if (State == Outer && Entry.ID == AST_BLOCK_ID) {
+      if (Entry.ID == AST_BLOCK_ID) {
         if (InStream.EnterSubBlock(AST_BLOCK_ID))
           return true;
 
         // Found the AST block.
         State = ASTBlock;
         continue;
-
       }
 
       if (InStream.SkipBlock())
@@ -254,10 +625,7 @@
       continue;
 
     case llvm::BitstreamEntry::EndBlock:
-      if (State == Outer) {
-        Done = true;
-      }
-      State = Outer;
+      State = Other;
       continue;
     }
 
@@ -312,6 +680,8 @@
         std::pair<StringRef, bool> Ident = *D;
         if (Ident.second)
           InterestingIdentifiers[Ident.first].push_back(ID);
+        else
+          (void)InterestingIdentifiers[Ident.first];
       }
     }
 
@@ -378,7 +748,7 @@
   // Write the metadata.
   SmallVector<uint64_t, 2> Record;
   Record.push_back(CurrentVersion);
-  Stream.EmitRecord(METADATA, Record);
+  Stream.EmitRecord(INDEX_METADATA, Record);
 
   // Write the set of known module files.
   for (ModuleFilesMap::iterator M = ModuleFiles.begin(),

Modified: cfe/trunk/lib/Serialization/ModuleManager.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ModuleManager.cpp?rev=173405&r1=173404&r2=173405&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ModuleManager.cpp (original)
+++ cfe/trunk/lib/Serialization/ModuleManager.cpp Thu Jan 24 19:03:03 2013
@@ -180,8 +180,7 @@
     if (Visitor(*CurrentModule, UserData)) {
       // The visitor has requested that cut off visitation of any
       // module that the current module depends on. To indicate this
-      // behavior, we mark all of the reachable modules as having N
-      // incoming edges (which is impossible otherwise).
+      // behavior, we mark all of the reachable modules as "skipped".
       SmallVector<ModuleFile *, 4> Stack;
       Stack.push_back(CurrentModule);
       Skipped[CurrentModule->Index] = true;
@@ -210,6 +209,8 @@
            M = CurrentModule->Imports.begin(),
            MEnd = CurrentModule->Imports.end();
          M != MEnd; ++M) {
+      if (Skipped[(*M)->Index])
+        continue;
       
       // Remove our current module as an impediment to visiting the
       // module we depend on. If we were the last unvisited module

Modified: cfe/trunk/test/Modules/global_index.m
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Modules/global_index.m?rev=173405&r1=173404&r2=173405&view=diff
==============================================================================
--- cfe/trunk/test/Modules/global_index.m (original)
+++ cfe/trunk/test/Modules/global_index.m Thu Jan 24 19:03:03 2013
@@ -1,13 +1,20 @@
 // RUN: rm -rf %t
+// Run without global module index
+// RUN: %clang_cc1 -Wauto-import -fmodule-cache-path %t -fdisable-module-hash -fmodules -F %S/Inputs %s -verify
+// RUN: ls %t|not grep modules.idx
+// Run and create the global module index
 // RUN: %clang_cc1 -Wauto-import -fmodule-cache-path %t -fdisable-module-hash -fmodules -fmodules-global-index -F %S/Inputs %s -verify
 // RUN: ls %t|grep modules.idx
-// RUN: %clang_cc1 -Wauto-import -fmodule-cache-path %t -fdisable-module-hash -fmodules -fmodules-global-index -F %S/Inputs %s -verify
+// Run and use the global module index
+// RUN: %clang_cc1 -Wauto-import -fmodule-cache-path %t -fdisable-module-hash -fmodules -fmodules-global-index -F %S/Inputs %s -verify -print-stats 2>&1 | FileCheck %s
 // REQUIRES: shell
 
 // expected-no-diagnostics
 @import DependsOnModule;
 @import Module;
 
+// CHECK: *** Global Module Index Statistics:
+
 int *get_sub() {
   return Module_Sub;
 }





More information about the cfe-commits mailing list