[cfe-commits] r125286 - in /cfe/trunk: include/clang/Basic/FileManager.h include/clang/Basic/OnDiskHashTable.h include/clang/Lex/HeaderSearch.h include/clang/Serialization/ASTBitCodes.h include/clang/Serialization/ASTReader.h include/clang/Serialization/ASTWriter.h lib/Basic/FileManager.cpp lib/Lex/HeaderSearch.cpp lib/Serialization/ASTReader.cpp lib/Serialization/ASTWriter.cpp

Douglas Gregor dgregor at apple.com
Thu Feb 10 09:09:37 PST 2011


Author: dgregor
Date: Thu Feb 10 11:09:37 2011
New Revision: 125286

URL: http://llvm.org/viewvc/llvm-project?rev=125286&view=rev
Log:
Implement two related optimizations that make de-serialization of
AST/PCH files more lazy:
  - Don't preload all of the file source-location entries when reading
  the AST file. Instead, load them lazily, when needed.
  - Only look up header-search information (whether a header was already
  #import'd, how many times it's been included, etc.) when it's needed
  by the preprocessor, rather than pre-populating it.

Previously, we would pre-load all of the file source-location entries,
which also populated the header-search information structure. This was
a relatively minor performance issue, since we would end up stat()'ing
all of the headers stored within an AST/PCH file when the AST/PCH file
was loaded. In the normal PCH use case, the stat()s were cached, so
the cost---of preloading ~860 source-location entries in the Cocoa.h
case---was relatively low.

However, the recent optimization that replaced stat+open with
open+fstat turned this into a major problem, since the preloading of
source-location entries would now end up opening those files. Worse,
those files wouldn't be closed until the file manager was destroyed,
so just opening a Cocoa.h PCH file would hold on to ~860 file
descriptors, and it was easy to blow through the process's limit on
the number of open file descriptors.

By eliminating the preloading of these files, we neither open nor stat
the headers stored in the PCH/AST file until they're actually needed
for something. Concretely, we went from

*** HeaderSearch Stats:
835 files tracked.
  364 #import/#pragma once files.
  823 included exactly once.
  6 max times a file is included.
  3 #include/#include_next/#import.
    0 #includes skipped due to the multi-include optimization.
1 framework lookups.
0 subframework lookups.

*** Source Manager Stats:
835 files mapped, 3 mem buffers mapped.
37460 SLocEntry's allocated, 11215575B of Sloc address space used.
62 bytes of files mapped, 0 files with line #'s computed.

with a trivial program that uses a chained PCH including a Cocoa PCH
to

*** HeaderSearch Stats:
4 files tracked.
  1 #import/#pragma once files.
  3 included exactly once.
  2 max times a file is included.
  3 #include/#include_next/#import.
    0 #includes skipped due to the multi-include optimization.
1 framework lookups.
0 subframework lookups.

*** Source Manager Stats:
3 files mapped, 3 mem buffers mapped.
37460 SLocEntry's allocated, 11215575B of Sloc address space used.
62 bytes of files mapped, 0 files with line #'s computed.

for the same program.


Modified:
    cfe/trunk/include/clang/Basic/FileManager.h
    cfe/trunk/include/clang/Basic/OnDiskHashTable.h
    cfe/trunk/include/clang/Lex/HeaderSearch.h
    cfe/trunk/include/clang/Serialization/ASTBitCodes.h
    cfe/trunk/include/clang/Serialization/ASTReader.h
    cfe/trunk/include/clang/Serialization/ASTWriter.h
    cfe/trunk/lib/Basic/FileManager.cpp
    cfe/trunk/lib/Lex/HeaderSearch.cpp
    cfe/trunk/lib/Serialization/ASTReader.cpp
    cfe/trunk/lib/Serialization/ASTWriter.cpp

Modified: cfe/trunk/include/clang/Basic/FileManager.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/FileManager.h?rev=125286&r1=125285&r2=125286&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/FileManager.h (original)
+++ cfe/trunk/include/clang/Basic/FileManager.h Thu Feb 10 11:09:37 2011
@@ -186,6 +186,12 @@
   static void FixupRelativePath(llvm::sys::Path &path,
                                 const FileSystemOptions &FSOpts);
   
+  
+  /// \brief Produce an array mapping from the unique IDs assigned to each
+  /// file to the corresponding FileEntry pointer.
+  void GetUniqueIDMapping(
+                    llvm::SmallVectorImpl<const FileEntry *> &UIDToFiles) const;
+  
   void PrintStats() const;
 };
 

Modified: cfe/trunk/include/clang/Basic/OnDiskHashTable.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/OnDiskHashTable.h?rev=125286&r1=125285&r2=125286&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/OnDiskHashTable.h (original)
+++ cfe/trunk/include/clang/Basic/OnDiskHashTable.h Thu Feb 10 11:09:37 2011
@@ -320,7 +320,7 @@
         InfoPtr->ReadKey((const unsigned char* const) Items, L.first);
 
       // If the key doesn't match just skip reading the value.
-      if (!Info::EqualKey(X, iKey)) {
+      if (!InfoPtr->EqualKey(X, iKey)) {
         Items += item_len;
         continue;
       }

Modified: cfe/trunk/include/clang/Lex/HeaderSearch.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/HeaderSearch.h?rev=125286&r1=125285&r2=125286&view=diff
==============================================================================
--- cfe/trunk/include/clang/Lex/HeaderSearch.h (original)
+++ cfe/trunk/include/clang/Lex/HeaderSearch.h Thu Feb 10 11:09:37 2011
@@ -29,7 +29,7 @@
 /// file that is #included.
 struct HeaderFileInfo {
   /// isImport - True if this is a #import'd or #pragma once file.
-  bool isImport : 1;
+  unsigned isImport : 1;
 
   /// DirInfo - Keep track of whether this is a system header, and if so,
   /// whether it is C++ clean or not.  This can be set by the include paths or
@@ -37,10 +37,24 @@
   /// SrcMgr::CharacteristicKind.
   unsigned DirInfo : 2;
 
+  /// \brief Whether this header file info was supplied by an external source.
+  unsigned External : 1;
+  
+  /// \brief Whether this structure is considered to already have been
+  /// "resolved", meaning that it was loaded from the external source.
+  unsigned Resolved : 1;
+  
   /// NumIncludes - This is the number of times the file has been included
   /// already.
   unsigned short NumIncludes;
 
+  /// \brief The ID number of the controlling macro.
+  ///
+  /// This ID number will be non-zero when there is a controlling
+  /// macro whose IdentifierInfo may not yet have been loaded from
+  /// external storage.
+  unsigned ControllingMacroID;
+
   /// ControllingMacro - If this file has a #ifndef XXX (or equivalent) guard
   /// that protects the entire contents of the file, this is the identifier
   /// for the macro that controls whether or not it has any effect.
@@ -51,22 +65,36 @@
   /// external storage.
   const IdentifierInfo *ControllingMacro;
 
-  /// \brief The ID number of the controlling macro.
-  ///
-  /// This ID number will be non-zero when there is a controlling
-  /// macro whose IdentifierInfo may not yet have been loaded from
-  /// external storage.
-  unsigned ControllingMacroID;
-
   HeaderFileInfo()
-    : isImport(false), DirInfo(SrcMgr::C_User),
-      NumIncludes(0), ControllingMacro(0), ControllingMacroID(0) {}
+    : isImport(false), DirInfo(SrcMgr::C_User), External(false), 
+      Resolved(false), NumIncludes(0), ControllingMacroID(0), 
+      ControllingMacro(0)  {}
 
   /// \brief Retrieve the controlling macro for this header file, if
   /// any.
   const IdentifierInfo *getControllingMacro(ExternalIdentifierLookup *External);
+  
+  /// \brief Determine whether this is a non-default header file info, e.g.,
+  /// it corresponds to an actual header we've included or tried to include.
+  bool isNonDefault() const {
+    return isImport || NumIncludes || ControllingMacro || ControllingMacroID;
+  }
 };
 
+/// \brief An external source of header file information, which may supply
+/// information about header files already included.
+class ExternalHeaderFileInfoSource {
+public:
+  virtual ~ExternalHeaderFileInfoSource();
+  
+  /// \brief Retrieve the header file information for the given file entry.
+  ///
+  /// \returns Header file information for the given file entry, with the
+  /// \c External bit set. If the file entry is not known, return a 
+  /// default-constructed \c HeaderFileInfo.
+  virtual HeaderFileInfo GetHeaderFileInfo(const FileEntry *FE) = 0;
+};
+  
 /// HeaderSearch - This class encapsulates the information needed to find the
 /// file referenced by a #include or #include_next, (sub-)framework lookup, etc.
 class HeaderSearch {
@@ -107,6 +135,9 @@
   /// macros into IdentifierInfo pointers, as needed.
   ExternalIdentifierLookup *ExternalLookup;
 
+  /// \brief Entity used to look up stored header file information.
+  ExternalHeaderFileInfoSource *ExternalSource;
+  
   // Various statistics we track for performance analysis.
   unsigned NumIncluded;
   unsigned NumMultiIncludeFileOptzn;
@@ -141,6 +172,15 @@
     ExternalLookup = EIL;
   }
 
+  ExternalIdentifierLookup *getExternalLookup() const {
+    return ExternalLookup;
+  }
+  
+  /// \brief Set the external source of header information.
+  void SetExternalSource(ExternalHeaderFileInfoSource *ES) {
+    ExternalSource = ES;
+  }
+  
   /// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
   /// return null on failure.  isAngled indicates whether the file reference is
   /// a <> reference.  If successful, this returns 'UsedDir', the

Modified: cfe/trunk/include/clang/Serialization/ASTBitCodes.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/ASTBitCodes.h?rev=125286&r1=125285&r2=125286&view=diff
==============================================================================
--- cfe/trunk/include/clang/Serialization/ASTBitCodes.h (original)
+++ cfe/trunk/include/clang/Serialization/ASTBitCodes.h Thu Feb 10 11:09:37 2011
@@ -350,7 +350,10 @@
       DIAG_PRAGMA_MAPPINGS = 38,
 
       /// \brief Record code for special CUDA declarations.
-      CUDA_SPECIAL_DECL_REFS = 39
+      CUDA_SPECIAL_DECL_REFS = 39,
+      
+      /// \brief Record code for header search information.
+      HEADER_SEARCH_TABLE = 40
     };
 
     /// \brief Record types used within a source manager block.

Modified: cfe/trunk/include/clang/Serialization/ASTReader.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/ASTReader.h?rev=125286&r1=125285&r2=125286&view=diff
==============================================================================
--- cfe/trunk/include/clang/Serialization/ASTReader.h (original)
+++ cfe/trunk/include/clang/Serialization/ASTReader.h Thu Feb 10 11:09:37 2011
@@ -20,6 +20,7 @@
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/TemplateBase.h"
 #include "clang/Lex/ExternalPreprocessorSource.h"
+#include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/PreprocessingRecord.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/IdentifierTable.h"
@@ -165,10 +166,12 @@
 class ASTReader
   : public ExternalPreprocessorSource,
     public ExternalPreprocessingRecordSource,
+    public ExternalHeaderFileInfoSource,
     public ExternalSemaSource,
     public IdentifierInfoLookup,
     public ExternalIdentifierLookup,
-    public ExternalSLocEntrySource {
+    public ExternalSLocEntrySource 
+{
 public:
   enum ASTReadResult { Success, Failure, IgnorePCH };
   /// \brief Types of AST files.
@@ -261,7 +264,7 @@
     /// stored.
     const uint32_t *IdentifierOffsets;
 
-    /// \brief Actual data for the on-disk hash table.
+    /// \brief Actual data for the on-disk hash table of identifiers.
     ///
     /// This pointer points into a memory buffer, where the on-disk hash
     /// table for identifiers actually lives.
@@ -296,6 +299,22 @@
     /// record in the AST file.
     const uint32_t *MacroDefinitionOffsets;
 
+    // === Header search information ===
+    
+    /// \brief The number of local HeaderFileInfo structures.
+    unsigned LocalNumHeaderFileInfos;
+    
+    /// \brief Actual data for the on-disk hash table of header file 
+    /// information.
+    ///
+    /// This pointer points into a memory buffer, where the on-disk hash
+    /// table for header file information actually lives.
+    const char *HeaderFileInfoTableData;
+
+    /// \brief The on-disk hash table that contains information about each of
+    /// the header files.
+    void *HeaderFileInfoTable;
+    
     // === Selectors ===
 
     /// \brief The number of selectors new to this file.
@@ -881,6 +900,9 @@
   /// \brief Read the preprocessed entity at the given offset.
   virtual PreprocessedEntity *ReadPreprocessedEntity(uint64_t Offset);
 
+  /// \brief Read the header file information for the given file entry.
+  virtual HeaderFileInfo GetHeaderFileInfo(const FileEntry *FE);
+
   void ReadPragmaDiagnosticMappings(Diagnostic &Diag);
 
   /// \brief Returns the number of source locations found in the chain.

Modified: cfe/trunk/include/clang/Serialization/ASTWriter.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/ASTWriter.h?rev=125286&r1=125285&r2=125286&view=diff
==============================================================================
--- cfe/trunk/include/clang/Serialization/ASTWriter.h (original)
+++ cfe/trunk/include/clang/Serialization/ASTWriter.h Thu Feb 10 11:09:37 2011
@@ -42,6 +42,7 @@
 class NestedNameSpecifier;
 class CXXBaseSpecifier;
 class CXXCtorInitializer;
+class HeaderSearch;
 class LabelStmt;
 class MacroDefinition;
 class MemorizeStatCalls;
@@ -312,6 +313,7 @@
                                const Preprocessor &PP,
                                const char* isysroot);
   void WritePreprocessor(const Preprocessor &PP);
+  void WriteHeaderSearch(HeaderSearch &HS, const char* isysroot);
   void WritePreprocessorDetail(PreprocessingRecord &PPRec);
   void WritePragmaDiagnosticMappings(const Diagnostic &Diag);
   void WriteType(QualType T);

Modified: cfe/trunk/lib/Basic/FileManager.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/FileManager.cpp?rev=125286&r1=125285&r2=125286&view=diff
==============================================================================
--- cfe/trunk/lib/Basic/FileManager.cpp (original)
+++ cfe/trunk/lib/Basic/FileManager.cpp Thu Feb 10 11:09:37 2011
@@ -493,6 +493,25 @@
                                   StatCache.get());
 }
 
+void FileManager::GetUniqueIDMapping(
+                   llvm::SmallVectorImpl<const FileEntry *> &UIDToFiles) const {
+  UIDToFiles.clear();
+  UIDToFiles.resize(NextFileUID);
+  
+  // Map file entries
+  for (llvm::StringMap<FileEntry*, llvm::BumpPtrAllocator>::const_iterator
+         FE = FileEntries.begin(), FEEnd = FileEntries.end();
+       FE != FEEnd; ++FE)
+    if (FE->getValue() && FE->getValue() != NON_EXISTENT_FILE)
+      UIDToFiles[FE->getValue()->getUID()] = FE->getValue();
+  
+  // Map virtual file entries
+  for (llvm::SmallVector<FileEntry*, 4>::const_iterator 
+         VFE = VirtualFileEntries.begin(), VFEEnd = VirtualFileEntries.end();
+       VFE != VFEEnd; ++VFE)
+    if (*VFE && *VFE != NON_EXISTENT_FILE)
+      UIDToFiles[(*VFE)->getUID()] = *VFE;
+}
 
 
 void FileManager::PrintStats() const {

Modified: cfe/trunk/lib/Lex/HeaderSearch.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/HeaderSearch.cpp?rev=125286&r1=125285&r2=125286&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/HeaderSearch.cpp (original)
+++ cfe/trunk/lib/Lex/HeaderSearch.cpp Thu Feb 10 11:09:37 2011
@@ -33,12 +33,15 @@
   return ControllingMacro;
 }
 
+ExternalHeaderFileInfoSource::~ExternalHeaderFileInfoSource() {}
+
 HeaderSearch::HeaderSearch(FileManager &FM)
     : FileMgr(FM), FrameworkMap(64) {
   SystemDirIdx = 0;
   NoCurDirSearch = false;
 
   ExternalLookup = 0;
+  ExternalSource = 0;
   NumIncluded = 0;
   NumMultiIncludeFileOptzn = 0;
   NumFrameworkLookups = NumSubFrameworkLookups = 0;
@@ -387,12 +390,19 @@
 HeaderFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) {
   if (FE->getUID() >= FileInfo.size())
     FileInfo.resize(FE->getUID()+1);
-  return FileInfo[FE->getUID()];
+  
+  HeaderFileInfo &HFI = FileInfo[FE->getUID()];
+  if (ExternalSource && !HFI.Resolved) {
+    HFI = ExternalSource->GetHeaderFileInfo(FE);
+    HFI.Resolved = true;
+  }
+  return HFI;
 }
 
 void HeaderSearch::setHeaderFileInfoForUID(HeaderFileInfo HFI, unsigned UID) {
   if (UID >= FileInfo.size())
     FileInfo.resize(UID+1);
+  HFI.Resolved = true;
   FileInfo[UID] = HFI;
 }
 

Modified: cfe/trunk/lib/Serialization/ASTReader.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReader.cpp?rev=125286&r1=125285&r2=125286&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTReader.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTReader.cpp Thu Feb 10 11:09:37 2011
@@ -40,12 +40,14 @@
 #include "llvm/Bitcode/BitstreamReader.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/system_error.h"
 #include <algorithm>
 #include <iterator>
 #include <cstdio>
 #include <sys/stat.h>
+
 using namespace clang;
 using namespace clang::serialization;
 
@@ -1244,7 +1246,7 @@
       return Failure;
     }
 
-    if (Record.size() < 10) {
+    if (Record.size() < 6) {
       Error("source location entry is incorrect");
       return Failure;
     }
@@ -1269,15 +1271,7 @@
     if (Record[3])
       const_cast<SrcMgr::FileInfo&>(SourceMgr.getSLocEntry(FID).getFile())
         .setHasLineDirectives();
-
-    // Reconstruct header-search information for this file.
-    HeaderFileInfo HFI;
-    HFI.isImport = Record[6];
-    HFI.DirInfo = Record[7];
-    HFI.NumIncludes = Record[8];
-    HFI.ControllingMacroID = Record[9];
-    if (Listener)
-      Listener->ReadHeaderFileInfo(HFI, File->getUID());
+    
     break;
   }
 
@@ -1572,6 +1566,99 @@
   return 0;
 }
 
+namespace {
+  /// \brief Trait class used to search the on-disk hash table containing all of
+  /// the header search information.
+  ///
+  /// The on-disk hash table contains a mapping from each header path to 
+  /// information about that header (how many times it has been included, its
+  /// controlling macro, etc.). Note that we actually hash based on the 
+  /// filename, and support "deep" comparisons of file names based on current
+  /// inode numbers, so that the search can cope with non-normalized path names
+  /// and symlinks.
+  class HeaderFileInfoTrait {
+    const char *SearchPath;
+    struct stat SearchPathStatBuf;
+    llvm::Optional<int> SearchPathStatResult;
+    
+    int StatSimpleCache(const char *Path, struct stat *StatBuf) {
+      if (Path == SearchPath) {
+        if (!SearchPathStatResult)
+          SearchPathStatResult = stat(Path, &SearchPathStatBuf);
+        
+        *StatBuf = SearchPathStatBuf;
+        return *SearchPathStatResult;
+      }
+      
+      return stat(Path, StatBuf);
+    }
+    
+  public:
+    typedef const char *external_key_type;
+    typedef const char *internal_key_type;
+    
+    typedef HeaderFileInfo data_type;
+    
+    HeaderFileInfoTrait(const char *SearchPath = 0) : SearchPath(SearchPath) { }
+    
+    static unsigned ComputeHash(const char *path) {
+      return llvm::HashString(llvm::sys::path::filename(path));
+    }
+    
+    static internal_key_type GetInternalKey(const char *path) { return path; }
+    
+    bool EqualKey(internal_key_type a, internal_key_type b) {
+      if (strcmp(a, b) == 0)
+        return true;
+      
+      if (llvm::sys::path::filename(a) != llvm::sys::path::filename(b))
+        return false;
+      
+      // The file names match, but the path names don't. stat() the files to
+      // see if they are the same.      
+      struct stat StatBufA, StatBufB;
+      if (StatSimpleCache(a, &StatBufA) || StatSimpleCache(b, &StatBufB))
+        return false;
+      
+      return StatBufA.st_ino == StatBufB.st_ino;
+    }
+    
+    static std::pair<unsigned, unsigned>
+    ReadKeyDataLength(const unsigned char*& d) {
+      unsigned KeyLen = (unsigned) clang::io::ReadUnalignedLE16(d);
+      unsigned DataLen = (unsigned) *d++;
+      return std::make_pair(KeyLen + 1, DataLen);
+    }
+    
+    static internal_key_type ReadKey(const unsigned char *d, unsigned) {
+      return (const char *)d;
+    }
+    
+    static data_type ReadData(const internal_key_type, const unsigned char *d,
+                              unsigned DataLen) {
+      const unsigned char *End = d + DataLen;
+      using namespace clang::io;
+      HeaderFileInfo HFI;
+      unsigned Flags = *d++;
+      HFI.isImport = (Flags >> 3) & 0x01;
+      HFI.DirInfo = (Flags >> 1) & 0x03;
+      HFI.Resolved = Flags & 0x01;
+      HFI.NumIncludes = ReadUnalignedLE16(d);
+      HFI.ControllingMacroID = ReadUnalignedLE32(d);
+      assert(End == d && "Wrong data length in HeaderFileInfo deserialization");
+      (void)End;
+      
+      // This HeaderFileInfo was externally loaded.
+      HFI.External = true;
+      return HFI;
+    }
+  };
+}
+
+/// \brief The on-disk hash table used for the global method pool.
+typedef OnDiskChainedHashTable<HeaderFileInfoTrait>
+  HeaderFileInfoLookupTable;
+
 void ASTReader::SetIdentifierIsMacro(IdentifierInfo *II, PerFileData &F,
                                      uint64_t Offset) {
   // Note that this identifier has a macro definition.
@@ -2152,11 +2239,24 @@
         PragmaDiagMappings.insert(PragmaDiagMappings.end(),
                                 Record.begin(), Record.end());
       break;
-
+        
     case CUDA_SPECIAL_DECL_REFS:
       // Later tables overwrite earlier ones.
       CUDASpecialDeclRefs.swap(Record);
       break;
+
+    case HEADER_SEARCH_TABLE:
+      F.HeaderFileInfoTableData = BlobStart;
+      F.LocalNumHeaderFileInfos = Record[1];
+      if (Record[0]) {
+        F.HeaderFileInfoTable
+          = HeaderFileInfoLookupTable::Create(
+                   (const unsigned char *)F.HeaderFileInfoTableData + Record[0],
+                   (const unsigned char *)F.HeaderFileInfoTableData);
+        if (PP)
+          PP->getHeaderSearchInfo().SetExternalSource(this);
+      }
+      break;
     }
     First = false;
   }
@@ -2400,7 +2500,8 @@
   PP->getIdentifierTable().setExternalIdentifierLookup(this);
   PP->getHeaderSearchInfo().SetExternalLookup(this);
   PP->setExternalSource(this);
-
+  PP->getHeaderSearchInfo().SetExternalSource(this);
+  
   // If we have an update block for the TU waiting, we have to add it before
   // deserializing the decl.
   DeclContextOffsetsMap::iterator DCU = DeclContextOffsets.find(0);
@@ -2709,6 +2810,31 @@
   return LoadPreprocessedEntity(*F);
 }
 
+HeaderFileInfo ASTReader::GetHeaderFileInfo(const FileEntry *FE) {
+  HeaderFileInfoTrait Trait(FE->getName());
+  for (unsigned I = 0, N = Chain.size(); I != N; ++I) {
+    PerFileData &F = *Chain[I];
+    HeaderFileInfoLookupTable *Table
+      = static_cast<HeaderFileInfoLookupTable *>(F.HeaderFileInfoTable);
+    if (!Table)
+      continue;
+    
+    // Look in the on-disk hash table for an entry for this file name.
+    HeaderFileInfoLookupTable::iterator Pos = Table->find(FE->getName(), 
+                                                          &Trait);
+    if (Pos == Table->end())
+      continue;
+
+    HeaderFileInfo HFI = *Pos;
+    if (Listener)
+      Listener->ReadHeaderFileInfo(HFI, FE->getUID());
+
+    return HFI;
+  }
+  
+  return HeaderFileInfo();
+}
+
 void ASTReader::ReadPragmaDiagnosticMappings(Diagnostic &Diag) {
   unsigned Idx = 0;
   while (Idx < PragmaDiagMappings.size()) {
@@ -4765,7 +4891,10 @@
   : Type(Ty), SizeInBits(0), LocalNumSLocEntries(0), SLocOffsets(0), LocalSLocSize(0),
     LocalNumIdentifiers(0), IdentifierOffsets(0), IdentifierTableData(0),
     IdentifierLookupTable(0), LocalNumMacroDefinitions(0),
-    MacroDefinitionOffsets(0), LocalNumSelectors(0), SelectorOffsets(0),
+    MacroDefinitionOffsets(0), 
+    LocalNumHeaderFileInfos(0), HeaderFileInfoTableData(0),
+    HeaderFileInfoTable(0),
+    LocalNumSelectors(0), SelectorOffsets(0),
     SelectorLookupTableData(0), SelectorLookupTable(0), LocalNumDecls(0),
     DeclOffsets(0), LocalNumCXXBaseSpecifiers(0), CXXBaseSpecifiersOffsets(0),
     LocalNumTypes(0), TypeOffsets(0), StatCache(0),
@@ -4774,6 +4903,7 @@
 
 ASTReader::PerFileData::~PerFileData() {
   delete static_cast<ASTIdentifierLookupTable *>(IdentifierLookupTable);
+  delete static_cast<HeaderFileInfoLookupTable *>(HeaderFileInfoTable);
   delete static_cast<ASTSelectorLookupTable *>(SelectorLookupTable);
 }
 

Modified: cfe/trunk/lib/Serialization/ASTWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTWriter.cpp?rev=125286&r1=125285&r2=125286&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTWriter.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTWriter.cpp Thu Feb 10 11:09:37 2011
@@ -45,6 +45,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include <cstdio>
+#include <string.h>
 using namespace clang;
 using namespace clang::serialization;
 
@@ -739,7 +740,8 @@
   RECORD(DECL_UPDATES);
   RECORD(CXX_BASE_SPECIFIER_OFFSETS);
   RECORD(DIAG_PRAGMA_MAPPINGS);
-         
+  RECORD(HEADER_SEARCH_TABLE);
+  
   // SourceManager Block.
   BLOCK(SOURCE_MANAGER_BLOCK);
   RECORD(SM_SLOC_FILE_ENTRY);
@@ -1146,11 +1148,6 @@
   // FileEntry fields.
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 12)); // Size
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // Modification time
-  // HeaderFileInfo fields.
-  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isImport
-  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // DirInfo
-  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // NumIncludes
-  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // ControllingMacro
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
   return Stream.EmitAbbrev(Abbrev);
 }
@@ -1193,6 +1190,135 @@
   return Stream.EmitAbbrev(Abbrev);
 }
 
+namespace {
+  // Trait used for the on-disk hash table of header search information.
+  class HeaderFileInfoTrait {
+    ASTWriter &Writer;
+    HeaderSearch &HS;
+    
+  public:
+    HeaderFileInfoTrait(ASTWriter &Writer, HeaderSearch &HS) 
+      : Writer(Writer), HS(HS) { }
+    
+    typedef const char *key_type;
+    typedef key_type key_type_ref;
+    
+    typedef HeaderFileInfo data_type;
+    typedef const data_type &data_type_ref;
+    
+    static unsigned ComputeHash(const char *path) {
+      // The hash is based only on the filename portion of the key, so that the
+      // reader can match based on filenames when symlinking or excess path
+      // elements ("foo/../", "../") change the form of the name. However,
+      // complete path is still the key.
+      return llvm::HashString(llvm::sys::path::filename(path));
+    }
+    
+    std::pair<unsigned,unsigned>
+    EmitKeyDataLength(llvm::raw_ostream& Out, const char *path,
+                      data_type_ref Data) {
+      unsigned StrLen = strlen(path);
+      clang::io::Emit16(Out, StrLen);
+      unsigned DataLen = 1 + 2 + 4;
+      clang::io::Emit8(Out, DataLen);
+      return std::make_pair(StrLen + 1, DataLen);
+    }
+    
+    void EmitKey(llvm::raw_ostream& Out, const char *path, unsigned KeyLen) {
+      Out.write(path, KeyLen);
+    }
+    
+    void EmitData(llvm::raw_ostream &Out, key_type_ref,
+                  data_type_ref Data, unsigned DataLen) {
+      using namespace clang::io;
+      uint64_t Start = Out.tell(); (void)Start;
+      
+      unsigned char Flags = (Data.isImport << 3)
+                          | (Data.DirInfo << 1)
+                          | Data.Resolved;
+      Emit8(Out, (uint8_t)Flags);
+      Emit16(Out, (uint16_t) Data.NumIncludes);
+      
+      if (!Data.ControllingMacro)
+        Emit32(Out, (uint32_t)Data.ControllingMacroID);
+      else
+        Emit32(Out, (uint32_t)Writer.getIdentifierRef(Data.ControllingMacro));
+      assert(Out.tell() - Start == DataLen && "Wrong data length");
+    }
+  };
+} // end anonymous namespace
+
+/// \brief Write the header search block for the list of files that 
+///
+/// \param HS The header search structure to save.
+///
+/// \param Chain Whether we're creating a chained AST file.
+void ASTWriter::WriteHeaderSearch(HeaderSearch &HS, const char* isysroot) {
+  llvm::SmallVector<const FileEntry *, 16> FilesByUID;
+  HS.getFileMgr().GetUniqueIDMapping(FilesByUID);
+  
+  if (FilesByUID.size() > HS.header_file_size())
+    FilesByUID.resize(HS.header_file_size());
+  
+  HeaderFileInfoTrait GeneratorTrait(*this, HS);
+  OnDiskChainedHashTableGenerator<HeaderFileInfoTrait> Generator;  
+  llvm::SmallVector<const char *, 4> SavedStrings;
+  unsigned NumHeaderSearchEntries = 0;
+  for (unsigned UID = 0, LastUID = FilesByUID.size(); UID != LastUID; ++UID) {
+    const FileEntry *File = FilesByUID[UID];
+    if (!File)
+      continue;
+
+    const HeaderFileInfo &HFI = HS.header_file_begin()[UID];
+    if (HFI.External && Chain)
+      continue;
+
+    // Turn the file name into an absolute path, if it isn't already.
+    const char *Filename = File->getName();
+    Filename = adjustFilenameForRelocatablePCH(Filename, isysroot);
+      
+    // If we performed any translation on the file name at all, we need to
+    // save this string, since the generator will refer to it later.
+    if (Filename != File->getName()) {
+      Filename = strdup(Filename);
+      SavedStrings.push_back(Filename);
+    }
+    
+    Generator.insert(Filename, HFI, GeneratorTrait);
+    ++NumHeaderSearchEntries;
+  }
+  
+  // Create the on-disk hash table in a buffer.
+  llvm::SmallString<4096> TableData;
+  uint32_t BucketOffset;
+  {
+    llvm::raw_svector_ostream Out(TableData);
+    // Make sure that no bucket is at offset 0
+    clang::io::Emit32(Out, 0);
+    BucketOffset = Generator.Emit(Out, GeneratorTrait);
+  }
+
+  // Create a blob abbreviation
+  using namespace llvm;
+  BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
+  Abbrev->Add(BitCodeAbbrevOp(HEADER_SEARCH_TABLE));
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+  unsigned TableAbbrev = Stream.EmitAbbrev(Abbrev);
+  
+  // Write the stat cache
+  RecordData Record;
+  Record.push_back(HEADER_SEARCH_TABLE);
+  Record.push_back(BucketOffset);
+  Record.push_back(NumHeaderSearchEntries);
+  Stream.EmitRecordWithBlob(TableAbbrev, Record, TableData.str());
+  
+  // Free all of the strings we had to duplicate.
+  for (unsigned I = 0, N = SavedStrings.size(); I != N; ++I)
+    free((void*)SavedStrings[I]);
+}
+
 /// \brief Writes the block containing the serialized form of the
 /// source manager.
 ///
@@ -1294,16 +1420,6 @@
         Record.push_back(Content->Entry->getSize());
         Record.push_back(Content->Entry->getModificationTime());
 
-        // Emit header-search information associated with this file.
-        HeaderFileInfo HFI;
-        HeaderSearch &HS = PP.getHeaderSearchInfo();
-        if (Content->Entry->getUID() < HS.header_file_size())
-          HFI = HS.header_file_begin()[Content->Entry->getUID()];
-        Record.push_back(HFI.isImport);
-        Record.push_back(HFI.DirInfo);
-        Record.push_back(HFI.NumIncludes);
-        AddIdentifierRef(HFI.ControllingMacro, Record);
-
         // Turn the file name into an absolute path, if it isn't already.
         const char *Filename = Content->Entry->getName();
         llvm::SmallString<128> FilePath(Filename);
@@ -1312,11 +1428,6 @@
 
         Filename = adjustFilenameForRelocatablePCH(Filename, isysroot);
         Stream.EmitRecordWithBlob(SLocFileAbbrv, Record, Filename);
-
-        // FIXME: For now, preload all file source locations, so that
-        // we get the appropriate File entries in the reader. This is
-        // a temporary measure.
-        PreloadSLocs.push_back(BaseSLocID + SLocEntryOffsets.size());
       } else {
         // The source location entry is a buffer. The blob associated
         // with this entry contains the contents of the buffer.
@@ -2596,6 +2707,7 @@
   Stream.ExitBlock();
 
   WritePreprocessor(PP);
+  WriteHeaderSearch(PP.getHeaderSearchInfo(), isysroot);
   WriteSelectors(SemaRef);
   WriteReferencedSelectorsPool(SemaRef);
   WriteIdentifierTable(PP);





More information about the cfe-commits mailing list