[cfe-commits] r69737 - in /cfe/trunk: include/clang/AST/ExternalASTSource.h include/clang/Basic/IdentifierTable.h include/clang/Basic/OnDiskHashTable.h include/clang/Frontend/PCHReader.h include/clang/Sema/ExternalSemaSource.h include/clang/Sema/SemaConsumer.h lib/Frontend/PCHReader.cpp lib/Frontend/PCHWriter.cpp lib/Sema/IdentifierResolver.cpp lib/Sema/IdentifierResolver.h lib/Sema/ParseAST.cpp lib/Sema/SemaLookup.cpp test/PCH/builtins.c test/PCH/builtins.h

Douglas Gregor dgregor at apple.com
Tue Apr 21 15:25:48 PDT 2009


Author: dgregor
Date: Tue Apr 21 17:25:48 2009
New Revision: 69737

URL: http://llvm.org/viewvc/llvm-project?rev=69737&view=rev
Log:
Lazy deserialization of the declaration chains associated with
identifiers from a precompiled header.

This patch changes the primary name lookup method for entities within
a precompiled header. Previously, we would load all of the names of
declarations at translation unit scope into a large DenseMap (inside
the TranslationUnitDecl's DeclContext), and then perform a special
"last resort" lookup into this DeclContext when we knew there was a
PCH file (see Sema::LookupName). Now, when we see an identifier named
for the first time, we load all of the declarations with that name
that are visible from the translation unit into the IdentifierInfo's
chain of declarations. Thus, the explicit "look into the translation
unit's DeclContext" code is gone, and Sema effectively uses the same
IdentifierInfo-based name lookup mechanism whether we are using a PCH
file or not. 

This approach should help PCH scale with the size of the input program
rather than the size of the PCH file. The "Hello, World!" application
with Carbon.h as a PCH file now loads 20% of the identifiers in the
PCH file rather than 85% of the identifiers. 

90% of the 20% of identifiers loaded are actually loaded when we
deserialize the preprocessor state. The next step is to make the
preprocessor load macros lazily, which should drastically reduce the
number of types, declarations, and identifiers loaded for "Hello,
World!".


Added:
    cfe/trunk/include/clang/Sema/ExternalSemaSource.h
    cfe/trunk/test/PCH/builtins.c
    cfe/trunk/test/PCH/builtins.h
Modified:
    cfe/trunk/include/clang/AST/ExternalASTSource.h
    cfe/trunk/include/clang/Basic/IdentifierTable.h
    cfe/trunk/include/clang/Basic/OnDiskHashTable.h
    cfe/trunk/include/clang/Frontend/PCHReader.h
    cfe/trunk/include/clang/Sema/SemaConsumer.h
    cfe/trunk/lib/Frontend/PCHReader.cpp
    cfe/trunk/lib/Frontend/PCHWriter.cpp
    cfe/trunk/lib/Sema/IdentifierResolver.cpp
    cfe/trunk/lib/Sema/IdentifierResolver.h
    cfe/trunk/lib/Sema/ParseAST.cpp
    cfe/trunk/lib/Sema/SemaLookup.cpp

Modified: cfe/trunk/include/clang/AST/ExternalASTSource.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/ExternalASTSource.h?rev=69737&r1=69736&r2=69737&view=diff

==============================================================================
--- cfe/trunk/include/clang/AST/ExternalASTSource.h (original)
+++ cfe/trunk/include/clang/AST/ExternalASTSource.h Tue Apr 21 17:25:48 2009
@@ -7,7 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-//  This file defines the ExternalASTSource interface, 
+//  This file defines the ExternalASTSource interface, which enables
+//  construction of AST nodes from some external source.
 //
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_CLANG_AST_EXTERNAL_AST_SOURCE_H
@@ -22,6 +23,7 @@
 class ASTConsumer;
 class Decl;
 class DeclContext;
+class ExternalSemaSource; // layering violation required for downcasting
 class Stmt;
 
 /// \brief The deserialized representation of a set of declarations
@@ -44,7 +46,15 @@
 /// actual type and declaration nodes, and read parts of declaration
 /// contexts.
 class ExternalASTSource {
+  /// \brief Whether this AST source also provides information for
+  /// semantic analysis.
+  bool SemaSource;
+
+  friend class ExternalSemaSource;
+
 public:
+  ExternalASTSource() : SemaSource(false) { }
+
   virtual ~ExternalASTSource();
 
   /// \brief Resolve a type ID into a type, potentially building a new

Modified: cfe/trunk/include/clang/Basic/IdentifierTable.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/IdentifierTable.h?rev=69737&r1=69736&r2=69737&view=diff

==============================================================================
--- cfe/trunk/include/clang/Basic/IdentifierTable.h (original)
+++ cfe/trunk/include/clang/Basic/IdentifierTable.h Tue Apr 21 17:25:48 2009
@@ -229,7 +229,7 @@
 };
 
 /// IdentifierInfoLookup - An abstract class used by IdentifierTable that
-///  provides an interface for for performing lookups from strings
+///  provides an interface for performing lookups from strings
 /// (const char *) to IdentiferInfo objects.
 class IdentifierInfoLookup {
 public:
@@ -260,6 +260,11 @@
   IdentifierTable(const LangOptions &LangOpts,
                   IdentifierInfoLookup* externalLookup = 0);
   
+  /// \brief Set the external identifier lookup mechanism.
+  void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
+    ExternalLookup = IILookup;
+  }
+
   llvm::BumpPtrAllocator& getAllocator() {
     return HashTable.getAllocator();
   }
@@ -295,6 +300,34 @@
     return *II;
   }
   
+  /// \brief Creates a new IdentifierInfo from the given string.
+  ///
+  /// This is a lower-level version of get() that requires that this
+  /// identifier not be known previously and that does not consult an
+  /// external source for identifiers. In particular, external
+  /// identifier sources can use this routine to build IdentifierInfo
+  /// nodes and then introduce additional information about those
+  /// identifiers.
+  IdentifierInfo &CreateIdentifierInfo(const char *NameStart, 
+                                       const char *NameEnd) {
+    llvm::StringMapEntry<IdentifierInfo*> &Entry =
+      HashTable.GetOrCreateValue(NameStart, NameEnd);
+    
+    IdentifierInfo *II = Entry.getValue();
+    assert(!II && "IdentifierInfo already exists");
+    
+    // Lookups failed, make a new IdentifierInfo.
+    void *Mem = getAllocator().Allocate<IdentifierInfo>();
+    II = new (Mem) IdentifierInfo();
+    Entry.setValue(II);
+
+    // Make sure getName() knows how to find the IdentifierInfo
+    // contents.
+    II->Entry = &Entry;
+
+    return *II;
+  }
+
   IdentifierInfo &get(const char *Name) {
     return get(Name, Name+strlen(Name));
   }
@@ -304,14 +337,11 @@
     return get(NameBytes, NameBytes+Name.size());
   }
 
-private:
   typedef HashTableTy::const_iterator iterator;
   typedef HashTableTy::const_iterator const_iterator;
   
   iterator begin() const { return HashTable.begin(); }
   iterator end() const   { return HashTable.end(); }
-public:
-  
   unsigned size() const { return HashTable.size(); }
   
   /// PrintStats - Print some statistics to stderr that indicate how well the

Modified: cfe/trunk/include/clang/Basic/OnDiskHashTable.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/OnDiskHashTable.h?rev=69737&r1=69736&r2=69737&view=diff

==============================================================================
--- cfe/trunk/include/clang/Basic/OnDiskHashTable.h (original)
+++ cfe/trunk/include/clang/Basic/OnDiskHashTable.h Tue Apr 21 17:25:48 2009
@@ -242,6 +242,8 @@
   const unsigned NumEntries;
   const unsigned char* const Buckets;
   const unsigned char* const Base;
+  Info InfoObj;
+
 public:
   typedef typename Info::internal_key_type internal_key_type;
   typedef typename Info::external_key_type external_key_type;
@@ -249,9 +251,10 @@
   
   OnDiskChainedHashTable(unsigned numBuckets, unsigned numEntries,
                          const unsigned char* buckets,
-                         const unsigned char* base)
+                         const unsigned char* base,
+                         const Info &InfoObj = Info())
     : NumBuckets(numBuckets), NumEntries(numEntries),
-      Buckets(buckets), Base(base) {        
+      Buckets(buckets), Base(base), InfoObj(InfoObj) {
         assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
                "'buckets' must have a 4-byte alignment");
       }
@@ -267,22 +270,27 @@
     internal_key_type key;
     const unsigned char* const data;
     const unsigned len;
+    Info *InfoObj;
   public:
     iterator() : data(0), len(0) {}
-    iterator(const internal_key_type k, const unsigned char* d, unsigned l)
-      : key(k), data(d), len(l) {}
+    iterator(const internal_key_type k, const unsigned char* d, unsigned l,
+             Info *InfoObj)
+      : key(k), data(d), len(l), InfoObj(InfoObj) {}
     
-    data_type operator*() const { return Info::ReadData(key, data, len); }    
+    data_type operator*() const { return InfoObj->ReadData(key, data, len); }    
     bool operator==(const iterator& X) const { return X.data == data; }    
     bool operator!=(const iterator& X) const { return X.data != data; }
   };    
   
-  iterator find(const external_key_type& eKey) {
+  iterator find(const external_key_type& eKey, Info *InfoPtr = 0) {
+    if (!InfoPtr)
+      InfoPtr = &InfoObj;
+
     using namespace io;
     const internal_key_type& iKey = Info::GetInternalKey(eKey);
     unsigned key_hash = Info::ComputeHash(iKey);
     
-    // Each bucket is just a 32-bit offset into the PTH file.
+    // Each bucket is just a 32-bit offset into the hash table file.
     unsigned idx = key_hash & (NumBuckets - 1);
     const unsigned char* Bucket = Buckets + sizeof(uint32_t)*idx;
     
@@ -319,7 +327,7 @@
       }
       
       // The key matches!
-      return iterator(X, Items + L.first, L.second);
+      return iterator(X, Items + L.first, L.second, InfoPtr);
     }
     
     return iterator();
@@ -329,7 +337,8 @@
   
   
   static OnDiskChainedHashTable* Create(const unsigned char* buckets,
-                                        const unsigned char* const base) {
+                                        const unsigned char* const base,
+                                        const Info &InfoObj = Info()) {
     using namespace io;
     assert(buckets > base);
     assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
@@ -338,7 +347,7 @@
     unsigned numBuckets = ReadLE32(buckets);
     unsigned numEntries = ReadLE32(buckets);
     return new OnDiskChainedHashTable<Info>(numBuckets, numEntries, buckets,
-                                            base);
+                                            base, InfoObj);
   }  
 };
 

Modified: cfe/trunk/include/clang/Frontend/PCHReader.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Frontend/PCHReader.h?rev=69737&r1=69736&r2=69737&view=diff

==============================================================================
--- cfe/trunk/include/clang/Frontend/PCHReader.h (original)
+++ cfe/trunk/include/clang/Frontend/PCHReader.h Tue Apr 21 17:25:48 2009
@@ -15,9 +15,10 @@
 
 #include "clang/Frontend/PCHBitCodes.h"
 #include "clang/AST/DeclarationName.h"
-#include "clang/AST/ExternalASTSource.h"
+#include "clang/Sema/ExternalSemaSource.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/IdentifierTable.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/APSInt.h"
@@ -44,7 +45,9 @@
 class DeclContext;
 class GotoStmt;
 class LabelStmt;
+class NamedDecl;
 class Preprocessor;
+class Sema;
 class SwitchCase;
 
 /// \brief Reads a precompiled head containing the contents of a
@@ -59,11 +62,15 @@
 /// The PCH reader provides lazy de-serialization of declarations, as
 /// required when traversing the AST. Only those AST nodes that are
 /// actually required will be de-serialized.
-class PCHReader : public ExternalASTSource {
+class PCHReader : public ExternalSemaSource, public IdentifierInfoLookup {
 public:
   enum PCHReadResult { Success, Failure, IgnorePCH };
 
 private:
+  /// \brief The semantic analysis object that will be processing the
+  /// PCH file and the translation unit that uses it.
+  Sema *SemaObj;
+
   /// \brief The preprocessor that will be loading the source file.
   Preprocessor &PP;
 
@@ -116,8 +123,14 @@
   /// DeclContext.
   DeclContextOffsetsMap DeclContextOffsets;
 
-  /// \brief String data for the identifiers in the PCH file.
-  const char *IdentifierTable;
+  /// \brief Actual data for the on-disk hash table.
+  ///
+  /// FIXME: This will eventually go away.
+  const char *IdentifierTableData;
+
+  /// \brief A pointer to an on-disk hash table of opaque type
+  /// IdentifierHashTable.
+  void *IdentifierLookupTable;
 
   /// \brief String data for identifiers, indexed by the identifier ID
   /// minus one.
@@ -158,10 +171,10 @@
   /// in the PCH file.
   unsigned TotalNumStatements;
 
-  /// \brief 
+  /// \brief FIXME: document!
   llvm::SmallVector<uint64_t, 4> SpecialTypes;
 
-  PCHReadResult ReadPCHBlock();
+  PCHReadResult ReadPCHBlock(uint64_t &PreprocessorBlockOffset);
   bool CheckPredefinesBuffer(const char *PCHPredef, 
                              unsigned PCHPredefLen,
                              FileID PCHBufferID);
@@ -179,8 +192,9 @@
 public:
   typedef llvm::SmallVector<uint64_t, 64> RecordData;
 
-  PCHReader(Preprocessor &PP, ASTContext &Context) 
-    : PP(PP), Context(Context), IdentifierTable(0), NumStatementsRead(0) { }
+  explicit PCHReader(Preprocessor &PP, ASTContext &Context) 
+    : SemaObj(0), PP(PP), Context(Context), 
+      IdentifierTableData(0), NumStatementsRead(0) { }
 
   ~PCHReader() {}
 
@@ -246,6 +260,23 @@
   /// \brief Print some statistics about PCH usage.
   virtual void PrintStats();
 
+  /// \brief Initialize the semantic source with the Sema instance
+  /// being used to perform semantic analysis on the abstract syntax
+  /// tree.
+  virtual void InitializeSema(Sema &S);
+
+  /// \brief Retrieve the IdentifierInfo for the named identifier.
+  ///
+  /// This routine builds a new IdentifierInfo for the given
+  /// identifier. If any declarations with this name are visible from
+  /// translation unit scope, those declarations will be deserialized
+  /// and introduced into the declaration chain of the
+  /// identifier. FIXME: if this identifier names a macro, deserialize
+  /// the macro.
+  virtual IdentifierInfo* get(const char *NameStart, const char *NameEnd);
+
+  void SetIdentifierInfo(unsigned ID, const IdentifierInfo *II);
+
   /// \brief Report a diagnostic.
   DiagnosticBuilder Diag(unsigned DiagID);
 
@@ -284,9 +315,22 @@
   /// supplements.
   ASTContext &getContext() { return Context; }
 
+  // FIXME: temporary hack to store declarations that we deserialized
+  // before we had access to the Sema object.
+  llvm::SmallVector<NamedDecl *, 16> TUDecls;
+
+  /// \brief Retrieve the semantic analysis object used to analyze the
+  /// translation unit in which the precompiled header is being
+  /// imported.
+  Sema *getSema() { return SemaObj; }
+
   /// \brief Retrieve the stream that this PCH reader is reading from.
   llvm::BitstreamReader &getStream() { return Stream; }
 
+  /// \brief Retrieve the identifier table associated with the
+  /// preprocessor.
+  IdentifierTable &getIdentifierTable();
+
   /// \brief Record that the given ID maps to the given switch-case
   /// statement.
   void RecordSwitchCaseID(SwitchCase *SC, unsigned ID);

Added: cfe/trunk/include/clang/Sema/ExternalSemaSource.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/ExternalSemaSource.h?rev=69737&view=auto

==============================================================================
--- cfe/trunk/include/clang/Sema/ExternalSemaSource.h (added)
+++ cfe/trunk/include/clang/Sema/ExternalSemaSource.h Tue Apr 21 17:25:48 2009
@@ -0,0 +1,45 @@
+//===--- ExternalSemaSource.h - External Sema Interface ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the ExternalSemaSource interface.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_SEMA_EXTERNAL_SEMA_SOURCE_H
+#define LLVM_CLANG_SEMA_EXTERNAL_SEMA_SOURCE_H
+
+#include "clang/AST/ExternalASTSource.h"
+
+namespace clang {
+
+class Sema;
+
+/// \brief An abstract interface that should be implemented by
+/// external AST sources that also provide information for semantic
+/// analysis.
+class ExternalSemaSource : public ExternalASTSource {
+public:
+  ExternalSemaSource() {
+    ExternalASTSource::SemaSource = true;
+  }
+
+  /// \brief Initialize the semantic source with the Sema instance
+  /// being used to perform semantic analysis on the abstract syntax
+  /// tree.
+  virtual void InitializeSema(Sema &S) {}
+  
+  // isa/cast/dyn_cast support
+  static bool classof(const ExternalASTSource *Source) { 
+    return Source->SemaSource;
+  }
+  static bool classof(const ExternalSemaSource *) { return true; }
+};
+
+} // end namespace clang
+
+#endif

Modified: cfe/trunk/include/clang/Sema/SemaConsumer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/SemaConsumer.h?rev=69737&r1=69736&r2=69737&view=diff

==============================================================================
--- cfe/trunk/include/clang/Sema/SemaConsumer.h (original)
+++ cfe/trunk/include/clang/Sema/SemaConsumer.h Tue Apr 21 17:25:48 2009
@@ -25,7 +25,7 @@
   /// analysis of the entities in those ASTs.
   class SemaConsumer : public ASTConsumer {
   public:
-    explicit SemaConsumer() {
+    SemaConsumer() {
       ASTConsumer::SemaConsumer = true;
     }
 

Modified: cfe/trunk/lib/Frontend/PCHReader.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/PCHReader.cpp?rev=69737&r1=69736&r2=69737&view=diff

==============================================================================
--- cfe/trunk/lib/Frontend/PCHReader.cpp (original)
+++ cfe/trunk/lib/Frontend/PCHReader.cpp Tue Apr 21 17:25:48 2009
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 #include "clang/Frontend/PCHReader.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
+#include "../Sema/Sema.h" // FIXME: move Sema headers elsewhere
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
@@ -22,6 +23,7 @@
 #include "clang/AST/Type.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/OnDiskHashTable.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/SourceManagerInternals.h"
 #include "clang/Basic/FileManager.h"
@@ -994,6 +996,111 @@
   return 0;
 }
 
+//===----------------------------------------------------------------------===//
+// PCH reader implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+class VISIBILITY_HIDDEN PCHIdentifierLookupTrait {
+  PCHReader &Reader;
+
+  // If we know the IdentifierInfo in advance, it is here and we will
+  // not build a new one. Used when deserializing information about an
+  // identifier that was constructed before the PCH file was read.
+  IdentifierInfo *KnownII;
+
+public:
+  typedef IdentifierInfo * data_type;
+
+  typedef const std::pair<const char*, unsigned> external_key_type;
+
+  typedef external_key_type internal_key_type;
+
+  explicit PCHIdentifierLookupTrait(PCHReader &Reader, IdentifierInfo *II = 0) 
+    : Reader(Reader), KnownII(II) { }
+  
+  static bool EqualKey(const internal_key_type& a,
+                       const internal_key_type& b) {
+    return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
+                                  : false;
+  }
+  
+  static unsigned ComputeHash(const internal_key_type& a) {
+    return BernsteinHash(a.first, a.second);
+  }
+  
+  // This hopefully will just get inlined and removed by the optimizer.
+  static const internal_key_type&
+  GetInternalKey(const external_key_type& x) { return x; }
+  
+  static std::pair<unsigned, unsigned>
+  ReadKeyDataLength(const unsigned char*& d) {
+    using namespace clang::io;
+    unsigned KeyLen = ReadUnalignedLE16(d);
+    unsigned DataLen = ReadUnalignedLE16(d);
+    return std::make_pair(KeyLen, DataLen);
+  }
+    
+  static std::pair<const char*, unsigned>
+  ReadKey(const unsigned char* d, unsigned n) {
+    assert(n >= 2 && d[n-1] == '\0');
+    return std::make_pair((const char*) d, n-1);
+  }
+    
+  IdentifierInfo *ReadData(const internal_key_type& k, 
+                           const unsigned char* d,
+                           unsigned DataLen) {
+    using namespace clang::io;
+    uint32_t Bits = ReadUnalignedLE32(d); // FIXME: use these?
+    (void)Bits;
+    pch::IdentID ID = ReadUnalignedLE32(d);
+    DataLen -= 8;
+
+    // Build the IdentifierInfo itself and link the identifier ID with
+    // the new IdentifierInfo.
+    IdentifierInfo *II = KnownII;
+    if (!II)
+      II = &Reader.getIdentifierTable().CreateIdentifierInfo(
+                                                 k.first, k.first + k.second);
+    Reader.SetIdentifierInfo(ID, II);
+
+    // FIXME: If this identifier is a macro, deserialize the macro
+    // definition now.
+
+    // Read all of the declarations visible at global scope with this
+    // name.
+    Sema *SemaObj = Reader.getSema();
+    while (DataLen > 0) {
+      NamedDecl *D = cast<NamedDecl>(Reader.GetDecl(ReadUnalignedLE32(d)));
+
+      if (SemaObj) {
+        // Introduce this declaration into the translation-unit scope
+        // and add it to the declaration chain for this identifier, so
+        // that (unqualified) name lookup will find it.
+        SemaObj->TUScope->AddDecl(Action::DeclPtrTy::make(D));
+        SemaObj->IdResolver.AddDeclToIdentifierChain(II, D);
+      } else {
+        // Queue this declaration so that it will be added to the
+        // translation unit scope and identifier's declaration chain
+        // once a Sema object is known.
+        // FIXME: This is a temporary hack. It will go away once we have
+        // lazy deserialization of macros.
+        Reader.TUDecls.push_back(D);
+      }
+
+      DataLen -= 4;
+    }
+    return II;
+  }
+};
+  
+} // end anonymous namespace  
+
+/// \brief The on-disk hash table used to contain information about
+/// all of the identifiers in the program.
+typedef OnDiskChainedHashTable<PCHIdentifierLookupTrait> 
+  PCHIdentifierLookupTable;
+
 // FIXME: use the diagnostics machinery
 static bool Error(const char *Str) {
   std::fprintf(stderr, "%s\n", Str);
@@ -1314,30 +1421,18 @@
   }
 }
 
-PCHReader::PCHReadResult PCHReader::ReadPCHBlock() {
+PCHReader::PCHReadResult 
+PCHReader::ReadPCHBlock(uint64_t &PreprocessorBlockOffset) {
   if (Stream.EnterSubBlock(pch::PCH_BLOCK_ID)) {
     Error("Malformed block record");
     return Failure;
   }
 
-  uint64_t PreprocessorBlockBit = 0;
-
   // Read all of the records and blocks for the PCH file.
   RecordData Record;
   while (!Stream.AtEndOfStream()) {
     unsigned Code = Stream.ReadCode();
     if (Code == llvm::bitc::END_BLOCK) {
-      // If we saw the preprocessor block, read it now.
-      if (PreprocessorBlockBit) {
-        uint64_t SavedPos = Stream.GetCurrentBitNo();
-        Stream.JumpToBit(PreprocessorBlockBit);
-        if (ReadPreprocessorBlock()) {
-          Error("Malformed preprocessor block");
-          return Failure;
-        }
-        Stream.JumpToBit(SavedPos);
-      }        
-      
       if (Stream.ReadBlockEnd()) {
         Error("Error at end of module block");
         return Failure;
@@ -1360,11 +1455,11 @@
       case pch::PREPROCESSOR_BLOCK_ID:
         // Skip the preprocessor block for now, but remember where it is.  We
         // want to read it in after the identifier table.
-        if (PreprocessorBlockBit) {
+        if (PreprocessorBlockOffset) {
           Error("Multiple preprocessor blocks found.");
           return Failure;
         }
-        PreprocessorBlockBit = Stream.GetCurrentBitNo();
+        PreprocessorBlockOffset = Stream.GetCurrentBitNo();
         if (Stream.SkipBlock()) {
           Error("Malformed block record");
           return Failure;
@@ -1437,7 +1532,15 @@
     }
 
     case pch::IDENTIFIER_TABLE:
-      IdentifierTable = BlobStart;
+      IdentifierTableData = BlobStart;
+      IdentifierLookupTable 
+        = PCHIdentifierLookupTable::Create(
+                        (const unsigned char *)IdentifierTableData + Record[0],
+                        (const unsigned char *)IdentifierTableData, 
+                        PCHIdentifierLookupTrait(*this));
+      // FIXME: What about any identifiers already placed into the
+      // identifier table? Should we load decls with those names now?
+      PP.getIdentifierTable().setExternalIdentifierLookup(this);
       break;
 
     case pch::IDENTIFIER_OFFSET:
@@ -1479,6 +1582,23 @@
   return Failure;
 }
 
+namespace {
+  /// \brief Helper class that saves the current stream position and
+  /// then restores it when destroyed.
+  struct VISIBILITY_HIDDEN SavedStreamPosition {
+    explicit SavedStreamPosition(llvm::BitstreamReader &Stream)
+      : Stream(Stream), Offset(Stream.GetCurrentBitNo()) { }
+
+    ~SavedStreamPosition() {
+      Stream.JumpToBit(Offset);
+    }
+
+  private:
+    llvm::BitstreamReader &Stream;
+    uint64_t Offset;
+  };
+}
+
 PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) {
   // Set the PCH file name.
   this->FileName = FileName;
@@ -1506,6 +1626,7 @@
 
   // We expect a number of well-defined blocks, though we don't necessarily
   // need to understand them all.
+  uint64_t PreprocessorBlockOffset = 0;
   while (!Stream.AtEndOfStream()) {
     unsigned Code = Stream.ReadCode();
     
@@ -1515,7 +1636,7 @@
     }
 
     unsigned BlockID = Stream.ReadSubBlockID();
-    
+
     // We only know the PCH subblock ID.
     switch (BlockID) {
     case llvm::bitc::BLOCKINFO_BLOCK_ID:
@@ -1525,7 +1646,7 @@
       }
       break;
     case pch::PCH_BLOCK_ID:
-      switch (ReadPCHBlock()) {
+      switch (ReadPCHBlock(PreprocessorBlockOffset)) {
       case Success:
         break;
 
@@ -1551,28 +1672,54 @@
   // Load the translation unit declaration
   ReadDeclRecord(DeclOffsets[0], 0);
 
+  // Initialization of builtins and library builtins occurs before the
+  // PCH file is read, so there may be some identifiers that were
+  // loaded into the IdentifierTable before we intercepted the
+  // creation of identifiers. Iterate through the list of known
+  // identifiers and determine whether we have to establish
+  // preprocessor definitions or top-level identifier declaration
+  // chains for those identifiers.
+  //
+  // We copy the IdentifierInfo pointers to a small vector first,
+  // since de-serializing declarations or macro definitions can add
+  // new entries into the identifier table, invalidating the
+  // iterators.
+  llvm::SmallVector<IdentifierInfo *, 128> Identifiers;
+  for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(),
+                              IdEnd = PP.getIdentifierTable().end();
+       Id != IdEnd; ++Id)
+    Identifiers.push_back(Id->second);
+  PCHIdentifierLookupTable *IdTable 
+    = (PCHIdentifierLookupTable *)IdentifierLookupTable;
+  for (unsigned I = 0, N = Identifiers.size(); I != N; ++I) {
+    IdentifierInfo *II = Identifiers[I];
+    // Look in the on-disk hash table for an entry for this identifier.
+    PCHIdentifierLookupTrait Info(*this, II);
+    std::pair<const char*, unsigned> Key(II->getName(), II->getLength());
+    PCHIdentifierLookupTable::iterator Pos = IdTable->find(Key, &Info);
+    if (Pos == IdTable->end())
+      continue;
+
+    // Dereferencing the iterator has the effect of populating the
+    // IdentifierInfo node with the various declarations it needs.
+    (void)*Pos;
+  }
+
   // Load the special types.
   Context.setBuiltinVaListType(
     GetType(SpecialTypes[pch::SPECIAL_TYPE_BUILTIN_VA_LIST]));
 
-  return Success;
-}
-
-namespace {
-  /// \brief Helper class that saves the current stream position and
-  /// then restores it when destroyed.
-  struct VISIBILITY_HIDDEN SavedStreamPosition {
-    explicit SavedStreamPosition(llvm::BitstreamReader &Stream)
-      : Stream(Stream), Offset(Stream.GetCurrentBitNo()) { }
-
-    ~SavedStreamPosition() {
-      Stream.JumpToBit(Offset);
+  // If we saw the preprocessor block, read it now.
+  if (PreprocessorBlockOffset) {
+    SavedStreamPosition SavedPos(Stream);
+    Stream.JumpToBit(PreprocessorBlockOffset);
+    if (ReadPreprocessorBlock()) {
+      Error("Malformed preprocessor block");
+      return Failure;
     }
+  }
 
-  private:
-    llvm::BitstreamReader &Stream;
-    uint64_t Offset;
-  };
+  return Success;
 }
 
 /// \brief Parse the record that corresponds to a LangOptions data
@@ -2021,7 +2168,7 @@
   }
   }
 
-  assert(D && "Unknown declaration creating PCH file");
+  assert(D && "Unknown declaration reading PCH file");
   if (D) {
     LoadedDecl(Index, D);
     Reader.Visit(D);
@@ -2220,11 +2367,44 @@
   std::fprintf(stderr, "\n");
 }
 
+void PCHReader::InitializeSema(Sema &S) {
+  SemaObj = &S;
+ 
+  // FIXME: this makes sure any declarations that were deserialized
+  // "too early" still get added to the identifier's declaration
+  // chains.
+  for (unsigned I = 0, N = TUDecls.size(); I != N; ++I) {
+    SemaObj->TUScope->AddDecl(Action::DeclPtrTy::make(TUDecls[I]));
+    SemaObj->IdResolver.AddDecl(TUDecls[I]);
+  }
+  TUDecls.clear();
+}
+
+IdentifierInfo* PCHReader::get(const char *NameStart, const char *NameEnd) {
+  // Try to find this name within our on-disk hash table
+  PCHIdentifierLookupTable *IdTable 
+    = (PCHIdentifierLookupTable *)IdentifierLookupTable;
+  std::pair<const char*, unsigned> Key(NameStart, NameEnd - NameStart);
+  PCHIdentifierLookupTable::iterator Pos = IdTable->find(Key);
+  if (Pos == IdTable->end())
+    return 0;
+
+  // Dereferencing the iterator has the effect of building the
+  // IdentifierInfo node and populating it with the various
+  // declarations it needs.
+  return *Pos;
+}
+
+void PCHReader::SetIdentifierInfo(unsigned ID, const IdentifierInfo *II) {
+  assert(ID && "Non-zero identifier ID required");
+  IdentifierData[ID - 1] = reinterpret_cast<uint64_t>(II);
+}
+
 IdentifierInfo *PCHReader::DecodeIdentifierInfo(unsigned ID) {
   if (ID == 0)
     return 0;
   
-  if (!IdentifierTable || IdentifierData.empty()) {
+  if (!IdentifierTableData || IdentifierData.empty()) {
     Error("No identifier table in PCH file");
     return 0;
   }
@@ -2232,8 +2412,7 @@
   if (IdentifierData[ID - 1] & 0x01) {
     uint64_t Offset = IdentifierData[ID - 1] >> 1;
     IdentifierData[ID - 1] = reinterpret_cast<uint64_t>(
-                               &Context.Idents.get(IdentifierTable + Offset));
-    // FIXME: also read the contents of the IdentifierInfo.
+                               &Context.Idents.get(IdentifierTableData + Offset));
   }
   
   return reinterpret_cast<IdentifierInfo *>(IdentifierData[ID - 1]);
@@ -2724,6 +2903,12 @@
                                     DiagID);
 }
 
+/// \brief Retrieve the identifier table associated with the
+/// preprocessor.
+IdentifierTable &PCHReader::getIdentifierTable() {
+  return PP.getIdentifierTable();
+}
+
 /// \brief Record that the given ID maps to the given switch-case
 /// statement.
 void PCHReader::RecordSwitchCaseID(SwitchCase *SC, unsigned ID) {

Modified: cfe/trunk/lib/Frontend/PCHWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/PCHWriter.cpp?rev=69737&r1=69736&r2=69737&view=diff

==============================================================================
--- cfe/trunk/lib/Frontend/PCHWriter.cpp (original)
+++ cfe/trunk/lib/Frontend/PCHWriter.cpp Tue Apr 21 17:25:48 2009
@@ -1421,7 +1421,7 @@
 ///
 void PCHWriter::WritePreprocessor(const Preprocessor &PP) {
   // Enter the preprocessor block.
-  Stream.EnterSubblock(pch::PREPROCESSOR_BLOCK_ID, 3);
+  Stream.EnterSubblock(pch::PREPROCESSOR_BLOCK_ID, 2);
   
   // If the PCH file contains __DATE__ or __TIME__ emit a warning about this.
   // FIXME: use diagnostics subsystem for localization etc.
@@ -1732,13 +1732,13 @@
                       pch::IdentID ID) {
     unsigned KeyLen = strlen(II->getName()) + 1;
     clang::io::Emit16(Out, KeyLen);
-    unsigned DataLen = 4 + 4 + 2; // 4 bytes for token ID, builtin, flags
-                                  // 4 bytes for the persistent ID
-                                  // 2 bytes for the length of the decl chain
+    unsigned DataLen = 4 + 4; // 4 bytes for token ID, builtin, flags
+                              // 4 bytes for the persistent ID
     for (IdentifierResolver::iterator D = IdentifierResolver::begin(II),
                                    DEnd = IdentifierResolver::end();
          D != DEnd; ++D)
       DataLen += sizeof(pch::DeclID);
+    clang::io::Emit16(Out, DataLen);
     return std::make_pair(KeyLen, DataLen);
   }
   
@@ -1762,15 +1762,18 @@
     clang::io::Emit32(Out, Bits);
     clang::io::Emit32(Out, ID);
 
-    llvm::SmallVector<pch::DeclID, 8> Decls;
-    for (IdentifierResolver::iterator D = IdentifierResolver::begin(II),
-                                   DEnd = IdentifierResolver::end();
+    // Emit the declaration IDs in reverse order, because the
+    // IdentifierResolver provides the declarations as they would be
+    // visible (e.g., the function "stat" would come before the struct
+    // "stat"), but IdentifierResolver::AddDeclToIdentifierChain()
+    // adds declarations to the end of the list (so we need to see the
+    // struct "stat" before the function "stat").
+    llvm::SmallVector<Decl *, 16> Decls(IdentifierResolver::begin(II), 
+                                        IdentifierResolver::end());
+    for (llvm::SmallVector<Decl *, 16>::reverse_iterator D = Decls.rbegin(),
+                                                      DEnd = Decls.rend();
          D != DEnd; ++D)
-      Decls.push_back(Writer.getDeclID(*D));
-
-    clang::io::Emit16(Out, Decls.size());
-    for (unsigned I = 0; I < Decls.size(); ++I)
-      clang::io::Emit32(Out, Decls[I]);
+      clang::io::Emit32(Out, Writer.getDeclID(*D));
   }
 };
 } // end anonymous namespace
@@ -1799,21 +1802,24 @@
 
     // Create the on-disk hash table in a buffer.
     llvm::SmallVector<char, 4096> IdentifierTable; 
+    uint32_t BucketOffset;
     {
       PCHIdentifierTableTrait Trait(*this);
       llvm::raw_svector_ostream Out(IdentifierTable);
-      Generator.Emit(Out, Trait);
+      BucketOffset = Generator.Emit(Out, Trait);
     }
 
     // Create a blob abbreviation
     BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
     Abbrev->Add(BitCodeAbbrevOp(pch::IDENTIFIER_TABLE));
+    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
     unsigned IDTableAbbrev = Stream.EmitAbbrev(Abbrev);
 
     // Write the identifier table
     RecordData Record;
     Record.push_back(pch::IDENTIFIER_TABLE);
+    Record.push_back(BucketOffset);
     Stream.EmitRecordWithBlob(IDTableAbbrev, Record, 
                               &IdentifierTable.front(), 
                               IdentifierTable.size());

Modified: cfe/trunk/lib/Sema/IdentifierResolver.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/IdentifierResolver.cpp?rev=69737&r1=69736&r2=69737&view=diff

==============================================================================
--- cfe/trunk/lib/Sema/IdentifierResolver.cpp (original)
+++ cfe/trunk/lib/Sema/IdentifierResolver.cpp Tue Apr 21 17:25:48 2009
@@ -243,6 +243,28 @@
   return end();
 }
 
+void IdentifierResolver::AddDeclToIdentifierChain(IdentifierInfo *II, 
+                                                  NamedDecl *D) {
+  void *Ptr = II->getFETokenInfo<void>();
+
+  if (!Ptr) {
+    II->setFETokenInfo(D);
+    return;
+  }
+
+  IdDeclInfo *IDI;
+
+  if (isDeclPtr(Ptr)) {
+    II->setFETokenInfo(NULL);
+    IDI = &(*IdDeclInfos)[II];
+    NamedDecl *PrevD = static_cast<NamedDecl*>(Ptr);
+    IDI->AddDecl(PrevD);
+  } else
+    IDI = toIdDeclInfo(Ptr);
+
+  IDI->AddDecl(D);
+}
+
 //===----------------------------------------------------------------------===//
 // IdDeclInfoMap Implementation
 //===----------------------------------------------------------------------===//

Modified: cfe/trunk/lib/Sema/IdentifierResolver.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/IdentifierResolver.h?rev=69737&r1=69736&r2=69737&view=diff

==============================================================================
--- cfe/trunk/lib/Sema/IdentifierResolver.h (original)
+++ cfe/trunk/lib/Sema/IdentifierResolver.h Tue Apr 21 17:25:48 2009
@@ -177,6 +177,14 @@
   /// (and, therefore, replaced).
   bool ReplaceDecl(NamedDecl *Old, NamedDecl *New);
 
+  /// \brief Link the declaration into the chain of declarations for
+  /// the given identifier.
+  ///
+  /// This is a lower-level routine used by the PCH reader to link a
+  /// declaration into a specific IdentifierInfo before the
+  /// declaration actually has a name.
+  void AddDeclToIdentifierChain(IdentifierInfo *II, NamedDecl *D);
+
   explicit IdentifierResolver(const LangOptions &LangOpt);
   ~IdentifierResolver();
 

Modified: cfe/trunk/lib/Sema/ParseAST.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/ParseAST.cpp?rev=69737&r1=69736&r2=69737&view=diff

==============================================================================
--- cfe/trunk/lib/Sema/ParseAST.cpp (original)
+++ cfe/trunk/lib/Sema/ParseAST.cpp Tue Apr 21 17:25:48 2009
@@ -14,6 +14,7 @@
 #include "clang/Sema/ParseAST.h"
 #include "Sema.h"
 #include "clang/Sema/SemaConsumer.h"
+#include "clang/Sema/ExternalSemaSource.h"
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ExternalASTSource.h"
 #include "clang/AST/Stmt.h"
@@ -50,8 +51,13 @@
   if (SemaConsumer *SC = dyn_cast<SemaConsumer>(Consumer))
     SC->InitializeSema(S);
 
-  if (Ctx.getExternalSource())
-    Ctx.getExternalSource()->StartTranslationUnit(Consumer);
+  if (ExternalASTSource *External = Ctx.getExternalSource()) {
+    if (ExternalSemaSource *ExternalSema = 
+          dyn_cast<ExternalSemaSource>(External))
+      ExternalSema->InitializeSema(S);
+
+    External->StartTranslationUnit(Consumer);
+  }
 
   Parser::DeclGroupPtrTy ADecl;
   

Modified: cfe/trunk/lib/Sema/SemaLookup.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaLookup.cpp?rev=69737&r1=69736&r2=69737&view=diff

==============================================================================
--- cfe/trunk/lib/Sema/SemaLookup.cpp (original)
+++ cfe/trunk/lib/Sema/SemaLookup.cpp Tue Apr 21 17:25:48 2009
@@ -878,17 +878,6 @@
         // We have a single lookup result.
         return LookupResult::CreateLookupResult(Context, *I);
       }
-
-    /// If the context has an external AST source attached, look at
-    /// translation unit scope.
-    if (Context.getExternalSource()) {
-      DeclContext::lookup_iterator I, E;
-      for (llvm::tie(I, E) 
-             = Context.getTranslationUnitDecl()->lookup(Context, Name); 
-           I != E; ++I)
-        if (isAcceptableLookupResult(*I, NameKind, IDNS))
-          return LookupResult::CreateLookupResult(Context, I, E);
-    }
   } else {
     // Perform C++ unqualified name lookup.
     std::pair<bool, LookupResult> MaybeResult =

Added: cfe/trunk/test/PCH/builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/PCH/builtins.c?rev=69737&view=auto

==============================================================================
--- cfe/trunk/test/PCH/builtins.c (added)
+++ cfe/trunk/test/PCH/builtins.c Tue Apr 21 17:25:48 2009
@@ -0,0 +1,10 @@
+// Test this without pch.
+// RUN: clang-cc -include %S/builtins.h -fsyntax-only -verify %s &&
+
+// Test with pch.
+// RUN: clang-cc -emit-pch -o %t %S/builtins.h &&
+// RUN: clang-cc -include-pch %t -fsyntax-only -verify %s 
+
+void hello() {
+  printf("Hello, World!");
+}

Added: cfe/trunk/test/PCH/builtins.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/PCH/builtins.h?rev=69737&view=auto

==============================================================================
--- cfe/trunk/test/PCH/builtins.h (added)
+++ cfe/trunk/test/PCH/builtins.h Tue Apr 21 17:25:48 2009
@@ -0,0 +1,2 @@
+// Header for PCH test builtins.c
+int printf(char const *, ...);





More information about the cfe-commits mailing list