[cfe-commits] r69625 - in /cfe/trunk: include/clang/Basic/IdentifierTable.h include/clang/Frontend/PCHReader.h include/clang/Frontend/PCHWriter.h lib/Frontend/PCHReader.cpp lib/Frontend/PCHWriter.cpp

Douglas Gregor dgregor at apple.com
Mon Apr 20 13:36:09 PDT 2009


Author: dgregor
Date: Mon Apr 20 15:36:09 2009
New Revision: 69625

URL: http://llvm.org/viewvc/llvm-project?rev=69625&view=rev
Log:
Write the identifier table into the PCH file as an on-disk hash table
that also includes the contents of the IdentifierInfo itself (the
various fields and flags, along with the chain of declarations visible
at the top level that have that name).

We don't make any use of the hash table yet, except that our
identifier ID -> string mapping points into the hash table now.


Modified:
    cfe/trunk/include/clang/Basic/IdentifierTable.h
    cfe/trunk/include/clang/Frontend/PCHReader.h
    cfe/trunk/include/clang/Frontend/PCHWriter.h
    cfe/trunk/lib/Frontend/PCHReader.cpp
    cfe/trunk/lib/Frontend/PCHWriter.cpp

Modified: cfe/trunk/include/clang/Basic/IdentifierTable.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/IdentifierTable.h?rev=69625&r1=69624&r2=69625&view=diff

==============================================================================
--- cfe/trunk/include/clang/Basic/IdentifierTable.h (original)
+++ cfe/trunk/include/clang/Basic/IdentifierTable.h Mon Apr 20 15:36:09 2009
@@ -141,7 +141,7 @@
       return tok::objc_not_keyword;
   }
   void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
-  
+
   /// getBuiltinID - Return a value indicating whether this is a builtin
   /// function.  0 is not-built-in.  1 is builtin-for-some-nonprimary-target.
   /// 2+ are specific builtin functions.
@@ -156,7 +156,10 @@
     assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID 
            && "ID too large for field!");
   }
-  
+
+  unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
+  void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
+
   /// get/setExtension - Initialize information about whether or not this
   /// language token is an extension.  This controls extension warnings, and is
   /// only valid if a custom token ID is set.

Modified: cfe/trunk/include/clang/Frontend/PCHReader.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Frontend/PCHReader.h?rev=69625&r1=69624&r2=69625&view=diff

==============================================================================
--- cfe/trunk/include/clang/Frontend/PCHReader.h (original)
+++ cfe/trunk/include/clang/Frontend/PCHReader.h Mon Apr 20 15:36:09 2009
@@ -124,7 +124,8 @@
   ///
   /// Each element in this array is either an offset into
   /// IdentifierTable that contains the string data (if the lowest bit
-  /// is set) or is an IdentifierInfo* that has already been resolved.
+  /// is set, in which case the offset is shifted left by one) or is
+  /// an IdentifierInfo* that has already been resolved.
   llvm::SmallVector<uint64_t, 16> IdentifierData;
 
   /// \brief The set of external definitions stored in the the PCH

Modified: cfe/trunk/include/clang/Frontend/PCHWriter.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Frontend/PCHWriter.h?rev=69625&r1=69624&r2=69625&view=diff

==============================================================================
--- cfe/trunk/include/clang/Frontend/PCHWriter.h (original)
+++ cfe/trunk/include/clang/Frontend/PCHWriter.h Mon Apr 20 15:36:09 2009
@@ -94,6 +94,10 @@
   /// IdentifierInfo.
   llvm::DenseMap<const IdentifierInfo *, pch::IdentID> IdentifierIDs;
 
+  /// \brief Offsets of each of the identifier IDs into the identifier
+  /// table, shifted left by one bit with the low bit set.
+  llvm::SmallVector<uint64_t, 16> IdentifierOffsets;
+
   /// \brief Declarations encountered that might be external
   /// definitions.
   ///
@@ -162,14 +166,22 @@
   /// \brief Emit a reference to a declaration.
   void AddDeclRef(const Decl *D, RecordData &Record);
 
+  /// \brief Determine the declaration ID of an already-emitted
+  /// declaration.
+  pch::DeclID getDeclID(const Decl *D);
+
   /// \brief Emit a declaration name.
   void AddDeclarationName(DeclarationName Name, RecordData &Record);
 
   /// \brief Add a string to the given record.
   void AddString(const std::string &Str, RecordData &Record);
 
-  /// \brief Add the given statement or expression to the queue of statements to
-  /// emit.
+  /// \brief Note that the identifier II occurs at the given offset
+  /// within the identifier table.
+  void SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset);
+
+  /// \brief Add the given statement or expression to the queue of
+  /// statements to emit.
   ///
   /// This routine should be used when emitting types and declarations
   /// that have expressions as part of their formulation. Once the

Modified: cfe/trunk/lib/Frontend/PCHReader.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/PCHReader.cpp?rev=69625&r1=69624&r2=69625&view=diff

==============================================================================
--- cfe/trunk/lib/Frontend/PCHReader.cpp (original)
+++ cfe/trunk/lib/Frontend/PCHReader.cpp Mon Apr 20 15:36:09 2009
@@ -2089,9 +2089,10 @@
   }
   
   if (IdentifierData[ID - 1] & 0x01) {
-    uint64_t Offset = IdentifierData[ID - 1];
+    uint64_t Offset = IdentifierData[ID - 1] >> 1;
     IdentifierData[ID - 1] = reinterpret_cast<uint64_t>(
-                                                        &Context.Idents.get(IdentifierTable + Offset));
+                               &Context.Idents.get(IdentifierTable + Offset));
+    // FIXME: also read the contents of the IdentifierInfo.
   }
   
   return reinterpret_cast<IdentifierInfo *>(IdentifierData[ID - 1]);

Modified: cfe/trunk/lib/Frontend/PCHWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/PCHWriter.cpp?rev=69625&r1=69624&r2=69625&view=diff

==============================================================================
--- cfe/trunk/lib/Frontend/PCHWriter.cpp (original)
+++ cfe/trunk/lib/Frontend/PCHWriter.cpp Mon Apr 20 15:36:09 2009
@@ -13,6 +13,7 @@
 
 #include "clang/Frontend/PCHWriter.h"
 #include "../Sema/Sema.h" // FIXME: move header into include/clang/Sema
+#include "../Sema/IdentifierResolver.h" // FIXME: move header 
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclContextInternals.h"
@@ -23,6 +24,7 @@
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Basic/FileManager.h"
+#include "clang/Basic/OnDiskHashTable.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/SourceManagerInternals.h"
 #include "clang/Basic/TargetInfo.h"
@@ -1616,6 +1618,71 @@
   Stream.ExitBlock();
 }
 
+namespace {
+class VISIBILITY_HIDDEN PCHIdentifierTableTrait {
+  PCHWriter &Writer;
+
+public:
+  typedef const IdentifierInfo* key_type;
+  typedef key_type  key_type_ref;
+  
+  typedef pch::IdentID data_type;
+  typedef data_type data_type_ref;
+  
+  PCHIdentifierTableTrait(PCHWriter &Writer) : Writer(Writer) { }
+
+  static unsigned ComputeHash(const IdentifierInfo* II) {
+    return clang::BernsteinHash(II->getName());
+  }
+  
+  static std::pair<unsigned,unsigned> 
+    EmitKeyDataLength(llvm::raw_ostream& Out, const IdentifierInfo* II, 
+                      pch::IdentID ID) {
+    unsigned KeyLen = strlen(II->getName()) + 1;
+    clang::io::Emit16(Out, KeyLen);
+    unsigned DataLen = 4 + 4 + 2; // 4 bytes for token ID, builtin, flags
+                                  // 4 bytes for the persistent ID
+                                  // 2 bytes for the length of the decl chain
+    for (IdentifierResolver::iterator D = IdentifierResolver::begin(II),
+                                   DEnd = IdentifierResolver::end();
+         D != DEnd; ++D)
+      DataLen += sizeof(pch::DeclID);
+    return std::make_pair(KeyLen, DataLen);
+  }
+  
+  void EmitKey(llvm::raw_ostream& Out, const IdentifierInfo* II, 
+               unsigned KeyLen) {
+    // Record the location of the key data.  This is used when generating
+    // the mapping from persistent IDs to strings.
+    Writer.SetIdentifierOffset(II, Out.tell());
+    Out.write(II->getName(), KeyLen);
+  }
+  
+  void EmitData(llvm::raw_ostream& Out, const IdentifierInfo* II, 
+                pch::IdentID ID, unsigned) {
+    uint32_t Bits = 0;
+    Bits = Bits | (uint32_t)II->getTokenID();
+    Bits = (Bits << 8) | (uint32_t)II->getObjCOrBuiltinID();
+    Bits = (Bits << 10) | II->hasMacroDefinition();
+    Bits = (Bits << 1) | II->isExtensionToken();
+    Bits = (Bits << 1) | II->isPoisoned();
+    Bits = (Bits << 1) | II->isCPlusPlusOperatorKeyword();
+    clang::io::Emit32(Out, Bits);
+    clang::io::Emit32(Out, ID);
+
+    llvm::SmallVector<pch::DeclID, 8> Decls;
+    for (IdentifierResolver::iterator D = IdentifierResolver::begin(II),
+                                   DEnd = IdentifierResolver::end();
+         D != DEnd; ++D)
+      Decls.push_back(Writer.getDeclID(*D));
+
+    clang::io::Emit16(Out, Decls.size());
+    for (unsigned I = 0; I < Decls.size(); ++I)
+      clang::io::Emit32(Out, Decls[I]);
+  }
+};
+} // end anonymous namespace
+
 /// \brief Write the identifier table into the PCH file.
 ///
 /// The identifier table consists of a blob containing string data
@@ -1626,43 +1693,42 @@
 
   // Create and write out the blob that contains the identifier
   // strings.
-  RecordData IdentOffsets;
-  IdentOffsets.resize(IdentifierIDs.size());
+  IdentifierOffsets.resize(IdentifierIDs.size());
   {
-    // Create the identifier string data.
-    std::vector<char> Data;
-    Data.push_back(0); // Data must not be empty.
+    OnDiskChainedHashTableGenerator<PCHIdentifierTableTrait> Generator;
+    
+    // Create the on-disk hash table representation.
     for (llvm::DenseMap<const IdentifierInfo *, pch::IdentID>::iterator
            ID = IdentifierIDs.begin(), IDEnd = IdentifierIDs.end();
          ID != IDEnd; ++ID) {
       assert(ID->first && "NULL identifier in identifier table");
+      Generator.insert(ID->first, ID->second);
+    }
 
-      // Make sure we're starting on an odd byte. The PCH reader
-      // expects the low bit to be set on all of the offsets.
-      if ((Data.size() & 0x01) == 0)
-        Data.push_back((char)0);
-
-      IdentOffsets[ID->second - 1] = Data.size();
-      Data.insert(Data.end(), 
-                  ID->first->getName(), 
-                  ID->first->getName() + ID->first->getLength());
-      Data.push_back((char)0);
+    // Create the on-disk hash table in a buffer.
+    llvm::SmallVector<char, 4096> IdentifierTable; 
+    {
+      PCHIdentifierTableTrait Trait(*this);
+      llvm::raw_svector_ostream Out(IdentifierTable);
+      Generator.Emit(Out, Trait);
     }
 
     // Create a blob abbreviation
     BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
     Abbrev->Add(BitCodeAbbrevOp(pch::IDENTIFIER_TABLE));
-    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Triple name
+    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
     unsigned IDTableAbbrev = Stream.EmitAbbrev(Abbrev);
 
     // Write the identifier table
     RecordData Record;
     Record.push_back(pch::IDENTIFIER_TABLE);
-    Stream.EmitRecordWithBlob(IDTableAbbrev, Record, &Data.front(), Data.size());
+    Stream.EmitRecordWithBlob(IDTableAbbrev, Record, 
+                              &IdentifierTable.front(), 
+                              IdentifierTable.size());
   }
 
   // Write the offsets table for identifier IDs.
-  Stream.EmitRecord(pch::IDENTIFIER_OFFSET, IdentOffsets);
+  Stream.EmitRecord(pch::IDENTIFIER_OFFSET, IdentifierOffsets);
 }
 
 /// \brief Write a record containing the given attributes.
@@ -1791,6 +1857,12 @@
   Record.insert(Record.end(), Str.begin(), Str.end());
 }
 
+/// \brief Note that the identifier II occurs at the given offset
+/// within the identifier table.
+void PCHWriter::SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset) {
+  IdentifierOffsets[IdentifierIDs[II] - 1] = (Offset << 1) | 0x01;
+}
+
 PCHWriter::PCHWriter(llvm::BitstreamWriter &Stream) 
   : Stream(Stream), NextTypeID(pch::NUM_PREDEF_TYPE_IDS), NumStatements(0) { }
 
@@ -1930,6 +2002,14 @@
   Record.push_back(ID);
 }
 
+pch::DeclID PCHWriter::getDeclID(const Decl *D) {
+  if (D == 0)
+    return 0;
+
+  assert(DeclIDs.find(D) != DeclIDs.end() && "Declaration not emitted!");
+  return DeclIDs[D];
+}
+
 void PCHWriter::AddDeclarationName(DeclarationName Name, RecordData &Record) {
   Record.push_back(Name.getNameKind());
   switch (Name.getNameKind()) {





More information about the cfe-commits mailing list