[cfe-commits] r70075 - in /cfe/trunk/lib/Frontend: PCHReader.cpp PCHWriter.cpp

Douglas Gregor dgregor at apple.com
Sat Apr 25 14:04:17 PDT 2009


Author: dgregor
Date: Sat Apr 25 16:04:17 2009
New Revision: 70075

URL: http://llvm.org/viewvc/llvm-project?rev=70075&view=rev
Log:
PCH optimization for the identifier table, where we separate
"interesting" identifiers (e.g., those where the IdentifierInfo has
some useful information) from "uninteresting" identifiers (where the
IdentifierInfo is just a name). This makes the hash table smaller (so
searching in it should be faster) and, when loading "uninteresting"
identifiers, we skip the lookup in the hash table.

PCH file size is slightly smaller than before (since we don't emit the
contents of the uninteresting IdentifierInfo structures). The
Cocoa.h-prefixed "Hello, World" doesn't show any speedup, although
we're getting to the point where system noise is a big issue.


Modified:
    cfe/trunk/lib/Frontend/PCHReader.cpp
    cfe/trunk/lib/Frontend/PCHWriter.cpp

Modified: cfe/trunk/lib/Frontend/PCHReader.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/PCHReader.cpp?rev=70075&r1=70074&r2=70075&view=diff

==============================================================================
--- cfe/trunk/lib/Frontend/PCHReader.cpp (original)
+++ cfe/trunk/lib/Frontend/PCHReader.cpp Sat Apr 25 16:04:17 2009
@@ -1245,8 +1245,8 @@
   static std::pair<unsigned, unsigned>
   ReadKeyDataLength(const unsigned char*& d) {
     using namespace clang::io;
-    unsigned KeyLen = ReadUnalignedLE16(d);
     unsigned DataLen = ReadUnalignedLE16(d);
+    unsigned KeyLen = ReadUnalignedLE16(d);
     return std::make_pair(KeyLen, DataLen);
   }
     
@@ -2842,8 +2842,31 @@
   
   if (!IdentifiersLoaded[ID - 1]) {
     uint32_t Offset = IdentifierOffsets[ID - 1];
-    IdentifiersLoaded[ID - 1] 
-      = &Context.Idents.get(IdentifierTableData + Offset);
+
+    // If there is an identifier lookup table, but the offset of this
+    // string is after the identifier table itself, then we know that
+    // this string is not in the on-disk hash table. Therefore,
+    // disable lookup into the hash table when looking for this
+    // identifier.
+    PCHIdentifierLookupTable *IdTable 
+      = (PCHIdentifierLookupTable *)IdentifierLookupTable;
+    bool SkipHashTable = IdTable &&
+      Offset >= uint32_t(IdTable->getBuckets() - IdTable->getBase());
+
+    if (SkipHashTable)
+      PP.getIdentifierTable().setExternalIdentifierLookup(0);
+
+    // All of the strings in the PCH file are preceded by a 16-bit
+    // length. Extract that 16-bit length to avoid having to run
+    // strlen().
+    const char *Str = IdentifierTableData + Offset;
+    const char *StrLenPtr = Str - 2;
+    unsigned StrLen = (((unsigned) StrLenPtr[0])
+                       | (((unsigned) StrLenPtr[1]) << 8)) - 1;
+    IdentifiersLoaded[ID - 1] = &Context.Idents.get(Str, Str + StrLen);
+
+    if (SkipHashTable)
+      PP.getIdentifierTable().setExternalIdentifierLookup(this);
   }
   
   return IdentifiersLoaded[ID - 1];

Modified: cfe/trunk/lib/Frontend/PCHWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/PCHWriter.cpp?rev=70075&r1=70074&r2=70075&view=diff

==============================================================================
--- cfe/trunk/lib/Frontend/PCHWriter.cpp (original)
+++ cfe/trunk/lib/Frontend/PCHWriter.cpp Sat Apr 25 16:04:17 2009
@@ -2013,7 +2013,6 @@
     EmitKeyDataLength(llvm::raw_ostream& Out, const IdentifierInfo* II, 
                       pch::IdentID ID) {
     unsigned KeyLen = strlen(II->getName()) + 1;
-    clang::io::Emit16(Out, KeyLen);
     unsigned DataLen = 4 + 4; // 4 bytes for token ID, builtin, flags
                               // 4 bytes for the persistent ID
     if (II->hasMacroDefinition() && 
@@ -2023,7 +2022,13 @@
                                    DEnd = IdentifierResolver::end();
          D != DEnd; ++D)
       DataLen += sizeof(pch::DeclID);
+    // We emit the key length after the data length so that the
+    // "uninteresting" identifiers following the identifier hash table
+    // structure will have the same (key length, key characters)
+    // layout as the keys in the hash table. This also matches the
+    // format for identifiers in pretokenized headers.
     clang::io::Emit16(Out, DataLen);
+    clang::io::Emit16(Out, KeyLen);
     return std::make_pair(KeyLen, DataLen);
   }
   
@@ -2083,12 +2088,33 @@
   {
     OnDiskChainedHashTableGenerator<PCHIdentifierTableTrait> Generator;
     
+    llvm::SmallVector<const IdentifierInfo *, 32> UninterestingIdentifiers;
+
     // Create the on-disk hash table representation.
     for (llvm::DenseMap<const IdentifierInfo *, pch::IdentID>::iterator
            ID = IdentifierIDs.begin(), IDEnd = IdentifierIDs.end();
          ID != IDEnd; ++ID) {
       assert(ID->first && "NULL identifier in identifier table");
-      Generator.insert(ID->first, ID->second);
+
+      // Classify each identifier as either "interesting" or "not
+      // interesting". Interesting identifiers are those that have
+      // additional information that needs to be read from the PCH
+      // file, e.g., a built-in ID, declaration chain, or macro
+      // definition. These identifiers are placed into the hash table
+      // so that they can be found when looked up in the user program.
+      // All other identifiers are "uninteresting", which means that
+      // the IdentifierInfo built by default has all of the
+      // information we care about. Such identifiers are placed after
+      // the hash table.
+      const IdentifierInfo *II = ID->first;
+      if (II->isPoisoned() ||
+          II->isExtensionToken() ||
+          II->hasMacroDefinition() ||
+          II->getObjCOrBuiltinID() ||
+          II->getFETokenInfo<void>())
+        Generator.insert(ID->first, ID->second);
+      else
+        UninterestingIdentifiers.push_back(II);
     }
 
     // Create the on-disk hash table in a buffer.
@@ -2100,6 +2126,14 @@
       // Make sure that no bucket is at offset 0
       clang::io::Emit32(Out, 0);
       BucketOffset = Generator.Emit(Out, Trait);
+      
+      for (unsigned I = 0, N = UninterestingIdentifiers.size(); I != N; ++I) {
+        const IdentifierInfo *II = UninterestingIdentifiers[I];
+        unsigned N = II->getLength() + 1;
+        clang::io::Emit16(Out, N);
+        SetIdentifierOffset(II, Out.tell());
+        Out.write(II->getName(), N);
+      }
     }
 
     // Create a blob abbreviation





More information about the cfe-commits mailing list