[cfe-commits] r62248 - in /cfe/trunk: Driver/CacheTokens.cpp lib/Lex/PTHLexer.cpp

Ted Kremenek kremenek at apple.com
Wed Jan 14 17:26:25 PST 2009


Author: kremenek
Date: Wed Jan 14 19:26:25 2009
New Revision: 62248

URL: http://llvm.org/viewvc/llvm-project?rev=62248&view=rev
Log:
PTH: Embed a persistentID side-table in the PTH file that is sorted in the
lexical order of the corresponding identifier strings. This will be used for a
forthcoming optimization. This slows down PTH generation time by 7%. We can
revert this change if the optimization proves to not be valuable.

Modified:
    cfe/trunk/Driver/CacheTokens.cpp
    cfe/trunk/lib/Lex/PTHLexer.cpp

Modified: cfe/trunk/Driver/CacheTokens.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/Driver/CacheTokens.cpp?rev=62248&r1=62247&r2=62248&view=diff

==============================================================================
--- cfe/trunk/Driver/CacheTokens.cpp (original)
+++ cfe/trunk/Driver/CacheTokens.cpp Wed Jan 14 19:26:25 2009
@@ -102,7 +102,7 @@
     for ( ; I != E ; ++I) Out << *I;
   }
   
-  std::pair<Offset,Offset> EmitIdentifierTable();
+  std::pair<Offset,std::pair<Offset, Offset> > EmitIdentifierTable();
   Offset EmitFileTable();
   PCHEntry LexTokens(Lexer& L);
   void EmitCachedSpellings();
@@ -171,57 +171,66 @@
 struct VISIBILITY_HIDDEN IDData {
   const IdentifierInfo* II;
   uint32_t FileOffset;
-  const IdentifierTable::const_iterator::value_type* Str;
+};
+  
+class VISIBILITY_HIDDEN CompareIDDataIndex {
+  IDData* Table;
+public:  
+  CompareIDDataIndex(IDData* table) : Table(table) {}
+
+  bool operator()(unsigned i, unsigned j) const {
+    // Assume that IdentifierInfo::getName() returns a '\0' terminated string.
+    return strcmp(Table[i].II->getName(), Table[j].II->getName()) < 0;    
+  }
 };
 }
 
-std::pair<Offset,Offset> PTHWriter::EmitIdentifierTable() {
-  
-  const IdentifierTable& T = PP.getIdentifierTable();
+std::pair<Offset,std::pair<Offset,Offset> >
+PTHWriter::EmitIdentifierTable() {  
+  llvm::BumpPtrAllocator Alloc;
 
   // Build an inverse map from persistent IDs -> IdentifierInfo*.
-  typedef std::vector<IDData> InverseIDMap;
-  InverseIDMap IIDMap;
-  IIDMap.resize(idcount);
+  IDData* IIDMap = Alloc.Allocate<IDData>(idcount);
   
   // Generate mapping from persistent IDs -> IdentifierInfo*.
   for (IDMap::iterator I=IM.begin(), E=IM.end(); I!=E; ++I) {
     // Decrement by 1 because we are using a vector for the lookup and
     // 0 is reserved for NULL.
     assert(I->second > 0);
-    assert(I->second-1 < IIDMap.size());
-    IIDMap[I->second-1].II = I->first;
-  }
-
-  // Get the string data associated with the IdentifierInfo.
-  for (IdentifierTable::const_iterator I=T.begin(), E=T.end(); I!=E; ++I) {
-    IDMap::iterator IDI = IM.find(&(I->getValue()));
-    if (IDI == IM.end()) continue;
-    IIDMap[IDI->second-1].Str = &(*I);
-  }
+    assert(I->second-1 < idcount);
+    unsigned idx = I->second-1;
+    IIDMap[idx].II = I->first;
+  }  
+  
+  // We want to write out the strings in lexical order to support binary
+  // search of strings to identifiers.  Create such a table.
+  unsigned *LexicalOrder = Alloc.Allocate<unsigned>(idcount);
+  for (unsigned i = 0; i < idcount ; ++i ) LexicalOrder[i] = i;
+  std::sort(LexicalOrder, LexicalOrder+idcount, CompareIDDataIndex(IIDMap));
+  
+  // Write out the lexically-sorted table of persistent ids.
+  Offset LexicalOff = Out.tell();
+  for (unsigned i = 0; i < idcount ; ++i) Emit32(LexicalOrder[i]);
   
+  // Write out the string data itself.
   Offset DataOff = Out.tell();
     
-  for (InverseIDMap::iterator I=IIDMap.begin(), E=IIDMap.end(); I!=E; ++I) {
-    // Record the location for this data.
-    I->FileOffset = Out.tell();
-    // Write out the keyword.
-    unsigned len = I->Str->getKeyLength();  
+  for (unsigned i = 0; i < idcount; ++i) {
+    IDData& d = IIDMap[i];
+    d.FileOffset = Out.tell();            // Record the location for this data.  
+    unsigned len = d.II->getLength(); // Write out the string length.
     Emit32(len);
-    const char* buf = I->Str->getKeyData();    
+    const char* buf = d.II->getName(); // Write out the string data.
     EmitBuf(buf, buf+len);  
   }
   
   // Now emit the table mapping from persistent IDs to PTH file offsets.  
   Offset IDOff = Out.tell();
-  
-  // Emit the number of identifiers.
-  Emit32(idcount);
+  Emit32(idcount);  // Emit the number of identifiers.
+  for (unsigned i = 0 ; i < idcount; ++i) Emit32(IIDMap[i].FileOffset);
 
-  for (InverseIDMap::iterator I=IIDMap.begin(), E=IIDMap.end(); I!=E; ++I)
-    Emit32(I->FileOffset);
 
-  return std::make_pair(DataOff, IDOff);
+  return std::make_pair(DataOff, std::make_pair(IDOff, LexicalOff));
 }
 
 Offset PTHWriter::EmitFileTable() {
@@ -473,7 +482,8 @@
   }
 
   // Write out the identifier table.
-  std::pair<Offset,Offset> IdTableOff = EmitIdentifierTable();
+  const std::pair<Offset, std::pair<Offset,Offset> >& IdTableOff 
+    = EmitIdentifierTable();
   
   // Write out the cached strings table.
   EmitCachedSpellings();
@@ -483,7 +493,8 @@
   
   // Finally, write out the offset table at the end.
   Emit32(IdTableOff.first);
-  Emit32(IdTableOff.second);
+  Emit32(IdTableOff.second.first);
+  Emit32(IdTableOff.second.second);
   Emit32(FileTableOff);
 }
 

Modified: cfe/trunk/lib/Lex/PTHLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PTHLexer.cpp?rev=62248&r1=62247&r2=62248&view=diff

==============================================================================
--- cfe/trunk/lib/Lex/PTHLexer.cpp (original)
+++ cfe/trunk/lib/Lex/PTHLexer.cpp Wed Jan 14 19:26:25 2009
@@ -538,11 +538,12 @@
   // Compute the address of the index table at the end of the PTH file.
   // This table contains the offset of the file lookup table, the
   // persistent ID -> identifer data table.
-  const char* EndTable = BufEnd - sizeof(uint32_t)*3;
+  // FIXME: We should just embed this offset in the PTH file.
+  const char* EndTable = BufEnd - sizeof(uint32_t)*4;
   
   // Construct the file lookup table.  This will be used for mapping from
   // FileEntry*'s to cached tokens.
-  const char* FileTableOffset = EndTable + sizeof(uint32_t)*2;
+  const char* FileTableOffset = EndTable + sizeof(uint32_t)*3;
   const char* FileTable = BufBeg + Read32(FileTableOffset);
   
   if (!(FileTable > BufBeg && FileTable < BufEnd)) {





More information about the cfe-commits mailing list