[cfe-commits] r70063 - in /cfe/trunk: include/clang/Frontend/PCHReader.h include/clang/Frontend/PCHWriter.h lib/Frontend/PCHReader.cpp lib/Frontend/PCHWriter.cpp

Douglas Gregor dgregor at apple.com
Sat Apr 25 12:10:14 PDT 2009


Author: dgregor
Date: Sat Apr 25 14:10:14 2009
New Revision: 70063

URL: http://llvm.org/viewvc/llvm-project?rev=70063&view=rev
Log:
Write the identifier offsets array into the PCH file as a blob, so
that the PCH reader does not have to decode the VBR encoding at PCH
load time.

Also, reduce the size of the identifier offsets from 64 bits down to
32 bits. The identifier table itself isn't going to grow to more than
4GB :)

Overall, this results in a 13% speedup in the Cocoa-prefixed "Hello,
World" benchmark.


Modified:
    cfe/trunk/include/clang/Frontend/PCHReader.h
    cfe/trunk/include/clang/Frontend/PCHWriter.h
    cfe/trunk/lib/Frontend/PCHReader.cpp
    cfe/trunk/lib/Frontend/PCHWriter.cpp

Modified: cfe/trunk/include/clang/Frontend/PCHReader.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Frontend/PCHReader.h?rev=70063&r1=70062&r2=70063&view=diff

==============================================================================
--- cfe/trunk/include/clang/Frontend/PCHReader.h (original)
+++ cfe/trunk/include/clang/Frontend/PCHReader.h Sat Apr 25 14:10:14 2009
@@ -121,21 +121,28 @@
 
   /// \brief Actual data for the on-disk hash table.
   ///
-  /// FIXME: This will eventually go away.
+  // This pointer points into a memory buffer, where the on-disk hash
+  // table for identifiers actually lives.
   const char *IdentifierTableData;
 
   /// \brief A pointer to an on-disk hash table of opaque type
   /// IdentifierHashTable.
   void *IdentifierLookupTable;
 
-  /// \brief String data for identifiers, indexed by the identifier ID
-  /// minus one.
+  /// \brief Offsets into the identifier table data.
   ///
-  /// Each element in this array is either an offset into
-  /// IdentifierTable that contains the string data (if the lowest bit
-  /// is set, in which case the offset is shifted left by one) or is
-  /// an IdentifierInfo* that has already been resolved.
-  llvm::SmallVector<uint64_t, 16> IdentifierData;
+  /// This array is indexed by the identifier ID (-1), and provides
+  /// the offset into IdentifierTableData where the string data is
+  /// stored.
+  const uint32_t *IdentifierOffsets;
+
+  /// \brief A vector containing identifiers that have already been
+  /// loaded.
+  ///
+  /// If the pointer at index I is non-NULL, then it refers to the
+  /// IdentifierInfo for the identifier with ID=I+1 that has already
+  /// been loaded.
+  std::vector<IdentifierInfo *> IdentifiersLoaded;
 
   /// \brief A pointer to an on-disk hash table of opaque type
   /// PCHMethodPoolLookupTable.
@@ -256,6 +263,7 @@
   explicit PCHReader(Preprocessor &PP, ASTContext &Context) 
     : SemaObj(0), PP(PP), Context(Context), Consumer(0),
       IdentifierTableData(0), IdentifierLookupTable(0),
+      IdentifierOffsets(0),
       MethodPoolLookupTable(0), MethodPoolLookupTableData(0),
       TotalSelectorsInMethodPool(0), SelectorOffsets(0),
       TotalNumSelectors(0), NumStatementsRead(0), NumMacrosRead(0),
@@ -349,7 +357,7 @@
   virtual std::pair<ObjCMethodList, ObjCMethodList> 
     ReadMethodPool(Selector Sel);
 
-  void SetIdentifierInfo(unsigned ID, const IdentifierInfo *II);
+  void SetIdentifierInfo(unsigned ID, IdentifierInfo *II);
 
   /// \brief Report a diagnostic.
   DiagnosticBuilder Diag(unsigned DiagID);

Modified: cfe/trunk/include/clang/Frontend/PCHWriter.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Frontend/PCHWriter.h?rev=70063&r1=70062&r2=70063&view=diff

==============================================================================
--- cfe/trunk/include/clang/Frontend/PCHWriter.h (original)
+++ cfe/trunk/include/clang/Frontend/PCHWriter.h Sat Apr 25 14:10:14 2009
@@ -95,18 +95,18 @@
   llvm::DenseMap<const IdentifierInfo *, pch::IdentID> IdentifierIDs;
   
   /// \brief Offsets of each of the identifier IDs into the identifier
-  /// table, shifted left by one bit with the low bit set.
-  llvm::SmallVector<uint64_t, 16> IdentifierOffsets;
+  /// table.
+  std::vector<uint32_t> IdentifierOffsets;
 
   /// \brief Map that provides the ID numbers of each Selector.
   llvm::DenseMap<Selector, pch::SelectorID> SelectorIDs;
   
   /// \brief Offset of each selector within the method pool/selector
   /// table, indexed by the Selector ID (-1).
-  llvm::SmallVector<uint32_t, 16> SelectorOffsets;
+  std::vector<uint32_t> SelectorOffsets;
 
   /// \brief A vector of all Selectors (ordered by ID).
-  llvm::SmallVector<Selector, 16> SelVector;
+  std::vector<Selector> SelVector;
   
   /// \brief Offsets of each of the macro identifiers into the
   /// bitstream.

Modified: cfe/trunk/lib/Frontend/PCHReader.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/PCHReader.cpp?rev=70063&r1=70062&r2=70063&view=diff

==============================================================================
--- cfe/trunk/lib/Frontend/PCHReader.cpp (original)
+++ cfe/trunk/lib/Frontend/PCHReader.cpp Sat Apr 25 14:10:14 2009
@@ -1837,28 +1837,23 @@
 
     case pch::IDENTIFIER_TABLE:
       IdentifierTableData = BlobStart;
-      IdentifierLookupTable 
-        = PCHIdentifierLookupTable::Create(
+      if (Record[0]) {
+        IdentifierLookupTable 
+          = PCHIdentifierLookupTable::Create(
                         (const unsigned char *)IdentifierTableData + Record[0],
                         (const unsigned char *)IdentifierTableData, 
                         PCHIdentifierLookupTrait(*this));
-      PP.getIdentifierTable().setExternalIdentifierLookup(this);
+        PP.getIdentifierTable().setExternalIdentifierLookup(this);
+      }
       break;
 
     case pch::IDENTIFIER_OFFSET:
-      if (!IdentifierData.empty()) {
+      if (!IdentifiersLoaded.empty()) {
         Error("Duplicate IDENTIFIER_OFFSET record in PCH file");
         return Failure;
       }
-      IdentifierData.swap(Record);
-#ifndef NDEBUG
-      for (unsigned I = 0, N = IdentifierData.size(); I != N; ++I) {
-        if ((IdentifierData[I] & 0x01) == 0) {
-          Error("Malformed identifier table in the precompiled header");
-          return Failure;
-        }
-      }
-#endif
+      IdentifierOffsets = (const uint32_t *)BlobStart;
+      IdentifiersLoaded.resize(Record[0]);
       break;
 
     case pch::EXTERNAL_DEFINITIONS:
@@ -2710,22 +2705,20 @@
 void PCHReader::PrintStats() {
   std::fprintf(stderr, "*** PCH Statistics:\n");
 
-  unsigned NumTypesLoaded = 
-    TypesLoaded.size() - std::count(TypesLoaded.begin(), TypesLoaded.end(),
-                                    (Type *)0);
-  unsigned NumDeclsLoaded = 
-    DeclsLoaded.size() - std::count(DeclsLoaded.begin(), DeclsLoaded.end(),
-                                    (Decl *)0);
-  unsigned NumIdentifiersLoaded = 0;
-  for (unsigned I = 0; I < IdentifierData.size(); ++I) {
-    if ((IdentifierData[I] & 0x01) == 0)
-      ++NumIdentifiersLoaded;
-  }
-  unsigned NumSelectorsLoaded = 0;
-  for (unsigned I = 0; I < SelectorsLoaded.size(); ++I) {
-    if (SelectorsLoaded[I].getAsOpaquePtr())
-      ++NumSelectorsLoaded;
-  }
+  unsigned NumTypesLoaded 
+    = TypesLoaded.size() - std::count(TypesLoaded.begin(), TypesLoaded.end(),
+                                      (Type *)0);
+  unsigned NumDeclsLoaded
+    = DeclsLoaded.size() - std::count(DeclsLoaded.begin(), DeclsLoaded.end(),
+                                      (Decl *)0);
+  unsigned NumIdentifiersLoaded
+    = IdentifiersLoaded.size() - std::count(IdentifiersLoaded.begin(),
+                                            IdentifiersLoaded.end(),
+                                            (IdentifierInfo *)0);
+  unsigned NumSelectorsLoaded 
+    = SelectorsLoaded.size() - std::count(SelectorsLoaded.begin(),
+                                          SelectorsLoaded.end(),
+                                          Selector());
 
   if (!TypesLoaded.empty())
     std::fprintf(stderr, "  %u/%u types read (%f%%)\n",
@@ -2735,10 +2728,10 @@
     std::fprintf(stderr, "  %u/%u declarations read (%f%%)\n",
                  NumDeclsLoaded, (unsigned)DeclsLoaded.size(),
                  ((float)NumDeclsLoaded/DeclsLoaded.size() * 100));
-  if (!IdentifierData.empty())
+  if (!IdentifiersLoaded.empty())
     std::fprintf(stderr, "  %u/%u identifiers read (%f%%)\n",
-                 NumIdentifiersLoaded, (unsigned)IdentifierData.size(),
-                 ((float)NumIdentifiersLoaded/IdentifierData.size() * 100));
+                 NumIdentifiersLoaded, (unsigned)IdentifiersLoaded.size(),
+                 ((float)NumIdentifiersLoaded/IdentifiersLoaded.size() * 100));
   if (TotalNumSelectors)
     std::fprintf(stderr, "  %u/%u selectors read (%f%%)\n",
                  NumSelectorsLoaded, TotalNumSelectors,
@@ -2832,27 +2825,28 @@
   return *Pos;
 }
 
-void PCHReader::SetIdentifierInfo(unsigned ID, const IdentifierInfo *II) {
+void PCHReader::SetIdentifierInfo(unsigned ID, IdentifierInfo *II) {
   assert(ID && "Non-zero identifier ID required");
-  IdentifierData[ID - 1] = reinterpret_cast<uint64_t>(II);
+  assert(ID <= IdentifiersLoaded.size() && "Identifier ID out of range");
+  IdentifiersLoaded[ID - 1] = II;
 }
 
 IdentifierInfo *PCHReader::DecodeIdentifierInfo(unsigned ID) {
   if (ID == 0)
     return 0;
   
-  if (!IdentifierTableData || IdentifierData.empty()) {
+  if (!IdentifierTableData || IdentifiersLoaded.empty()) {
     Error("No identifier table in PCH file");
     return 0;
   }
   
-  if (IdentifierData[ID - 1] & 0x01) {
-    uint64_t Offset = IdentifierData[ID - 1] >> 1;
-    IdentifierData[ID - 1] = reinterpret_cast<uint64_t>(
-                               &Context.Idents.get(IdentifierTableData + Offset));
+  if (!IdentifiersLoaded[ID - 1]) {
+    uint32_t Offset = IdentifierOffsets[ID - 1];
+    IdentifiersLoaded[ID - 1] 
+      = &Context.Idents.get(IdentifierTableData + Offset);
   }
   
-  return reinterpret_cast<IdentifierInfo *>(IdentifierData[ID - 1]);
+  return IdentifiersLoaded[ID - 1];
 }
 
 Selector PCHReader::DecodeSelector(unsigned ID) {

Modified: cfe/trunk/lib/Frontend/PCHWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/PCHWriter.cpp?rev=70063&r1=70062&r2=70063&view=diff

==============================================================================
--- cfe/trunk/lib/Frontend/PCHWriter.cpp (original)
+++ cfe/trunk/lib/Frontend/PCHWriter.cpp Sat Apr 25 14:10:14 2009
@@ -2119,7 +2119,18 @@
   }
 
   // Write the offsets table for identifier IDs.
-  Stream.EmitRecord(pch::IDENTIFIER_OFFSET, IdentifierOffsets);
+  BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
+  Abbrev->Add(BitCodeAbbrevOp(pch::IDENTIFIER_OFFSET));
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of identifiers
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+  unsigned IdentifierOffsetAbbrev = Stream.EmitAbbrev(Abbrev);
+
+  RecordData Record;
+  Record.push_back(pch::IDENTIFIER_OFFSET);
+  Record.push_back(IdentifierOffsets.size());
+  Stream.EmitRecordWithBlob(IdentifierOffsetAbbrev, Record,
+                            (const char *)&IdentifierOffsets.front(),
+                            IdentifierOffsets.size() * sizeof(uint32_t));
 }
 
 /// \brief Write a record containing the given attributes.
@@ -2253,7 +2264,7 @@
 /// \brief Note that the identifier II occurs at the given offset
 /// within the identifier table.
 void PCHWriter::SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset) {
-  IdentifierOffsets[IdentifierIDs[II] - 1] = (Offset << 1) | 0x01;
+  IdentifierOffsets[IdentifierIDs[II] - 1] = Offset;
 }
 
 /// \brief Note that the selector Sel occurs at the given offset





More information about the cfe-commits mailing list