[cfe-commits] r68846 - in /cfe/trunk: include/clang/Frontend/PCHBitCodes.h include/clang/Frontend/PCHReader.h include/clang/Frontend/PCHWriter.h lib/Frontend/PCHReader.cpp lib/Frontend/PCHWriter.cpp

Douglas Gregor dgregor at apple.com
Fri Apr 10 17:14:33 PDT 2009


Author: dgregor
Date: Fri Apr 10 19:14:32 2009
New Revision: 68846

URL: http://llvm.org/viewvc/llvm-project?rev=68846&view=rev
Log:
Store unique IDs for identifiers in the PCH file. Use some bit mangling
so that we only need to perform the lookup and identifier resolution
once per identifier in the PCH file.

Modified:
    cfe/trunk/include/clang/Frontend/PCHBitCodes.h
    cfe/trunk/include/clang/Frontend/PCHReader.h
    cfe/trunk/include/clang/Frontend/PCHWriter.h
    cfe/trunk/lib/Frontend/PCHReader.cpp
    cfe/trunk/lib/Frontend/PCHWriter.cpp

Modified: cfe/trunk/include/clang/Frontend/PCHBitCodes.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Frontend/PCHBitCodes.h?rev=68846&r1=68845&r2=68846&view=diff

==============================================================================
--- cfe/trunk/include/clang/Frontend/PCHBitCodes.h (original)
+++ cfe/trunk/include/clang/Frontend/PCHBitCodes.h Fri Apr 10 19:14:32 2009
@@ -41,6 +41,10 @@
     /// other types that have serialized representations.
     typedef uint32_t TypeID;
 
+    /// \brief An ID number that refers to an identifier in a PCH
+    /// file.
+    typedef uint32_t IdentID;
+
     /// \brief Describes the various kinds of blocks that occur within
     /// a PCH file.
     enum BlockIDs {
@@ -104,7 +108,29 @@
 
       /// \brief Record code for the target triple used to build the
       /// PCH file.
-      TARGET_TRIPLE = 4
+      TARGET_TRIPLE = 4,
+
+      /// \brief Record code for the table of offsets of each
+      /// identifier ID.
+      ///
+      /// The offset table contains offsets into the blob stored in
+      /// the IDENTIFIER_TABLE record. Each offset points to the
+      /// NUL-terminated string that corresponds to that identifier.
+      IDENTIFIER_OFFSET = 5,
+
+      /// \brief Record code for the identifier table.
+      ///
+      /// The identifier table is a simple blob that contains
+      /// NUL-terminated strings for all of the identifiers
+      /// referenced by the PCH file. The IDENTIFIER_OFFSET table
+      /// contains the mapping from identifier IDs to the characters
+      /// in this blob. Note that the starting offsets of all of the
+      /// identifiers are odd, so that, when the identifier offset
+      /// table is loaded in, we can use the low bit to distinguish
+      /// between offsets (for unresolved identifier IDs) and
+      /// IdentifierInfo pointers (for already-resolved identifier
+      /// IDs).
+      IDENTIFIER_TABLE = 6
     };
 
     /// \brief Record types used within a source manager block.

Modified: cfe/trunk/include/clang/Frontend/PCHReader.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Frontend/PCHReader.h?rev=68846&r1=68845&r2=68846&view=diff

==============================================================================
--- cfe/trunk/include/clang/Frontend/PCHReader.h (original)
+++ cfe/trunk/include/clang/Frontend/PCHReader.h Fri Apr 10 19:14:32 2009
@@ -107,6 +107,17 @@
   /// DeclContext.
   DeclContextOffsetsMap DeclContextOffsets;
 
+  /// \brief String data for the identifiers in the PCH file.
+  const char *IdentifierTable;
+
+  /// \brief Per-identifier data, indexed by the identifier ID minus
+  /// one.
+  ///
+  /// Each element in this array is either an offset into
+  /// IdentifierTable, where the identifier's string data begins (if
+  /// the lowest bit is set), or an already-resolved IdentifierInfo*.
+  llvm::SmallVector<uint64_t, 16> IdentifierData;
+
   PCHReadResult ReadPCHBlock();
   bool CheckPredefinesBuffer(const char *PCHPredef, 
                              unsigned PCHPredefLen,

Modified: cfe/trunk/include/clang/Frontend/PCHWriter.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Frontend/PCHWriter.h?rev=68846&r1=68845&r2=68846&view=diff

==============================================================================
--- cfe/trunk/include/clang/Frontend/PCHWriter.h (original)
+++ cfe/trunk/include/clang/Frontend/PCHWriter.h Fri Apr 10 19:14:32 2009
@@ -77,6 +77,14 @@
   /// \brief The type ID that will be assigned to the next new type.
   pch::TypeID NextTypeID;
 
+  /// \brief Map that provides the ID numbers of each identifier in
+  /// the output stream.
+  ///
+  /// The ID numbers for identifiers are consecutive (in order of
+  /// discovery), starting at 1. An ID of zero refers to a NULL
+  /// IdentifierInfo.
+  llvm::DenseMap<const IdentifierInfo *, pch::IdentID> IdentifierIDs;
+
   void WriteTargetTriple(const TargetInfo &Target);
   void WriteLanguageOptions(const LangOptions &LangOpts);
   void WriteSourceManagerBlock(SourceManager &SourceMgr);
@@ -86,6 +94,7 @@
   uint64_t WriteDeclContextLexicalBlock(ASTContext &Context, DeclContext *DC);
   uint64_t WriteDeclContextVisibleBlock(ASTContext &Context, DeclContext *DC);
   void WriteDeclsBlock(ASTContext &Context);
+  void WriteIdentifierTable();
 
 public:
   typedef llvm::SmallVector<uint64_t, 64> RecordData;

Modified: cfe/trunk/lib/Frontend/PCHReader.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/PCHReader.cpp?rev=68846&r1=68845&r2=68846&view=diff

==============================================================================
--- cfe/trunk/lib/Frontend/PCHReader.cpp (original)
+++ cfe/trunk/lib/Frontend/PCHReader.cpp Fri Apr 10 19:14:32 2009
@@ -477,7 +477,7 @@
         return IgnorePCH;
       break;
 
-    case pch::TARGET_TRIPLE:
+    case pch::TARGET_TRIPLE: {
       std::string TargetTriple(BlobStart, BlobLen);
       if (TargetTriple != Context.Target.getTargetTriple()) {
         Diag(diag::warn_pch_target_triple)
@@ -487,6 +487,27 @@
       }
       break;
     }
+
+    case pch::IDENTIFIER_TABLE:
+      IdentifierTable = BlobStart;
+      break;
+
+    case pch::IDENTIFIER_OFFSET:
+      if (!IdentifierData.empty()) {
+        Error("Duplicate IDENTIFIER_OFFSET record in PCH file");
+        return Failure;
+      }
+      IdentifierData.swap(Record);
+#ifndef NDEBUG
+      for (unsigned I = 0, N = IdentifierData.size(); I != N; ++I) {
+        if ((IdentifierData[I] & 0x01) == 0) {
+          Error("Malformed identifier table in the precompiled header");
+          return Failure;
+        }
+      }
+#endif
+      break;
+    }
   }
 
   Error("Premature end of bitstream");
@@ -927,13 +948,22 @@
 
 const IdentifierInfo *PCHReader::GetIdentifierInfo(const RecordData &Record, 
                                                    unsigned &Idx) {
-  // FIXME: we need unique IDs for identifiers.
-  std::string Str;
-  unsigned Length = Record[Idx++];
-  Str.resize(Length);
-  for (unsigned I = 0; I != Length; ++I)
-    Str[I] = Record[Idx++];
-  return &Context.Idents.get(Str);
+  pch::IdentID ID = Record[Idx++];
+  if (ID == 0)
+    return 0;
+
+  if (!IdentifierTable || IdentifierData.empty()) {
+    Error("No identifier table in PCH file");
+    return 0;
+  }
+
+  if (IdentifierData[ID - 1] & 0x01) {
+    uint64_t Offset = IdentifierData[ID - 1];
+    IdentifierData[ID - 1] = reinterpret_cast<uint64_t>(
+                               &Context.Idents.get(IdentifierTable + Offset));
+  }
+
+  return reinterpret_cast<const IdentifierInfo *>(IdentifierData[ID - 1]);
 }
 
 DeclarationName 

Modified: cfe/trunk/lib/Frontend/PCHWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/PCHWriter.cpp?rev=68846&r1=68845&r2=68846&view=diff

==============================================================================
--- cfe/trunk/lib/Frontend/PCHWriter.cpp (original)
+++ cfe/trunk/lib/Frontend/PCHWriter.cpp Fri Apr 10 19:14:32 2009
@@ -818,6 +818,55 @@
   S.ExitBlock();
 }
 
+/// \brief Write the identifier table into the PCH file.
+///
+/// Emits an IDENTIFIER_TABLE blob of NUL-terminated identifier strings,
+/// each starting at an odd offset, followed by an IDENTIFIER_OFFSET
+/// record whose entry (ID - 1) is the blob offset of identifier ID.
+void PCHWriter::WriteIdentifierTable() {
+  using namespace llvm;
+
+  // One offset slot per identifier ID; the offset for a given ID is
+  // stored at index ID - 1.
+  RecordData IdentOffsets;
+  IdentOffsets.resize(IdentifierIDs.size());
+  {
+    // Build the blob of identifier strings.
+    std::vector<char> Data;
+    Data.push_back(0); // Pad byte: Data is never empty; first offset is 1.
+    for (llvm::DenseMap<const IdentifierInfo *, pch::IdentID>::iterator
+           ID = IdentifierIDs.begin(), IDEnd = IdentifierIDs.end();
+         ID != IDEnd; ++ID) {
+      assert(ID->first && "NULL identifier in identifier table");
+
+      // Pad so the string starts at an odd offset. The PCH reader
+      // expects the low bit to be set on all of the offsets.
+      if ((Data.size() & 0x01) == 0)
+        Data.push_back((char)0);
+
+      IdentOffsets[ID->second - 1] = Data.size();
+      Data.insert(Data.end(), 
+                  ID->first->getName(), 
+                  ID->first->getName() + ID->first->getLength());
+      Data.push_back((char)0);
+    }
+
+    // Abbreviation: the record code followed by a single blob operand.
+    BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
+    Abbrev->Add(BitCodeAbbrevOp(pch::IDENTIFIER_TABLE));
+    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Identifier strings
+    unsigned IDTableAbbrev = S.EmitAbbrev(Abbrev);
+
+    // Emit the string blob as an IDENTIFIER_TABLE record.
+    RecordData Record;
+    Record.push_back(pch::IDENTIFIER_TABLE);
+    S.EmitRecordWithBlob(IDTableAbbrev, Record, &Data.front(), Data.size());
+  }
+
+  // Emit the ID-to-blob-offset index as an IDENTIFIER_OFFSET record.
+  S.EmitRecord(pch::IDENTIFIER_OFFSET, IdentOffsets);
+}
+
 PCHWriter::PCHWriter(llvm::BitstreamWriter &S) 
   : S(S), NextTypeID(pch::NUM_PREDEF_TYPE_IDS) { }
 
@@ -842,6 +891,7 @@
   WriteDeclsBlock(Context);
   S.EmitRecord(pch::TYPE_OFFSET, TypeOffsets);
   S.EmitRecord(pch::DECL_OFFSET, DeclOffsets);
+  WriteIdentifierTable();
   S.ExitBlock();
 }
 
@@ -858,11 +908,16 @@
 }
 
 void PCHWriter::AddIdentifierRef(const IdentifierInfo *II, RecordData &Record) {
-  // FIXME: Emit an identifier ID, not the actual string!
-  const char *Name = II->getName();
-  unsigned Len = strlen(Name);
-  Record.push_back(Len);
-  Record.insert(Record.end(), Name, Name + Len);
+  if (II == 0) {
+    Record.push_back(0);
+    return;
+  }
+
+  pch::IdentID &ID = IdentifierIDs[II];
+  if (ID == 0)
+    ID = IdentifierIDs.size();
+  
+  Record.push_back(ID);
 }
 
 void PCHWriter::AddTypeRef(QualType T, RecordData &Record) {





More information about the cfe-commits mailing list