[cfe-commits] r69755 - in /cfe/trunk: include/clang/Frontend/PCHBitCodes.h include/clang/Frontend/PCHReader.h include/clang/Frontend/PCHWriter.h lib/Frontend/PCHReader.cpp lib/Frontend/PCHWriter.cpp

Douglas Gregor dgregor at apple.com
Tue Apr 21 16:56:24 PDT 2009


Author: dgregor
Date: Tue Apr 21 18:56:24 2009
New Revision: 69755

URL: http://llvm.org/viewvc/llvm-project?rev=69755&view=rev
Log:
Lazy deserialization of macro definitions for precompiled headers.

This optimization improves performance on the "Hello, World!" example
that uses Carbon.h as its prefix header by 57%. For reference, we're
now about 2.25x faster than GCC's PCH implementation. We're also
reading far less of the PCH file, as the reader's statistics show:

*** PCH Statistics:
  411/20693 types read (1.986179%)
  2553/59230 declarations read (4.310316%)
  1093/44646 identifiers read (2.448148%)
  1/32954 statements read (0.003035%)
  21/6187 macros read (0.339421%)



Modified:
    cfe/trunk/include/clang/Frontend/PCHBitCodes.h
    cfe/trunk/include/clang/Frontend/PCHReader.h
    cfe/trunk/include/clang/Frontend/PCHWriter.h
    cfe/trunk/lib/Frontend/PCHReader.cpp
    cfe/trunk/lib/Frontend/PCHWriter.cpp

Modified: cfe/trunk/include/clang/Frontend/PCHBitCodes.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Frontend/PCHBitCodes.h?rev=69755&r1=69754&r2=69755&view=diff

==============================================================================
--- cfe/trunk/include/clang/Frontend/PCHBitCodes.h (original)
+++ cfe/trunk/include/clang/Frontend/PCHBitCodes.h Tue Apr 21 18:56:24 2009
@@ -198,7 +198,6 @@
       /// [PP_COUNTER_VALUE, Val]
       PP_COUNTER_VALUE = 4
     };
-    
 
     /// \defgroup PCHAST Precompiled header AST constants
     ///

Modified: cfe/trunk/include/clang/Frontend/PCHReader.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Frontend/PCHReader.h?rev=69755&r1=69754&r2=69755&view=diff

==============================================================================
--- cfe/trunk/include/clang/Frontend/PCHReader.h (original)
+++ cfe/trunk/include/clang/Frontend/PCHReader.h Tue Apr 21 18:56:24 2009
@@ -171,6 +171,11 @@
   /// in the PCH file.
   unsigned TotalNumStatements;
 
+  /// \brief The number of macros de-serialized from the PCH file.
+  unsigned NumMacrosRead;
+  /// \brief The total number of macros stored in the PCH file.
+  unsigned TotalNumMacros;
+
   /// \brief FIXME: document!
   llvm::SmallVector<uint64_t, 4> SpecialTypes;
 
@@ -311,6 +316,9 @@
   /// \brief Reads a statement from the current stream position.
   Stmt *ReadStmt();
 
+  /// \brief Reads the macro record located at the given offset.
+  void ReadMacroRecord(uint64_t Offset);
+
   /// \brief Retrieve the AST context that this PCH reader
   /// supplements.
   ASTContext &getContext() { return Context; }

Modified: cfe/trunk/include/clang/Frontend/PCHWriter.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Frontend/PCHWriter.h?rev=69755&r1=69754&r2=69755&view=diff

==============================================================================
--- cfe/trunk/include/clang/Frontend/PCHWriter.h (original)
+++ cfe/trunk/include/clang/Frontend/PCHWriter.h Tue Apr 21 18:56:24 2009
@@ -98,6 +98,14 @@
   /// table, shifted left by one bit with the low bit set.
   llvm::SmallVector<uint64_t, 16> IdentifierOffsets;
 
+  /// \brief Offsets of each of the macro identifiers into the
+  /// bitstream.
+  ///
+  /// For each identifier that is associated with a macro, this map
+  /// provides the offset into the bitstream where that macro is
+  /// defined.
+  llvm::DenseMap<const IdentifierInfo *, uint64_t> MacroOffsets;
+
   /// \brief Declarations encountered that might be external
   /// definitions.
   ///
@@ -125,6 +133,9 @@
   /// \brief The number of statements written to the PCH file.
   unsigned NumStatements;
 
+  /// \brief The number of macros written to the PCH file.
+  unsigned NumMacros;
+
   void WriteTargetTriple(const TargetInfo &Target);
   void WriteLanguageOptions(const LangOptions &LangOpts);
   void WriteSourceManagerBlock(SourceManager &SourceMgr);
@@ -134,7 +145,7 @@
   uint64_t WriteDeclContextLexicalBlock(ASTContext &Context, DeclContext *DC);
   uint64_t WriteDeclContextVisibleBlock(ASTContext &Context, DeclContext *DC);
   void WriteDeclsBlock(ASTContext &Context);
-  void WriteIdentifierTable();
+  void WriteIdentifierTable(Preprocessor &PP);
   void WriteAttributeRecord(const Attr *Attr);
 
 public:
@@ -160,6 +171,16 @@
   /// \brief Emit a reference to an identifier
   void AddIdentifierRef(const IdentifierInfo *II, RecordData &Record);
 
+  /// \brief Retrieve the offset of the macro definition for the given
+  /// identifier.
+  ///
+  /// The identifier must refer to a macro.
+  uint64_t getMacroOffset(const IdentifierInfo *II) {
+    assert(MacroOffsets.find(II) != MacroOffsets.end() && 
+           "Identifier does not name a macro");
+    return MacroOffsets[II];
+  }
+
   /// \brief Emit a reference to a type.
   void AddTypeRef(QualType T, RecordData &Record);
 

Modified: cfe/trunk/lib/Frontend/PCHReader.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/PCHReader.cpp?rev=69755&r1=69754&r2=69755&view=diff

==============================================================================
--- cfe/trunk/lib/Frontend/PCHReader.cpp (original)
+++ cfe/trunk/lib/Frontend/PCHReader.cpp Tue Apr 21 18:56:24 2009
@@ -36,6 +36,23 @@
 
 using namespace clang;
 
+namespace {
+  /// \brief Helper class that saves the current stream position and
+  /// then restores it when destroyed.
+  struct VISIBILITY_HIDDEN SavedStreamPosition {
+    explicit SavedStreamPosition(llvm::BitstreamReader &Stream)
+      : Stream(Stream), Offset(Stream.GetCurrentBitNo()) { }
+
+    ~SavedStreamPosition() {
+      Stream.JumpToBit(Offset);
+    }
+
+  private:
+    llvm::BitstreamReader &Stream;
+    uint64_t Offset;
+  };
+}
+
 //===----------------------------------------------------------------------===//
 // Declaration deserialization
 //===----------------------------------------------------------------------===//
@@ -1053,6 +1070,8 @@
     using namespace clang::io;
     uint32_t Bits = ReadUnalignedLE32(d); // FIXME: use these?
     (void)Bits;
+    bool hasMacroDefinition = (Bits >> 3) & 0x01;
+    
     pch::IdentID ID = ReadUnalignedLE32(d);
     DataLen -= 8;
 
@@ -1064,8 +1083,13 @@
                                                  k.first, k.first + k.second);
     Reader.SetIdentifierInfo(ID, II);
 
-    // FIXME: If this identifier is a macro, deserialize the macro
-    // definition now.
+    // If this identifier is a macro, deserialize the macro
+    // definition.
+    if (hasMacroDefinition) {
+      uint32_t Offset = ReadUnalignedLE64(d);
+      Reader.ReadMacroRecord(Offset);
+      DataLen -= 8;
+    }
 
     // Read all of the declarations visible at global scope with this
     // name.
@@ -1323,27 +1347,28 @@
   }
 }
 
-bool PCHReader::ReadPreprocessorBlock() {
-  if (Stream.EnterSubBlock(pch::PREPROCESSOR_BLOCK_ID))
-    return Error("Malformed preprocessor block record");
-  
+void PCHReader::ReadMacroRecord(uint64_t Offset) {
+  // Keep track of where we are in the stream, then jump back there
+  // after reading this macro.
+  SavedStreamPosition SavedPosition(Stream);
+
+  Stream.JumpToBit(Offset);
   RecordData Record;
   llvm::SmallVector<IdentifierInfo*, 16> MacroArgs;
-  MacroInfo *LastMacro = 0;
-  
+  MacroInfo *Macro = 0;
   while (true) {
     unsigned Code = Stream.ReadCode();
     switch (Code) {
     case llvm::bitc::END_BLOCK:
-      if (Stream.ReadBlockEnd())
-        return Error("Error at end of preprocessor block");
-      return false;
-    
+      return;
+
     case llvm::bitc::ENTER_SUBBLOCK:
       // No known subblocks, always skip them.
       Stream.ReadSubBlockID();
-      if (Stream.SkipBlock())
-        return Error("Malformed block record");
+      if (Stream.SkipBlock()) {
+        Error("Malformed block record");
+        return;
+      }
       continue;
     
     case llvm::bitc::DEFINE_ABBREV:
@@ -1351,24 +1376,29 @@
       continue;
     default: break;
     }
-    
+
     // Read a record.
     Record.clear();
     pch::PreprocessorRecordTypes RecType =
       (pch::PreprocessorRecordTypes)Stream.ReadRecord(Code, Record);
     switch (RecType) {
-    default:  // Default behavior: ignore unknown records.
-      break;
     case pch::PP_COUNTER_VALUE:
-      if (!Record.empty())
-        PP.setCounterValue(Record[0]);
+      // Skip this record.
       break;
 
     case pch::PP_MACRO_OBJECT_LIKE:
     case pch::PP_MACRO_FUNCTION_LIKE: {
+      // If we already have a macro, that means that we've hit the end
+      // of the definition of the macro we were looking for. We're
+      // done.
+      if (Macro)
+        return;
+
       IdentifierInfo *II = DecodeIdentifierInfo(Record[0]);
-      if (II == 0)
-        return Error("Macro must have a name");
+      if (II == 0) {
+        Error("Macro must have a name");
+        return;
+      }
       SourceLocation Loc = SourceLocation::getFromRawEncoding(Record[1]);
       bool isUsed = Record[2];
       
@@ -1397,14 +1427,15 @@
 
       // Remember that we saw this macro last so that we add the tokens that
       // form its body to it.
-      LastMacro = MI;
+      Macro = MI;
+      ++NumMacrosRead;
       break;
     }
         
     case pch::PP_TOKEN: {
-      // If we see a TOKEN before a PP_MACRO_*, then the file is eroneous, just
-      // pretend we didn't see this.
-      if (LastMacro == 0) break;
+      // If we see a TOKEN before a PP_MACRO_*, then the file is
+      // erroneous, just pretend we didn't see this.
+      if (Macro == 0) break;
       
       Token Tok;
       Tok.startToken();
@@ -1414,13 +1445,60 @@
         Tok.setIdentifierInfo(II);
       Tok.setKind((tok::TokenKind)Record[3]);
       Tok.setFlag((Token::TokenFlags)Record[4]);
-      LastMacro->AddTokenToBody(Tok);
+      Macro->AddTokenToBody(Tok);
       break;
     }
     }
   }
 }
 
+bool PCHReader::ReadPreprocessorBlock() {
+  if (Stream.EnterSubBlock(pch::PREPROCESSOR_BLOCK_ID))
+    return Error("Malformed preprocessor block record");
+  
+  RecordData Record;
+  while (true) {
+    unsigned Code = Stream.ReadCode();
+    switch (Code) {
+    case llvm::bitc::END_BLOCK:
+      if (Stream.ReadBlockEnd())
+        return Error("Error at end of preprocessor block");
+      return false;
+    
+    case llvm::bitc::ENTER_SUBBLOCK:
+      // No known subblocks, always skip them.
+      Stream.ReadSubBlockID();
+      if (Stream.SkipBlock())
+        return Error("Malformed block record");
+      continue;
+    
+    case llvm::bitc::DEFINE_ABBREV:
+      Stream.ReadAbbrevRecord();
+      continue;
+    default: break;
+    }
+    
+    // Read a record.
+    Record.clear();
+    pch::PreprocessorRecordTypes RecType =
+      (pch::PreprocessorRecordTypes)Stream.ReadRecord(Code, Record);
+    switch (RecType) {
+    default:  // Default behavior: ignore unknown records.
+      break;
+    case pch::PP_COUNTER_VALUE:
+      if (!Record.empty())
+        PP.setCounterValue(Record[0]);
+      break;
+
+    case pch::PP_MACRO_OBJECT_LIKE:
+    case pch::PP_MACRO_FUNCTION_LIKE:
+    case pch::PP_TOKEN:
+      // Once we've hit a macro definition or a token, we're done.
+      return false;
+    }
+  }
+}
+
 PCHReader::PCHReadResult 
 PCHReader::ReadPCHBlock(uint64_t &PreprocessorBlockOffset) {
   if (Stream.EnterSubBlock(pch::PCH_BLOCK_ID)) {
@@ -1573,6 +1651,7 @@
 
     case pch::STATISTICS:
       TotalNumStatements = Record[0];
+      TotalNumMacros = Record[1];
       break;
 
     }
@@ -1582,23 +1661,6 @@
   return Failure;
 }
 
-namespace {
-  /// \brief Helper class that saves the current stream position and
-  /// then restores it when destroyed.
-  struct VISIBILITY_HIDDEN SavedStreamPosition {
-    explicit SavedStreamPosition(llvm::BitstreamReader &Stream)
-      : Stream(Stream), Offset(Stream.GetCurrentBitNo()) { }
-
-    ~SavedStreamPosition() {
-      Stream.JumpToBit(Offset);
-    }
-
-  private:
-    llvm::BitstreamReader &Stream;
-    uint64_t Offset;
-  };
-}
-
 PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) {
   // Set the PCH file name.
   this->FileName = FileName;
@@ -2364,6 +2426,9 @@
   std::fprintf(stderr, "  %u/%u statements read (%f%%)\n",
                NumStatementsRead, TotalNumStatements,
                ((float)NumStatementsRead/TotalNumStatements * 100));
+  std::fprintf(stderr, "  %u/%u macros read (%f%%)\n",
+               NumMacrosRead, TotalNumMacros,
+               ((float)NumMacrosRead/TotalNumMacros * 100));
   std::fprintf(stderr, "\n");
 }
 

Modified: cfe/trunk/lib/Frontend/PCHWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/PCHWriter.cpp?rev=69755&r1=69754&r2=69755&view=diff

==============================================================================
--- cfe/trunk/lib/Frontend/PCHWriter.cpp (original)
+++ cfe/trunk/lib/Frontend/PCHWriter.cpp Tue Apr 21 18:56:24 2009
@@ -1439,8 +1439,6 @@
   
   // Loop over all the macro definitions that are live at the end of the file,
   // emitting each to the PP section.
-  // FIXME: Eventually we want to emit an index so that we can lazily load
-  // macros.
   for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
        I != E; ++I) {
     // FIXME: This emits macros in hash table order, we should do it in a stable
@@ -1452,7 +1450,9 @@
     if (MI->isBuiltinMacro())
       continue;
 
+    // FIXME: Remove this identifier reference?
     AddIdentifierRef(I->first, Record);
+    MacroOffsets[I->first] = Stream.GetCurrentBitNo();
     Record.push_back(MI->getDefinitionLoc().getRawEncoding());
     Record.push_back(MI->isUsed());
     
@@ -1494,7 +1494,7 @@
       Stream.EmitRecord(pch::PP_TOKEN, Record);
       Record.clear();
     }
-    
+    ++NumMacros;
   }
   
   Stream.ExitBlock();
@@ -1715,6 +1715,7 @@
 namespace {
 class VISIBILITY_HIDDEN PCHIdentifierTableTrait {
   PCHWriter &Writer;
+  Preprocessor &PP;
 
 public:
   typedef const IdentifierInfo* key_type;
@@ -1723,19 +1724,23 @@
   typedef pch::IdentID data_type;
   typedef data_type data_type_ref;
   
-  PCHIdentifierTableTrait(PCHWriter &Writer) : Writer(Writer) { }
+  PCHIdentifierTableTrait(PCHWriter &Writer, Preprocessor &PP) 
+    : Writer(Writer), PP(PP) { }
 
   static unsigned ComputeHash(const IdentifierInfo* II) {
     return clang::BernsteinHash(II->getName());
   }
   
-  static std::pair<unsigned,unsigned> 
+  std::pair<unsigned,unsigned> 
     EmitKeyDataLength(llvm::raw_ostream& Out, const IdentifierInfo* II, 
                       pch::IdentID ID) {
     unsigned KeyLen = strlen(II->getName()) + 1;
     clang::io::Emit16(Out, KeyLen);
     unsigned DataLen = 4 + 4; // 4 bytes for token ID, builtin, flags
                               // 4 bytes for the persistent ID
+    if (II->hasMacroDefinition() && 
+        !PP.getMacroInfo(const_cast<IdentifierInfo *>(II))->isBuiltinMacro())
+      DataLen += 8;
     for (IdentifierResolver::iterator D = IdentifierResolver::begin(II),
                                    DEnd = IdentifierResolver::end();
          D != DEnd; ++D)
@@ -1755,15 +1760,21 @@
   void EmitData(llvm::raw_ostream& Out, const IdentifierInfo* II, 
                 pch::IdentID ID, unsigned) {
     uint32_t Bits = 0;
+    bool hasMacroDefinition = 
+      II->hasMacroDefinition() && 
+      !PP.getMacroInfo(const_cast<IdentifierInfo *>(II))->isBuiltinMacro();
     Bits = Bits | (uint32_t)II->getTokenID();
     Bits = (Bits << 8) | (uint32_t)II->getObjCOrBuiltinID();
-    Bits = (Bits << 10) | II->hasMacroDefinition();
+    Bits = (Bits << 10) | hasMacroDefinition;
     Bits = (Bits << 1) | II->isExtensionToken();
     Bits = (Bits << 1) | II->isPoisoned();
     Bits = (Bits << 1) | II->isCPlusPlusOperatorKeyword();
     clang::io::Emit32(Out, Bits);
     clang::io::Emit32(Out, ID);
 
+    if (hasMacroDefinition)
+      clang::io::Emit64(Out, Writer.getMacroOffset(II));
+
     // Emit the declaration IDs in reverse order, because the
     // IdentifierResolver provides the declarations as they would be
     // visible (e.g., the function "stat" would come before the struct
@@ -1785,7 +1796,7 @@
 /// The identifier table consists of a blob containing string data
 /// (the actual identifiers themselves) and a separate "offsets" index
 /// that maps identifier IDs to locations within the blob.
-void PCHWriter::WriteIdentifierTable() {
+void PCHWriter::WriteIdentifierTable(Preprocessor &PP) {
   using namespace llvm;
 
   // Create and write out the blob that contains the identifier
@@ -1806,7 +1817,7 @@
     llvm::SmallVector<char, 4096> IdentifierTable; 
     uint32_t BucketOffset;
     {
-      PCHIdentifierTableTrait Trait(*this);
+      PCHIdentifierTableTrait Trait(*this, PP);
       llvm::raw_svector_ostream Out(IdentifierTable);
       BucketOffset = Generator.Emit(Out, Trait);
     }
@@ -1964,7 +1975,8 @@
 }
 
 PCHWriter::PCHWriter(llvm::BitstreamWriter &Stream) 
-  : Stream(Stream), NextTypeID(pch::NUM_PREDEF_TYPE_IDS), NumStatements(0) { }
+  : Stream(Stream), NextTypeID(pch::NUM_PREDEF_TYPE_IDS), 
+    NumStatements(0), NumMacros(0) { }
 
 void PCHWriter::WritePCH(Sema &SemaRef) {
   ASTContext &Context = SemaRef.Context;
@@ -1989,7 +2001,7 @@
   WritePreprocessor(PP);
   WriteTypesBlock(Context);
   WriteDeclsBlock(Context);
-  WriteIdentifierTable();
+  WriteIdentifierTable(PP);
   Stream.EmitRecord(pch::TYPE_OFFSET, TypeOffsets);
   Stream.EmitRecord(pch::DECL_OFFSET, DeclOffsets);
 
@@ -2004,6 +2016,7 @@
   // Some simple statistics
   Record.clear();
   Record.push_back(NumStatements);
+  Record.push_back(NumMacros);
   Stream.EmitRecord(pch::STATISTICS, Record);
   Stream.ExitBlock();
 }





More information about the cfe-commits mailing list