r242650 - [modules] Don't save uninteresting identifiers, and don't consider identifiers

Richard Smith richard-llvm at metafoo.co.uk
Sun Jul 19 14:41:12 PDT 2015


Author: rsmith
Date: Sun Jul 19 16:41:12 2015
New Revision: 242650

URL: http://llvm.org/viewvc/llvm-project?rev=242650&view=rev
Log:
[modules] Don't save uninteresting identifiers, and don't consider identifiers
to be interesting just because they are the name of a builtin. Reduces the size
of an empty module by over 80% (~100KB).

Modified:
    cfe/trunk/include/clang/Basic/IdentifierTable.h
    cfe/trunk/include/clang/Serialization/Module.h
    cfe/trunk/lib/Parse/Parser.cpp
    cfe/trunk/lib/Sema/SemaDecl.cpp
    cfe/trunk/lib/Serialization/ASTReader.cpp
    cfe/trunk/lib/Serialization/ASTWriter.cpp
    cfe/trunk/test/Modules/empty.modulemap

Modified: cfe/trunk/include/clang/Basic/IdentifierTable.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/IdentifierTable.h?rev=242650&r1=242649&r2=242650&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/IdentifierTable.h (original)
+++ cfe/trunk/include/clang/Basic/IdentifierTable.h Sun Jul 19 16:41:12 2015
@@ -161,7 +161,7 @@ public:
   /// TokenID is normally read-only but there are 2 instances where we revert it
   /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
   /// using this method so we can inform serialization about it.
-  void RevertTokenIDToIdentifier() {
+  void revertTokenIDToIdentifier() {
     assert(TokenID != tok::identifier && "Already at tok::identifier");
     TokenID = tok::identifier;
     RevertedTokenID = true;
@@ -183,6 +183,18 @@ public:
   }
   void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
 
+  /// \brief True if setNotBuiltin() was called.
+  bool hasRevertedBuiltin() const {
+    return ObjCOrBuiltinID == tok::NUM_OBJC_KEYWORDS;
+  }
+
+  /// \brief Revert the identifier to a non-builtin identifier. We do this if
+  /// the name of a known builtin library function is used to declare that
+  /// function, but an unexpected type is specified.
+  void revertBuiltin() {
+    setBuiltinID(0);
+  }
+
   /// \brief Return a value indicating whether this is a builtin function.
   ///
   /// 0 is not-built-in.  1 is builtin-for-some-nonprimary-target.

Modified: cfe/trunk/include/clang/Serialization/Module.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/Module.h?rev=242650&r1=242649&r2=242650&view=diff
==============================================================================
--- cfe/trunk/include/clang/Serialization/Module.h (original)
+++ cfe/trunk/include/clang/Serialization/Module.h Sun Jul 19 16:41:12 2015
@@ -476,6 +476,11 @@ public:
   /// any point during translation.
   bool isDirectlyImported() const { return DirectlyImported; }
 
+  /// \brief Is this a module file for a module (rather than a PCH or similar).
+  bool isModule() const {
+    return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule;
+  }
+
   /// \brief Dump debugging output for this module.
   void dump();
 };

Modified: cfe/trunk/lib/Parse/Parser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Parse/Parser.cpp?rev=242650&r1=242649&r2=242650&view=diff
==============================================================================
--- cfe/trunk/lib/Parse/Parser.cpp (original)
+++ cfe/trunk/lib/Parse/Parser.cpp Sun Jul 19 16:41:12 2015
@@ -1489,7 +1489,7 @@ bool Parser::TryKeywordIdentFallback(boo
     << PP.getSpelling(Tok)
     << DisableKeyword;
   if (DisableKeyword)
-    Tok.getIdentifierInfo()->RevertTokenIDToIdentifier();
+    Tok.getIdentifierInfo()->revertTokenIDToIdentifier();
   Tok.setKind(tok::identifier);
   return true;
 }

Modified: cfe/trunk/lib/Sema/SemaDecl.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDecl.cpp?rev=242650&r1=242649&r2=242650&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaDecl.cpp (original)
+++ cfe/trunk/lib/Sema/SemaDecl.cpp Sun Jul 19 16:41:12 2015
@@ -3115,7 +3115,7 @@ bool Sema::MergeFunctionDecl(FunctionDec
       // remain visible, a single bogus local redeclaration (which is
       // actually only a warning) could break all the downstream code.
       if (!New->getLexicalDeclContext()->isFunctionOrMethod())
-        New->getIdentifier()->setBuiltinID(Builtin::NotBuiltin);
+        New->getIdentifier()->revertBuiltin();
 
       return false;
     }

Modified: cfe/trunk/lib/Serialization/ASTReader.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReader.cpp?rev=242650&r1=242649&r2=242650&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTReader.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTReader.cpp Sun Jul 19 16:41:12 2015
@@ -735,10 +735,10 @@ ASTIdentifierLookupTraitBase::ReadKey(co
 }
 
 /// \brief Whether the given identifier is "interesting".
-static bool isInterestingIdentifier(IdentifierInfo &II) {
+static bool isInterestingIdentifier(IdentifierInfo &II, bool IsModule) {
   return II.hadMacroDefinition() ||
          II.isPoisoned() ||
-         II.getObjCOrBuiltinID() ||
+         (IsModule ? II.hasRevertedBuiltin() : II.getObjCOrBuiltinID()) ||
          II.hasRevertedTokenIDToIdentifier() ||
          II.getFETokenInfo<void>();
 }
@@ -767,7 +767,7 @@ IdentifierInfo *ASTIdentifierLookupTrait
   }
   if (!II->isFromAST()) {
     II->setIsFromAST();
-    if (isInterestingIdentifier(*II))
+    if (isInterestingIdentifier(*II, F.isModule()))
       II->setChangedSinceDeserialization();
   }
   Reader.markIdentifierUpToDate(II);
@@ -784,6 +784,7 @@ IdentifierInfo *ASTIdentifierLookupTrait
   unsigned Bits = endian::readNext<uint16_t, little, unaligned>(d);
   bool CPlusPlusOperatorKeyword = readBit(Bits);
   bool HasRevertedTokenIDToIdentifier = readBit(Bits);
+  bool HasRevertedBuiltin = readBit(Bits);
   bool Poisoned = readBit(Bits);
   bool ExtensionToken = readBit(Bits);
   bool HadMacroDefinition = readBit(Bits);
@@ -794,8 +795,15 @@ IdentifierInfo *ASTIdentifierLookupTrait
   // Set or check the various bits in the IdentifierInfo structure.
   // Token IDs are read-only.
   if (HasRevertedTokenIDToIdentifier && II->getTokenID() != tok::identifier)
-    II->RevertTokenIDToIdentifier();
-  II->setObjCOrBuiltinID(ObjCOrBuiltinID);
+    II->revertTokenIDToIdentifier();
+  if (!F.isModule())
+    II->setObjCOrBuiltinID(ObjCOrBuiltinID);
+  else if (HasRevertedBuiltin && II->getBuiltinID()) {
+    II->revertBuiltin();
+    assert((II->hasRevertedBuiltin() ||
+            II->getObjCOrBuiltinID() == ObjCOrBuiltinID) &&
+           "Incorrect ObjC keyword or builtin ID");
+  }
   assert(II->isExtensionToken() == ExtensionToken &&
          "Incorrect extension token flag");
   (void)ExtensionToken;

Modified: cfe/trunk/lib/Serialization/ASTWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTWriter.cpp?rev=242650&r1=242649&r2=242650&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTWriter.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTWriter.cpp Sun Jul 19 16:41:12 2015
@@ -3102,15 +3102,16 @@ class ASTIdentifierTableTrait {
   ASTWriter &Writer;
   Preprocessor &PP;
   IdentifierResolver &IdResolver;
+  bool IsModule;
   
   /// \brief Determines whether this is an "interesting" identifier that needs a
   /// full IdentifierInfo structure written into the hash table. Notably, this
   /// doesn't check whether the name has macros defined; use PublicMacroIterator
   /// to check that.
-  bool isInterestingIdentifier(IdentifierInfo *II, uint64_t MacroOffset) {
+  bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) {
     if (MacroOffset ||
         II->isPoisoned() ||
-        II->getObjCOrBuiltinID() ||
+        (IsModule ? II->hasRevertedBuiltin() : II->getObjCOrBuiltinID()) ||
         II->hasRevertedTokenIDToIdentifier() ||
         II->getFETokenInfo<void>())
       return true;
@@ -3129,13 +3130,17 @@ public:
   typedef unsigned offset_type;
 
   ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP,
-                          IdentifierResolver &IdResolver)
-      : Writer(Writer), PP(PP), IdResolver(IdResolver) {}
+                          IdentifierResolver &IdResolver, bool IsModule)
+      : Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule) {}
 
   static hash_value_type ComputeHash(const IdentifierInfo* II) {
     return llvm::HashString(II->getName());
   }
 
+  bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) {
+    return isInterestingIdentifier(II, 0);
+  }
+
   std::pair<unsigned,unsigned>
   EmitKeyDataLength(raw_ostream& Out, IdentifierInfo* II, IdentID ID) {
     unsigned KeyLen = II->getLength() + 1;
@@ -3192,6 +3197,7 @@ public:
     Bits = (Bits << 1) | unsigned(HadMacroDefinition);
     Bits = (Bits << 1) | unsigned(II->isExtensionToken());
     Bits = (Bits << 1) | unsigned(II->isPoisoned());
+    Bits = (Bits << 1) | unsigned(II->hasRevertedBuiltin());
     Bits = (Bits << 1) | unsigned(II->hasRevertedTokenIDToIdentifier());
     Bits = (Bits << 1) | unsigned(II->isCPlusPlusOperatorKeyword());
     LE.write<uint16_t>(Bits);
@@ -3229,7 +3235,7 @@ void ASTWriter::WriteIdentifierTable(Pre
   // strings.
   {
     llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> Generator;
-    ASTIdentifierTableTrait Trait(*this, PP, IdResolver);
+    ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule);
 
     // Look for any identifiers that were named while processing the
     // headers, but are otherwise not needed. We add these to the hash
@@ -3245,7 +3251,8 @@ void ASTWriter::WriteIdentifierTable(Pre
     // that their order is stable.
     std::sort(IIs.begin(), IIs.end(), llvm::less_ptr<IdentifierInfo>());
     for (const IdentifierInfo *II : IIs)
-      getIdentifierRef(II);
+      if (Trait.isInterestingNonMacroIdentifier(II))
+        getIdentifierRef(II);
 
     // Create the on-disk hash table representation. We only store offsets
     // for identifiers that appear here for the first time.
@@ -4444,6 +4451,7 @@ void ASTWriter::WriteASTCore(Sema &SemaR
   WriteHeaderSearch(PP.getHeaderSearchInfo());
   WriteSelectors(SemaRef);
   WriteReferencedSelectorsPool(SemaRef);
+  WriteLateParsedTemplates(SemaRef);
   WriteIdentifierTable(PP, SemaRef.IdResolver, isModule);
   WriteFPPragmaOptions(SemaRef.getFPOptions());
   WriteOpenCLExtensions(SemaRef);
@@ -4559,7 +4567,6 @@ void ASTWriter::WriteASTCore(Sema &SemaR
   WriteDeclReplacementsBlock();
   WriteRedeclarations();
   WriteObjCCategories();
-  WriteLateParsedTemplates(SemaRef);
   if(!WritingModule)
     WriteOptimizePragmaOptions(SemaRef);
 

Modified: cfe/trunk/test/Modules/empty.modulemap
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Modules/empty.modulemap?rev=242650&r1=242649&r2=242650&view=diff
==============================================================================
--- cfe/trunk/test/Modules/empty.modulemap (original)
+++ cfe/trunk/test/Modules/empty.modulemap Sun Jul 19 16:41:12 2015
@@ -10,6 +10,12 @@
 // RUN:   -emit-module -fmodule-name=empty -o %t/check.pcm \
 // RUN:   %s
 //
+// The module file should be identical each time we produce it.
 // RUN: diff %t/base.pcm %t/check.pcm
+//
+// We expect an empty module to be less than 30KB.
+// REQUIRES: shell
+// RUN: wc -c %t/base.pcm | FileCheck --check-prefix=CHECK-SIZE %s
+// CHECK-SIZE: {{^[12][0-9]{4} }}
 
 module empty { header "Inputs/empty.h" export * }





More information about the cfe-commits mailing list