r242868 - [modules] Stop performing PCM lookups for all identifiers when building with C++ modules. Instead, serialize a list of interesting identifiers and mark those ones out of date on module import. Avoiding the identifier lookups here gives a 20-30% speedup in builds with large numbers of modules. No functionality change intended.
Adam Nemet
anemet at apple.com
Wed Jul 22 15:37:10 PDT 2015
Hi Richard,
Looks like this caused some ASan failures:
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/5677 <http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/5677>
Can you please take a look?
Thanks,
Adam
> On Jul 21, 2015, at 7:08 PM, Richard Smith <richard-llvm at metafoo.co.uk> wrote:
>
> Author: rsmith
> Date: Tue Jul 21 21:08:40 2015
> New Revision: 242868
>
> URL: http://llvm.org/viewvc/llvm-project?rev=242868&view=rev
> Log:
> [modules] Stop performing PCM lookups for all identifiers when building with C++ modules. Instead, serialize a list of interesting identifiers and mark those ones out of date on module import. Avoiding the identifier lookups here gives a 20-30% speedup in builds with large numbers of modules. No functionality change intended.
>
> Modified:
> cfe/trunk/include/clang/Serialization/ASTBitCodes.h
> cfe/trunk/include/clang/Serialization/Module.h
> cfe/trunk/include/clang/Serialization/ModuleManager.h
> cfe/trunk/lib/Sema/Sema.cpp
> cfe/trunk/lib/Serialization/ASTReader.cpp
> cfe/trunk/lib/Serialization/ASTWriter.cpp
> cfe/trunk/lib/Serialization/ModuleManager.cpp
>
> Modified: cfe/trunk/include/clang/Serialization/ASTBitCodes.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/ASTBitCodes.h?rev=242868&r1=242867&r2=242868&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/Serialization/ASTBitCodes.h (original)
> +++ cfe/trunk/include/clang/Serialization/ASTBitCodes.h Tue Jul 21 21:08:40 2015
> @@ -543,7 +543,10 @@ namespace clang {
> /// macro definition.
> MACRO_OFFSET = 47,
>
> - // ID 48 used to be a table of macros.
> + /// \brief A list of "interesting" identifiers. Only used in C++ (where we
> + /// don't normally do lookups into the serialized identifier table). These
> + /// are eagerly deserialized.
> + INTERESTING_IDENTIFIERS = 48,
>
> /// \brief Record code for undefined but used functions and variables that
> /// need a definition in this TU.
>
> Modified: cfe/trunk/include/clang/Serialization/Module.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/Module.h?rev=242868&r1=242867&r2=242868&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/Serialization/Module.h (original)
> +++ cfe/trunk/include/clang/Serialization/Module.h Tue Jul 21 21:08:40 2015
> @@ -270,6 +270,10 @@ public:
> /// IdentifierHashTable.
> void *IdentifierLookupTable;
>
> + /// \brief Offsets of identifiers that we're going to preload within
> + /// IdentifierTableData.
> + std::vector<unsigned> PreloadIdentifierOffsets;
> +
> // === Macros ===
>
> /// \brief The cursor to the start of the preprocessor block, which stores
>
> Modified: cfe/trunk/include/clang/Serialization/ModuleManager.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/ModuleManager.h?rev=242868&r1=242867&r2=242868&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/Serialization/ModuleManager.h (original)
> +++ cfe/trunk/include/clang/Serialization/ModuleManager.h Tue Jul 21 21:08:40 2015
> @@ -30,14 +30,19 @@ namespace serialization {
>
> /// \brief Manages the set of modules loaded by an AST reader.
> class ModuleManager {
> - /// \brief The chain of AST files. The first entry is the one named by the
> - /// user, the last one is the one that doesn't depend on anything further.
> + /// \brief The chain of AST files, in the order in which we started to load
> + /// them (this order isn't really useful for anything).
> SmallVector<ModuleFile *, 2> Chain;
>
> + /// \brief The chain of non-module PCH files. The first entry is the one named
> + /// by the user, the last one is the one that doesn't depend on anything
> + /// further.
> + SmallVector<ModuleFile *, 2> PCHChain;
> +
> // \brief The roots of the dependency DAG of AST files. This is used
> // to implement short-circuiting logic when running DFS over the dependencies.
> SmallVector<ModuleFile *, 2> Roots;
> -
> +
> /// \brief All loaded modules, indexed by name.
> llvm::DenseMap<const FileEntry *, ModuleFile *> Modules;
>
> @@ -138,6 +143,11 @@ public:
> ModuleReverseIterator rbegin() { return Chain.rbegin(); }
> /// \brief Reverse iterator end-point to traverse all loaded modules.
> ModuleReverseIterator rend() { return Chain.rend(); }
> +
> + /// \brief A range covering the PCH and preamble module files loaded.
> + llvm::iterator_range<ModuleConstIterator> pch_modules() const {
> + return llvm::make_range(PCHChain.begin(), PCHChain.end());
> + }
>
> /// \brief Returns the primary module associated with the manager, that is,
> /// the first module loaded
>
> Modified: cfe/trunk/lib/Sema/Sema.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/Sema.cpp?rev=242868&r1=242867&r2=242868&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Sema/Sema.cpp (original)
> +++ cfe/trunk/lib/Sema/Sema.cpp Tue Jul 21 21:08:40 2015
> @@ -154,6 +154,9 @@ void Sema::Initialize() {
> // will not be able to merge any duplicate __va_list_tag decls correctly.
> VAListTagName = PP.getIdentifierInfo("__va_list_tag");
>
> + if (!TUScope)
> + return;
> +
> // Initialize predefined 128-bit integer types, if needed.
> if (Context.getTargetInfo().hasInt128Type()) {
> // If either of the 128-bit integer types are unavailable to name lookup,
>
> Modified: cfe/trunk/lib/Serialization/ASTReader.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReader.cpp?rev=242868&r1=242867&r2=242868&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Serialization/ASTReader.cpp (original)
> +++ cfe/trunk/lib/Serialization/ASTReader.cpp Tue Jul 21 21:08:40 2015
> @@ -2564,6 +2564,10 @@ ASTReader::ReadASTBlock(ModuleFile &F, u
> break;
> }
>
> + case INTERESTING_IDENTIFIERS:
> + F.PreloadIdentifierOffsets.assign(Record.begin(), Record.end());
> + break;
> +
> case EAGERLY_DESERIALIZED_DECLS:
> // FIXME: Skip reading this record if our ASTConsumer doesn't care
> // about "interesting" decls (for instance, if we're building a module).
> @@ -3410,6 +3414,18 @@ ASTReader::ASTReadResult ASTReader::Read
> // SourceManager.
> SourceMgr.getLoadedSLocEntryByID(Index);
> }
> +
> + // Preload all the pending interesting identifiers by marking them out of
> + // date.
> + for (auto Offset : F.PreloadIdentifierOffsets) {
> + const unsigned char *Data = reinterpret_cast<const unsigned char *>(
> + F.IdentifierTableData + Offset);
> +
> + ASTIdentifierLookupTrait Trait(*this, F);
> + auto KeyDataLen = Trait.ReadKeyDataLength(Data);
> + auto Key = Trait.ReadKey(Data, KeyDataLen.first);
> + PP.getIdentifierTable().getOwn(Key).setOutOfDate(true);
> + }
> }
>
> // Setup the import locations and notify the module manager that we've
> @@ -3430,13 +3446,20 @@ ASTReader::ASTReadResult ASTReader::Read
> M->ImportLoc.getRawEncoding());
> }
>
> - // Mark all of the identifiers in the identifier table as being out of date,
> - // so that various accessors know to check the loaded modules when the
> - // identifier is used.
> - for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(),
> - IdEnd = PP.getIdentifierTable().end();
> - Id != IdEnd; ++Id)
> - Id->second->setOutOfDate(true);
> + if (!Context.getLangOpts().CPlusPlus ||
> + (Type != MK_ImplicitModule && Type != MK_ExplicitModule)) {
> + // Mark all of the identifiers in the identifier table as being out of date,
> + // so that various accessors know to check the loaded modules when the
> + // identifier is used.
> + //
> + // For C++ modules, we don't need information on many identifiers (just
> + // those that provide macros or are poisoned), so we mark all of
> + // the interesting ones via PreloadIdentifierOffsets.
> + for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(),
> + IdEnd = PP.getIdentifierTable().end();
> + Id != IdEnd; ++Id)
> + Id->second->setOutOfDate(true);
> + }
>
> // Resolve any unresolved module exports.
> for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) {
> @@ -6827,19 +6850,32 @@ IdentifierInfo *ASTReader::get(StringRef
> // Note that we are loading an identifier.
> Deserializing AnIdentifier(this);
>
> - // If there is a global index, look there first to determine which modules
> - // provably do not have any results for this identifier.
> - GlobalModuleIndex::HitSet Hits;
> - GlobalModuleIndex::HitSet *HitsPtr = nullptr;
> - if (!loadGlobalIndex()) {
> - if (GlobalIndex->lookupIdentifier(Name, Hits)) {
> - HitsPtr = &Hits;
> - }
> - }
> IdentifierLookupVisitor Visitor(Name, /*PriorGeneration=*/0,
> NumIdentifierLookups,
> NumIdentifierLookupHits);
> - ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor, HitsPtr);
> +
> + // We don't need to do identifier table lookups in C++ modules (we preload
> + // all interesting declarations, and don't need to use the scope for name
> + // lookups). Perform the lookup in PCH files, though, since we don't build
> + // a complete initial identifier table if we're carrying on from a PCH.
> + if (Context.getLangOpts().CPlusPlus) {
> + for (auto F : ModuleMgr.pch_modules())
> + if (Visitor.visit(*F, &Visitor))
> + break;
> + } else {
> + // If there is a global index, look there first to determine which modules
> + // provably do not have any results for this identifier.
> + GlobalModuleIndex::HitSet Hits;
> + GlobalModuleIndex::HitSet *HitsPtr = nullptr;
> + if (!loadGlobalIndex()) {
> + if (GlobalIndex->lookupIdentifier(Name, Hits)) {
> + HitsPtr = &Hits;
> + }
> + }
> +
> + ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor, HitsPtr);
> + }
> +
> IdentifierInfo *II = Visitor.getIdentifierInfo();
> markIdentifierUpToDate(II);
> return II;
>
> Modified: cfe/trunk/lib/Serialization/ASTWriter.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTWriter.cpp?rev=242868&r1=242867&r2=242868&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Serialization/ASTWriter.cpp (original)
> +++ cfe/trunk/lib/Serialization/ASTWriter.cpp Tue Jul 21 21:08:40 2015
> @@ -3104,6 +3104,7 @@ class ASTIdentifierTableTrait {
> IdentifierResolver &IdResolver;
> bool IsModule;
> bool NeedDecls;
> + ASTWriter::RecordData *InterestingIdentifierOffsets;
>
> /// \brief Determines whether this is an "interesting" identifier that needs a
> /// full IdentifierInfo structure written into the hash table. Notably, this
> @@ -3131,14 +3132,20 @@ public:
> typedef unsigned offset_type;
>
> ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP,
> - IdentifierResolver &IdResolver, bool IsModule)
> + IdentifierResolver &IdResolver, bool IsModule,
> + ASTWriter::RecordData *InterestingIdentifierOffsets)
> : Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule),
> - NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus) {}
> + NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus),
> + InterestingIdentifierOffsets(InterestingIdentifierOffsets) {}
>
> static hash_value_type ComputeHash(const IdentifierInfo* II) {
> return llvm::HashString(II->getName());
> }
>
> + bool isInterestingIdentifier(const IdentifierInfo *II) {
> + auto MacroOffset = Writer.getMacroDirectivesOffset(II);
> + return isInterestingIdentifier(II, MacroOffset);
> + }
> bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) {
> return isInterestingIdentifier(II, 0);
> }
> @@ -3178,6 +3185,12 @@ public:
> // Record the location of the key data. This is used when generating
> // the mapping from persistent IDs to strings.
> Writer.SetIdentifierOffset(II, Out.tell());
> +
> + // Emit the offset of the key/data length information to the interesting
> + // identifiers table if necessary.
> + if (InterestingIdentifierOffsets && isInterestingIdentifier(II))
> + InterestingIdentifierOffsets->push_back(Out.tell() - 4);
> +
> Out.write(II->getNameStart(), KeyLen);
> }
>
> @@ -3238,11 +3251,15 @@ void ASTWriter::WriteIdentifierTable(Pre
> bool IsModule) {
> using namespace llvm;
>
> + RecordData InterestingIdents;
> +
> // Create and write out the blob that contains the identifier
> // strings.
> {
> llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> Generator;
> - ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule);
> + ASTIdentifierTableTrait Trait(
> + *this, PP, IdResolver, IsModule,
> + (getLangOpts().CPlusPlus && IsModule) ? &InterestingIdents : nullptr);
>
> // Look for any identifiers that were named while processing the
> // headers, but are otherwise not needed. We add these to the hash
> @@ -3316,6 +3333,11 @@ void ASTWriter::WriteIdentifierTable(Pre
> Record.push_back(FirstIdentID - NUM_PREDEF_IDENT_IDS);
> Stream.EmitRecordWithBlob(IdentifierOffsetAbbrev, Record,
> bytes(IdentifierOffsets));
> +
> + // In C++, write the list of interesting identifiers (those that are
> + // defined as macros, poisoned, or similar unusual things).
> + if (!InterestingIdents.empty())
> + Stream.EmitRecord(INTERESTING_IDENTIFIERS, InterestingIdents);
> }
>
> //===----------------------------------------------------------------------===//
>
> Modified: cfe/trunk/lib/Serialization/ModuleManager.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ModuleManager.cpp?rev=242868&r1=242867&r2=242868&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Serialization/ModuleManager.cpp (original)
> +++ cfe/trunk/lib/Serialization/ModuleManager.cpp Tue Jul 21 21:08:40 2015
> @@ -95,6 +95,8 @@ ModuleManager::addModule(StringRef FileN
> New->File = Entry;
> New->ImportLoc = ImportLoc;
> Chain.push_back(New);
> + if (!New->isModule())
> + PCHChain.push_back(New);
> if (!ImportedBy)
> Roots.push_back(New);
> NewModule = true;
> @@ -159,6 +161,8 @@ ModuleManager::addModule(StringRef FileN
> Modules.erase(Entry);
> assert(Chain.back() == ModuleEntry);
> Chain.pop_back();
> + if (!ModuleEntry->isModule())
> + PCHChain.pop_back();
> if (Roots.back() == ModuleEntry)
> Roots.pop_back();
> else
> @@ -225,6 +229,15 @@ void ModuleManager::removeModules(
>
> // Remove the modules from the chain.
> Chain.erase(first, last);
> +
> + // Also remove them from the PCH chain.
> + for (auto I = first; I != last; ++I) {
> + if (!(*I)->isModule()) {
> + PCHChain.erase(std::find(PCHChain.begin(), PCHChain.end(), *I),
> + PCHChain.end());
> + break;
> + }
> + }
> }
>
> void
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20150722/e127c00b/attachment.html>
More information about the cfe-commits
mailing list