r242868 - [modules] Stop performing PCM lookups for all identifiers when building with C++ modules. Instead, serialize a list of interesting identifiers and mark those ones out of date on module import. Avoiding the identifier lookups here gives a 20-30% speedup in builds with large numbers of modules. No functionality change intended.
Richard Smith
richard-llvm at metafoo.co.uk
Tue Jul 21 19:08:41 PDT 2015
Author: rsmith
Date: Tue Jul 21 21:08:40 2015
New Revision: 242868
URL: http://llvm.org/viewvc/llvm-project?rev=242868&view=rev
Log:
[modules] Stop performing PCM lookups for all identifiers when building with C++ modules. Instead, serialize a list of interesting identifiers and mark those ones out of date on module import. Avoiding the identifier lookups here gives a 20-30% speedup in builds with large numbers of modules. No functionality change intended.
Modified:
cfe/trunk/include/clang/Serialization/ASTBitCodes.h
cfe/trunk/include/clang/Serialization/Module.h
cfe/trunk/include/clang/Serialization/ModuleManager.h
cfe/trunk/lib/Sema/Sema.cpp
cfe/trunk/lib/Serialization/ASTReader.cpp
cfe/trunk/lib/Serialization/ASTWriter.cpp
cfe/trunk/lib/Serialization/ModuleManager.cpp
Modified: cfe/trunk/include/clang/Serialization/ASTBitCodes.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/ASTBitCodes.h?rev=242868&r1=242867&r2=242868&view=diff
==============================================================================
--- cfe/trunk/include/clang/Serialization/ASTBitCodes.h (original)
+++ cfe/trunk/include/clang/Serialization/ASTBitCodes.h Tue Jul 21 21:08:40 2015
@@ -543,7 +543,10 @@ namespace clang {
/// macro definition.
MACRO_OFFSET = 47,
- // ID 48 used to be a table of macros.
+ /// \brief A list of "interesting" identifiers. Only used in C++ (where we
+ /// don't normally do lookups into the serialized identifier table). These
+ /// are eagerly deserialized.
+ INTERESTING_IDENTIFIERS = 48,
/// \brief Record code for undefined but used functions and variables that
/// need a definition in this TU.
Modified: cfe/trunk/include/clang/Serialization/Module.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/Module.h?rev=242868&r1=242867&r2=242868&view=diff
==============================================================================
--- cfe/trunk/include/clang/Serialization/Module.h (original)
+++ cfe/trunk/include/clang/Serialization/Module.h Tue Jul 21 21:08:40 2015
@@ -270,6 +270,10 @@ public:
/// IdentifierHashTable.
void *IdentifierLookupTable;
+ /// \brief Offsets of identifiers that we're going to preload within
+ /// IdentifierTableData.
+ std::vector<unsigned> PreloadIdentifierOffsets;
+
// === Macros ===
/// \brief The cursor to the start of the preprocessor block, which stores
Modified: cfe/trunk/include/clang/Serialization/ModuleManager.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/ModuleManager.h?rev=242868&r1=242867&r2=242868&view=diff
==============================================================================
--- cfe/trunk/include/clang/Serialization/ModuleManager.h (original)
+++ cfe/trunk/include/clang/Serialization/ModuleManager.h Tue Jul 21 21:08:40 2015
@@ -30,14 +30,19 @@ namespace serialization {
/// \brief Manages the set of modules loaded by an AST reader.
class ModuleManager {
- /// \brief The chain of AST files. The first entry is the one named by the
- /// user, the last one is the one that doesn't depend on anything further.
+ /// \brief The chain of AST files, in the order in which we started to load
+ /// them (this order isn't really useful for anything).
SmallVector<ModuleFile *, 2> Chain;
+ /// \brief The chain of non-module PCH files. The first entry is the one named
+ /// by the user, the last one is the one that doesn't depend on anything
+ /// further.
+ SmallVector<ModuleFile *, 2> PCHChain;
+
// \brief The roots of the dependency DAG of AST files. This is used
// to implement short-circuiting logic when running DFS over the dependencies.
SmallVector<ModuleFile *, 2> Roots;
-
+
/// \brief All loaded modules, indexed by name.
llvm::DenseMap<const FileEntry *, ModuleFile *> Modules;
@@ -138,6 +143,11 @@ public:
ModuleReverseIterator rbegin() { return Chain.rbegin(); }
/// \brief Reverse iterator end-point to traverse all loaded modules.
ModuleReverseIterator rend() { return Chain.rend(); }
+
+ /// \brief A range covering the PCH and preamble module files loaded.
+ llvm::iterator_range<ModuleConstIterator> pch_modules() const {
+ return llvm::make_range(PCHChain.begin(), PCHChain.end());
+ }
/// \brief Returns the primary module associated with the manager, that is,
/// the first module loaded
Modified: cfe/trunk/lib/Sema/Sema.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/Sema.cpp?rev=242868&r1=242867&r2=242868&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/Sema.cpp (original)
+++ cfe/trunk/lib/Sema/Sema.cpp Tue Jul 21 21:08:40 2015
@@ -154,6 +154,9 @@ void Sema::Initialize() {
// will not be able to merge any duplicate __va_list_tag decls correctly.
VAListTagName = PP.getIdentifierInfo("__va_list_tag");
+ if (!TUScope)
+ return;
+
// Initialize predefined 128-bit integer types, if needed.
if (Context.getTargetInfo().hasInt128Type()) {
// If either of the 128-bit integer types are unavailable to name lookup,
Modified: cfe/trunk/lib/Serialization/ASTReader.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReader.cpp?rev=242868&r1=242867&r2=242868&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTReader.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTReader.cpp Tue Jul 21 21:08:40 2015
@@ -2564,6 +2564,10 @@ ASTReader::ReadASTBlock(ModuleFile &F, u
break;
}
+ case INTERESTING_IDENTIFIERS:
+ F.PreloadIdentifierOffsets.assign(Record.begin(), Record.end());
+ break;
+
case EAGERLY_DESERIALIZED_DECLS:
// FIXME: Skip reading this record if our ASTConsumer doesn't care
// about "interesting" decls (for instance, if we're building a module).
@@ -3410,6 +3414,18 @@ ASTReader::ASTReadResult ASTReader::Read
// SourceManager.
SourceMgr.getLoadedSLocEntryByID(Index);
}
+
+ // Preload all the pending interesting identifiers by marking them out of
+ // date.
+ for (auto Offset : F.PreloadIdentifierOffsets) {
+ const unsigned char *Data = reinterpret_cast<const unsigned char *>(
+ F.IdentifierTableData + Offset);
+
+ ASTIdentifierLookupTrait Trait(*this, F);
+ auto KeyDataLen = Trait.ReadKeyDataLength(Data);
+ auto Key = Trait.ReadKey(Data, KeyDataLen.first);
+ PP.getIdentifierTable().getOwn(Key).setOutOfDate(true);
+ }
}
// Setup the import locations and notify the module manager that we've
@@ -3430,13 +3446,20 @@ ASTReader::ASTReadResult ASTReader::Read
M->ImportLoc.getRawEncoding());
}
- // Mark all of the identifiers in the identifier table as being out of date,
- // so that various accessors know to check the loaded modules when the
- // identifier is used.
- for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(),
- IdEnd = PP.getIdentifierTable().end();
- Id != IdEnd; ++Id)
- Id->second->setOutOfDate(true);
+ if (!Context.getLangOpts().CPlusPlus ||
+ (Type != MK_ImplicitModule && Type != MK_ExplicitModule)) {
+ // Mark all of the identifiers in the identifier table as being out of date,
+ // so that various accessors know to check the loaded modules when the
+ // identifier is used.
+ //
+ // For C++ modules, we don't need information on many identifiers (just
+ // those that provide macros or are poisoned), so we mark all of
+ // the interesting ones via PreloadIdentifierOffsets.
+ for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(),
+ IdEnd = PP.getIdentifierTable().end();
+ Id != IdEnd; ++Id)
+ Id->second->setOutOfDate(true);
+ }
// Resolve any unresolved module exports.
for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) {
@@ -6827,19 +6850,32 @@ IdentifierInfo *ASTReader::get(StringRef
// Note that we are loading an identifier.
Deserializing AnIdentifier(this);
- // If there is a global index, look there first to determine which modules
- // provably do not have any results for this identifier.
- GlobalModuleIndex::HitSet Hits;
- GlobalModuleIndex::HitSet *HitsPtr = nullptr;
- if (!loadGlobalIndex()) {
- if (GlobalIndex->lookupIdentifier(Name, Hits)) {
- HitsPtr = &Hits;
- }
- }
IdentifierLookupVisitor Visitor(Name, /*PriorGeneration=*/0,
NumIdentifierLookups,
NumIdentifierLookupHits);
- ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor, HitsPtr);
+
+ // We don't need to do identifier table lookups in C++ modules (we preload
+ // all interesting declarations, and don't need to use the scope for name
+ // lookups). Perform the lookup in PCH files, though, since we don't build
+ // a complete initial identifier table if we're carrying on from a PCH.
+ if (Context.getLangOpts().CPlusPlus) {
+ for (auto F : ModuleMgr.pch_modules())
+ if (Visitor.visit(*F, &Visitor))
+ break;
+ } else {
+ // If there is a global index, look there first to determine which modules
+ // provably do not have any results for this identifier.
+ GlobalModuleIndex::HitSet Hits;
+ GlobalModuleIndex::HitSet *HitsPtr = nullptr;
+ if (!loadGlobalIndex()) {
+ if (GlobalIndex->lookupIdentifier(Name, Hits)) {
+ HitsPtr = &Hits;
+ }
+ }
+
+ ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor, HitsPtr);
+ }
+
IdentifierInfo *II = Visitor.getIdentifierInfo();
markIdentifierUpToDate(II);
return II;
Modified: cfe/trunk/lib/Serialization/ASTWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTWriter.cpp?rev=242868&r1=242867&r2=242868&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTWriter.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTWriter.cpp Tue Jul 21 21:08:40 2015
@@ -3104,6 +3104,7 @@ class ASTIdentifierTableTrait {
IdentifierResolver &IdResolver;
bool IsModule;
bool NeedDecls;
+ ASTWriter::RecordData *InterestingIdentifierOffsets;
/// \brief Determines whether this is an "interesting" identifier that needs a
/// full IdentifierInfo structure written into the hash table. Notably, this
@@ -3131,14 +3132,20 @@ public:
typedef unsigned offset_type;
ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP,
- IdentifierResolver &IdResolver, bool IsModule)
+ IdentifierResolver &IdResolver, bool IsModule,
+ ASTWriter::RecordData *InterestingIdentifierOffsets)
: Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule),
- NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus) {}
+ NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus),
+ InterestingIdentifierOffsets(InterestingIdentifierOffsets) {}
static hash_value_type ComputeHash(const IdentifierInfo* II) {
return llvm::HashString(II->getName());
}
+ bool isInterestingIdentifier(const IdentifierInfo *II) {
+ auto MacroOffset = Writer.getMacroDirectivesOffset(II);
+ return isInterestingIdentifier(II, MacroOffset);
+ }
bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) {
return isInterestingIdentifier(II, 0);
}
@@ -3178,6 +3185,12 @@ public:
// Record the location of the key data. This is used when generating
// the mapping from persistent IDs to strings.
Writer.SetIdentifierOffset(II, Out.tell());
+
+ // Emit the offset of the key/data length information to the interesting
+ // identifiers table if necessary.
+ if (InterestingIdentifierOffsets && isInterestingIdentifier(II))
+ InterestingIdentifierOffsets->push_back(Out.tell() - 4);
+
Out.write(II->getNameStart(), KeyLen);
}
@@ -3238,11 +3251,15 @@ void ASTWriter::WriteIdentifierTable(Pre
bool IsModule) {
using namespace llvm;
+ RecordData InterestingIdents;
+
// Create and write out the blob that contains the identifier
// strings.
{
llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> Generator;
- ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule);
+ ASTIdentifierTableTrait Trait(
+ *this, PP, IdResolver, IsModule,
+ (getLangOpts().CPlusPlus && IsModule) ? &InterestingIdents : nullptr);
// Look for any identifiers that were named while processing the
// headers, but are otherwise not needed. We add these to the hash
@@ -3316,6 +3333,11 @@ void ASTWriter::WriteIdentifierTable(Pre
Record.push_back(FirstIdentID - NUM_PREDEF_IDENT_IDS);
Stream.EmitRecordWithBlob(IdentifierOffsetAbbrev, Record,
bytes(IdentifierOffsets));
+
+ // In C++, write the list of interesting identifiers (those that are
+ // defined as macros, poisoned, or similar unusual things).
+ if (!InterestingIdents.empty())
+ Stream.EmitRecord(INTERESTING_IDENTIFIERS, InterestingIdents);
}
//===----------------------------------------------------------------------===//
Modified: cfe/trunk/lib/Serialization/ModuleManager.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ModuleManager.cpp?rev=242868&r1=242867&r2=242868&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ModuleManager.cpp (original)
+++ cfe/trunk/lib/Serialization/ModuleManager.cpp Tue Jul 21 21:08:40 2015
@@ -95,6 +95,8 @@ ModuleManager::addModule(StringRef FileN
New->File = Entry;
New->ImportLoc = ImportLoc;
Chain.push_back(New);
+ if (!New->isModule())
+ PCHChain.push_back(New);
if (!ImportedBy)
Roots.push_back(New);
NewModule = true;
@@ -159,6 +161,8 @@ ModuleManager::addModule(StringRef FileN
Modules.erase(Entry);
assert(Chain.back() == ModuleEntry);
Chain.pop_back();
+ if (!ModuleEntry->isModule())
+ PCHChain.pop_back();
if (Roots.back() == ModuleEntry)
Roots.pop_back();
else
@@ -225,6 +229,15 @@ void ModuleManager::removeModules(
// Remove the modules from the chain.
Chain.erase(first, last);
+
+ // Also remove them from the PCH chain.
+ for (auto I = first; I != last; ++I) {
+ if (!(*I)->isModule()) {
+ PCHChain.erase(std::find(PCHChain.begin(), PCHChain.end(), *I),
+ PCHChain.end());
+ break;
+ }
+ }
}
void
More information about the cfe-commits
mailing list