[clang] [clang][deps] Only write preprocessor info into PCMs (PR #115239)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 6 16:02:39 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-modules
Author: Jan Svoboda (jansvoboda11)
<details>
<summary>Changes</summary>
This patch builds on top of https://github.com/llvm/llvm-project/pull/115237 and https://github.com/llvm/llvm-project/pull/115235, and passes only the `Preprocessor` object to `ASTWriter`. This reduces the size of scanning PCM files by one third and speeds up scans by 16%.
---
Patch is 51.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115239.diff
10 Files Affected:
- (modified) clang/include/clang/Lex/HeaderSearchOptions.h (+5)
- (modified) clang/include/clang/Serialization/ASTRecordWriter.h (+4-3)
- (modified) clang/include/clang/Serialization/ASTWriter.h (+21-28)
- (modified) clang/lib/Frontend/ASTUnit.cpp (+1-1)
- (modified) clang/lib/Serialization/ASTReader.cpp (+3)
- (modified) clang/lib/Serialization/ASTWriter.cpp (+149-127)
- (modified) clang/lib/Serialization/ASTWriterDecl.cpp (+30-25)
- (modified) clang/lib/Serialization/ASTWriterStmt.cpp (+8-7)
- (modified) clang/lib/Serialization/GeneratePCH.cpp (+14-5)
- (modified) clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp (+1)
``````````diff
diff --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h
index c85e3d27281701..7a16926c186d2c 100644
--- a/clang/include/clang/Lex/HeaderSearchOptions.h
+++ b/clang/include/clang/Lex/HeaderSearchOptions.h
@@ -255,6 +255,10 @@ class HeaderSearchOptions {
LLVM_PREFERRED_TYPE(bool)
unsigned ModulesHashContent : 1;
+ /// Whether AST files should only contain the preprocessor information.
+ LLVM_PREFERRED_TYPE(bool)
+ unsigned ModulesSerializeOnlyPreprocessor : 1;
+
/// Whether we should include all things that could impact the module in the
/// hash.
///
@@ -288,6 +292,7 @@ class HeaderSearchOptions {
ModulesSkipHeaderSearchPaths(false),
ModulesSkipPragmaDiagnosticMappings(false),
ModulesPruneNonAffectingModuleMaps(true), ModulesHashContent(false),
+ ModulesSerializeOnlyPreprocessor(false),
ModulesStrictContextHash(false), ModulesIncludeVFSUsage(false),
AllowModuleMapSubdirectorySearch(true) {}
diff --git a/clang/include/clang/Serialization/ASTRecordWriter.h b/clang/include/clang/Serialization/ASTRecordWriter.h
index d6090ba1a6c690..67720a0aebc1ca 100644
--- a/clang/include/clang/Serialization/ASTRecordWriter.h
+++ b/clang/include/clang/Serialization/ASTRecordWriter.h
@@ -60,8 +60,9 @@ class ASTRecordWriter
public:
/// Construct a ASTRecordWriter that uses the default encoding scheme.
- ASTRecordWriter(ASTWriter &W, ASTWriter::RecordDataImpl &Record)
- : DataStreamBasicWriter(W.getASTContext()), Writer(&W), Record(&Record) {}
+ ASTRecordWriter(ASTContext &Context, ASTWriter &W,
+ ASTWriter::RecordDataImpl &Record)
+ : DataStreamBasicWriter(Context), Writer(&W), Record(&Record) {}
/// Construct a ASTRecordWriter that uses the same encoding scheme as another
/// ASTRecordWriter.
@@ -208,7 +209,7 @@ class ASTRecordWriter
/// Emit a reference to a type.
void AddTypeRef(QualType T) {
- return Writer->AddTypeRef(T, *Record);
+ return Writer->AddTypeRef(getASTContext(), T, *Record);
}
void writeQualType(QualType T) {
AddTypeRef(T);
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index d0e841f367c1e0..51253ea3c63ec7 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -119,9 +119,6 @@ class ASTWriter : public ASTDeserializationListener,
/// The PCM manager which manages memory buffers for pcm files.
InMemoryModuleCache &ModuleCache;
- /// The ASTContext we're writing.
- ASTContext *Context = nullptr;
-
/// The preprocessor we're writing.
Preprocessor *PP = nullptr;
@@ -545,7 +542,7 @@ class ASTWriter : public ASTDeserializationListener,
unsigned getSubmoduleID(Module *Mod);
/// Write the given subexpression to the bitstream.
- void WriteSubStmt(Stmt *S);
+ void WriteSubStmt(ASTContext &Context, Stmt *S);
void WriteBlockInfoBlock();
void WriteControlBlock(Preprocessor &PP, StringRef isysroot);
@@ -564,34 +561,36 @@ class ASTWriter : public ASTDeserializationListener,
void WriteHeaderSearch(const HeaderSearch &HS);
void WritePreprocessorDetail(PreprocessingRecord &PPRec,
uint64_t MacroOffsetsBase);
- void WriteSubmodules(Module *WritingModule);
+ void WriteSubmodules(Module *WritingModule, ASTContext *Context);
void WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag,
bool isModule);
unsigned TypeExtQualAbbrev = 0;
void WriteTypeAbbrevs();
- void WriteType(QualType T);
+ void WriteType(ASTContext &Context, QualType T);
bool isLookupResultExternal(StoredDeclsList &Result, DeclContext *DC);
- void GenerateNameLookupTable(const DeclContext *DC,
+ void GenerateNameLookupTable(ASTContext &Context, const DeclContext *DC,
llvm::SmallVectorImpl<char> &LookupTable);
uint64_t WriteDeclContextLexicalBlock(ASTContext &Context,
const DeclContext *DC);
uint64_t WriteDeclContextVisibleBlock(ASTContext &Context, DeclContext *DC);
void WriteTypeDeclOffsets();
void WriteFileDeclIDsMap();
- void WriteComments();
+ void WriteComments(ASTContext &Context);
void WriteSelectors(Sema &SemaRef);
void WriteReferencedSelectorsPool(Sema &SemaRef);
- void WriteIdentifierTable(Preprocessor &PP, IdentifierResolver &IdResolver,
+ void WriteIdentifierTable(Preprocessor &PP, IdentifierResolver *IdResolver,
bool IsModule);
void WriteDeclAndTypes(ASTContext &Context);
void PrepareWritingSpecialDecls(Sema &SemaRef);
void WriteSpecialDeclRecords(Sema &SemaRef);
- void WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord);
- void WriteDeclContextVisibleUpdate(const DeclContext *DC);
+ void WriteDeclUpdatesBlocks(ASTContext &Context,
+ RecordDataImpl &OffsetsRecord);
+ void WriteDeclContextVisibleUpdate(ASTContext &Context,
+ const DeclContext *DC);
void WriteFPPragmaOptions(const FPOptionsOverride &Opts);
void WriteOpenCLExtensions(Sema &SemaRef);
void WriteCUDAPragmas(Sema &SemaRef);
@@ -640,7 +639,7 @@ class ASTWriter : public ASTDeserializationListener,
void WriteDeclAbbrevs();
void WriteDecl(ASTContext &Context, Decl *D);
- ASTFileSignature WriteASTCore(Sema &SemaRef, StringRef isysroot,
+ ASTFileSignature WriteASTCore(Sema *SemaPtr, StringRef isysroot,
Module *WritingModule);
public:
@@ -653,11 +652,6 @@ class ASTWriter : public ASTDeserializationListener,
bool GeneratingReducedBMI = false);
~ASTWriter() override;
- ASTContext &getASTContext() const {
- assert(Context && "requested AST context when not writing AST");
- return *Context;
- }
-
const LangOptions &getLangOpts() const;
/// Get a timestamp for output into the AST file. The actual timestamp
@@ -667,8 +661,8 @@ class ASTWriter : public ASTDeserializationListener,
/// Write a precompiled header for the given semantic analysis.
///
- /// \param SemaRef a reference to the semantic analysis object that processed
- /// the AST to be written into the precompiled header.
+ /// \param Subject The object that processed the input to be written into the
+ /// AST file.
///
/// \param WritingModule The module that we are writing. If null, we are
/// writing a precompiled header.
@@ -679,8 +673,9 @@ class ASTWriter : public ASTDeserializationListener,
///
/// \return the module signature, which eventually will be a hash of
/// the module but currently is merely a random 32-bit number.
- ASTFileSignature WriteAST(Sema &SemaRef, StringRef OutputFile,
- Module *WritingModule, StringRef isysroot,
+ ASTFileSignature WriteAST(llvm::PointerUnion<Sema *, Preprocessor*> Subject,
+ StringRef OutputFile, Module *WritingModule,
+ StringRef isysroot,
bool ShouldCacheASTInMemory = false);
/// Emit a token.
@@ -723,10 +718,10 @@ class ASTWriter : public ASTDeserializationListener,
uint32_t getMacroDirectivesOffset(const IdentifierInfo *Name);
/// Emit a reference to a type.
- void AddTypeRef(QualType T, RecordDataImpl &Record);
+ void AddTypeRef(ASTContext &Context, QualType T, RecordDataImpl &Record);
/// Force a type to be emitted and get its ID.
- serialization::TypeID GetOrCreateTypeID(QualType T);
+ serialization::TypeID GetOrCreateTypeID(ASTContext &Context, QualType T);
/// Find the first local declaration of a given local redeclarable
/// decl.
@@ -928,9 +923,9 @@ class PCHGenerator : public SemaConsumer {
void anchor() override;
Preprocessor &PP;
+ llvm::PointerUnion<Sema *, Preprocessor *> Subject;
std::string OutputFile;
std::string isysroot;
- Sema *SemaPtr;
std::shared_ptr<PCHBuffer> Buffer;
llvm::BitstreamWriter Stream;
ASTWriter Writer;
@@ -945,9 +940,7 @@ class PCHGenerator : public SemaConsumer {
bool isComplete() const { return Buffer->IsComplete; }
PCHBuffer *getBufferPtr() { return Buffer.get(); }
StringRef getOutputFile() const { return OutputFile; }
- DiagnosticsEngine &getDiagnostics() const {
- return SemaPtr->getDiagnostics();
- }
+ DiagnosticsEngine &getDiagnostics() const;
Preprocessor &getPreprocessor() { return PP; }
virtual Module *getEmittingModule(ASTContext &Ctx);
@@ -963,7 +956,7 @@ class PCHGenerator : public SemaConsumer {
bool GeneratingReducedBMI = false);
~PCHGenerator() override;
- void InitializeSema(Sema &S) override { SemaPtr = &S; }
+ void InitializeSema(Sema &S) override;
void HandleTranslationUnit(ASTContext &Ctx) override;
void HandleVTable(CXXRecordDecl *RD) override { Writer.handleVTable(RD); }
ASTMutationListener *GetASTMutationListener() override;
diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp
index 4aec928f9eb0a5..f58e27a7979d1e 100644
--- a/clang/lib/Frontend/ASTUnit.cpp
+++ b/clang/lib/Frontend/ASTUnit.cpp
@@ -2359,7 +2359,7 @@ bool ASTUnit::Save(StringRef File) {
static bool serializeUnit(ASTWriter &Writer, SmallVectorImpl<char> &Buffer,
Sema &S, raw_ostream &OS) {
- Writer.WriteAST(S, std::string(), nullptr, "");
+ Writer.WriteAST(&S, std::string(), nullptr, "");
// Write the generated bitstream to "Out".
if (!Buffer.empty())
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 79615dc3c018ea..8d2debf84f5ad8 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3529,6 +3529,9 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
break;
}
+ if (Record.empty())
+ break;
+
if (SpecialTypes.size() != Record.size())
return llvm::createStringError(std::errc::illegal_byte_sequence,
"invalid special-types record");
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index b95e29cbc02515..43fac29f32609c 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -277,8 +277,8 @@ class ASTTypeWriter {
ASTRecordWriter BasicWriter;
public:
- ASTTypeWriter(ASTWriter &Writer)
- : Writer(Writer), BasicWriter(Writer, Record) {}
+ ASTTypeWriter(ASTContext &Context, ASTWriter &Writer)
+ : Writer(Writer), BasicWriter(Context, Writer, Record) {}
uint64_t write(QualType T) {
if (T.hasLocalNonFastQualifiers()) {
@@ -2872,7 +2872,7 @@ static unsigned getNumberOfModules(Module *Mod) {
return ChildModules + 1;
}
-void ASTWriter::WriteSubmodules(Module *WritingModule) {
+void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) {
// Enter the submodule description block.
Stream.EnterSubblock(SUBMODULE_BLOCK_ID, /*bits for abbreviations*/5);
@@ -3123,12 +3123,14 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) {
// Emit the reachable initializers.
// The initializer may only be unreachable in reduced BMI.
- RecordData Inits;
- for (Decl *D : Context->getModuleInitializers(Mod))
- if (wasDeclEmitted(D))
- AddDeclRef(D, Inits);
- if (!Inits.empty())
- Stream.EmitRecord(SUBMODULE_INITIALIZERS, Inits);
+ if (Context) {
+ RecordData Inits;
+ for (Decl *D : Context->getModuleInitializers(Mod))
+ if (wasDeclEmitted(D))
+ AddDeclRef(D, Inits);
+ if (!Inits.empty())
+ Stream.EmitRecord(SUBMODULE_INITIALIZERS, Inits);
+ }
// Emit the name of the re-exported module, if any.
if (!Mod->ExportAsModule.empty()) {
@@ -3259,7 +3261,7 @@ void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag,
//===----------------------------------------------------------------------===//
/// Write the representation of a type to the AST stream.
-void ASTWriter::WriteType(QualType T) {
+void ASTWriter::WriteType(ASTContext &Context, QualType T) {
TypeIdx &IdxRef = TypeIdxs[T];
if (IdxRef.getValue() == 0) // we haven't seen this type before.
IdxRef = TypeIdx(0, NextTypeID++);
@@ -3269,7 +3271,8 @@ void ASTWriter::WriteType(QualType T) {
assert(Idx.getValue() >= FirstTypeID && "Writing predefined type");
// Emit the type's representation.
- uint64_t Offset = ASTTypeWriter(*this).write(T) - DeclTypesBlockStartOffset;
+ uint64_t Offset =
+ ASTTypeWriter(Context, *this).write(T) - DeclTypesBlockStartOffset;
// Record the offset for this type.
uint64_t Index = Idx.getValue() - FirstTypeID;
@@ -3393,7 +3396,7 @@ void ASTWriter::WriteFileDeclIDsMap() {
Stream.EmitRecordWithBlob(AbbrevCode, Record, bytes(FileGroupedDeclIDs));
}
-void ASTWriter::WriteComments() {
+void ASTWriter::WriteComments(ASTContext &Context) {
Stream.EnterSubblock(COMMENTS_BLOCK_ID, 3);
auto _ = llvm::make_scope_exit([this] { Stream.ExitBlock(); });
if (!PP->getPreprocessorOpts().WriteCommentListToPCH)
@@ -3406,7 +3409,7 @@ void ASTWriter::WriteComments() {
return;
RecordData Record;
- for (const auto &FO : Context->Comments.OrderedComments) {
+ for (const auto &FO : Context.Comments.OrderedComments) {
for (const auto &OC : FO.second) {
const RawComment *I = OC.second;
Record.clear();
@@ -3656,7 +3659,7 @@ void ASTWriter::WriteReferencedSelectorsPool(Sema &SemaRef) {
return;
RecordData Record;
- ASTRecordWriter Writer(*this, Record);
+ ASTRecordWriter Writer(SemaRef.Context, *this, Record);
// Note: this writes out all references even for a dependent AST. But it is
// very tricky to fix, and given that @selector shouldn't really appear in
@@ -3742,7 +3745,7 @@ bool IsInterestingNonMacroIdentifier(const IdentifierInfo *II,
class ASTIdentifierTableTrait {
ASTWriter &Writer;
Preprocessor &PP;
- IdentifierResolver &IdResolver;
+ IdentifierResolver *IdResolver;
bool IsModule;
bool NeedDecls;
ASTWriter::RecordData *InterestingIdentifierOffsets;
@@ -3767,7 +3770,7 @@ class ASTIdentifierTableTrait {
using offset_type = unsigned;
ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP,
- IdentifierResolver &IdResolver, bool IsModule,
+ IdentifierResolver *IdResolver, bool IsModule,
ASTWriter::RecordData *InterestingIdentifierOffsets)
: Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule),
NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus),
@@ -3806,8 +3809,8 @@ class ASTIdentifierTableTrait {
if (MacroOffset)
DataLen += 4; // MacroDirectives offset.
- if (NeedDecls)
- DataLen += std::distance(IdResolver.begin(II), IdResolver.end()) *
+ if (NeedDecls && IdResolver)
+ DataLen += std::distance(IdResolver->begin(II), IdResolver->end()) *
sizeof(DeclID);
}
return emitULEBKeyDataLength(KeyLen, DataLen, Out);
@@ -3845,14 +3848,14 @@ class ASTIdentifierTableTrait {
if (HadMacroDefinition)
LE.write<uint32_t>(MacroOffset);
- if (NeedDecls) {
+ if (NeedDecls && IdResolver) {
// Emit the declaration IDs in reverse order, because the
// IdentifierResolver provides the declarations as they would be
// visible (e.g., the function "stat" would come before the struct
// "stat"), but the ASTReader adds declarations to the end of the list
// (so we need to see the struct "stat" before the function "stat").
// Only emit declarations that aren't from a chained PCH, though.
- SmallVector<NamedDecl *, 16> Decls(IdResolver.decls(II));
+ SmallVector<NamedDecl *, 16> Decls(IdResolver->decls(II));
for (NamedDecl *D : llvm::reverse(Decls))
LE.write<DeclID>((DeclID)Writer.getDeclID(
getDeclForLocalLookup(PP.getLangOpts(), D)));
@@ -3872,7 +3875,7 @@ static bool isLocalIdentifierID(IdentifierID ID) { return !(ID >> 32); }
/// (the actual identifiers themselves) and a separate "offsets" index
/// that maps identifier IDs to locations within the blob.
void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
- IdentifierResolver &IdResolver,
+ IdentifierResolver *IdResolver,
bool IsModule) {
using namespace llvm;
@@ -4137,9 +4140,9 @@ static bool isLookupResultNotInteresting(ASTWriter &Writer,
return true;
}
-void
-ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC,
- llvm::SmallVectorImpl<char> &LookupTable) {
+void ASTWriter::GenerateNameLookupTable(
+ ASTContext &Context, const DeclContext *ConstDC,
+ llvm::SmallVectorImpl<char> &LookupTable) {
assert(!ConstDC->hasLazyLocalLexicalLookups() &&
!ConstDC->hasLazyExternalLexicalLookups() &&
"must call buildLookups first");
@@ -4234,8 +4237,8 @@ ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC,
// another declaration in the redecl chain. Any non-implicit constructor or
// conversion function which doesn't occur in all the lexical contexts
// would be an ODR violation.
- auto ImplicitCtorName = Context->DeclarationNames.getCXXConstructorName(
- Context->getCanonicalType(Context->getRecordType(D)));
+ auto ImplicitCtorName = Context.DeclarationNames.getCXXConstructorName(
+ Context.getCanonicalType(Context.getRecordType(D)));
if (ConstructorNameSet.erase(ImplicitCtorName))
Names.push_back(ImplicitCtorName);
@@ -4415,7 +4418,7 @@ uint64_t ASTWriter::WriteDeclContextVisibleBlock(ASTContext &Context,
// Create the on-disk hash table in a buffer.
SmallString<4096> LookupTable;
- GenerateNameLookupTable(DC, LookupTable);
+ GenerateNameLookupTable(Context, DC, LookupTable);
// Write the lookup table
RecordData::value_type Record[] = {DECL_CONTEXT_VISIBLE};
@@ -4431,14 +4434,15 @@ uint64_t ASTWriter::WriteDeclContextVisibleBlock(ASTContext &Context,
/// DeclContext in a dependent AST file. As such, they only exist for the TU
/// (in C++), for namespaces, and for classes with forward-declared unscoped
/// enumeration members (in C++11).
-void ASTWriter::WriteDeclContextVisibleUpdate(const DeclContext *DC) {
+void ASTWriter::WriteDeclContextVisibleUpdate(ASTContext &Context,
+ const DeclContext *DC) {
StoredDeclsMap *Map = DC->getLookupPtr();
if (!Map || Map->empty())
return;
// Create the on-disk hash table in a buffer.
SmallString<4096> LookupTable;
- GenerateNameLookupTable(DC, LookupTable);
+ GenerateNameLookupTable(Context, DC, LookupTable);
// If we're updating a namespace, select a key declaration as the key for the
// update record; those are the only ones that will be checked on reload.
@@ -4753,15 +4757,12 @@ void ASTWriter::AddString(StringRef Str, RecordDataImpl &Record) {
}
bool ASTWriter::PreparePathForOutput(SmallVectorImpl<char> &Path) {
- assert(Context && "should have context when outputting path");
-
// Leave special file names as they are.
StringRef PathStr(Path.data(), Path.size());
if (PathStr == "<built-in>" || PathStr == "<command line>")
return false;
- bool Changed =
- cleanPathForOutput(Context->getSourceManager().getFileManager(), Path);
+ bool Changed = cleanPathForOutput(PP->getFileManager(), Path);
// Remove a prefix to make the path relative, if relevant.
const char *PathBegin = Path.data();
@@ -4850,21 +4851,28 @@ ASTWriter::~ASTWriter() = default;
const LangOptions &ASTWriter::getLangOpts() const {
assert(WritingAST && "can't determine lang opts when not writing AST");
- return Context->getLangOpts();
+ return PP->getLangOpts();
}
time_t ASTWriter::getTimestampForOutput(const FileEntry *E) const {
return IncludeTimestamps ? E->getModificationTime() : 0;
}
-ASTFileSignature ASTWriter::WriteAST(Sema &SemaRef, StringRef OutputFile,
- Module *WritingModule, StringRef isysroot,
- bool ShouldCacheASTInMemory) {
+ASTFileSignature
+ASTWriter::WriteAST(llvm::PointerUn...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/115239
More information about the cfe-commits mailing list