[clang] [clang][deps] Only write preprocessor info into PCMs (PR #115239)

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Wed Nov 6 16:02:07 PST 2024


https://github.com/jansvoboda11 created https://github.com/llvm/llvm-project/pull/115239

This patch builds on top of https://github.com/llvm/llvm-project/pull/115237 and https://github.com/llvm/llvm-project/pull/115235, only passing the `Preprocessor` object to `ASTWriter`. This reduces the size of scanning PCM files by 1/3 and speeds up scans by 16%.

>From 27985cf8df00f1edf6e74b7e50ce5be13569591c Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Wed, 6 Nov 2024 09:54:06 -0800
Subject: [PATCH 1/4] [clang][serialization] Pass `ASTContext` explicitly

---
 .../clang/Serialization/ASTRecordWriter.h     |   7 +-
 clang/include/clang/Serialization/ASTWriter.h |  28 ++---
 clang/lib/Serialization/ASTWriter.cpp         | 118 +++++++++---------
 clang/lib/Serialization/ASTWriterDecl.cpp     |  55 ++++----
 clang/lib/Serialization/ASTWriterStmt.cpp     |  15 +--
 5 files changed, 110 insertions(+), 113 deletions(-)

diff --git a/clang/include/clang/Serialization/ASTRecordWriter.h b/clang/include/clang/Serialization/ASTRecordWriter.h
index d6090ba1a6c690..67720a0aebc1ca 100644
--- a/clang/include/clang/Serialization/ASTRecordWriter.h
+++ b/clang/include/clang/Serialization/ASTRecordWriter.h
@@ -60,8 +60,9 @@ class ASTRecordWriter
 
 public:
   /// Construct a ASTRecordWriter that uses the default encoding scheme.
-  ASTRecordWriter(ASTWriter &W, ASTWriter::RecordDataImpl &Record)
-      : DataStreamBasicWriter(W.getASTContext()), Writer(&W), Record(&Record) {}
+  ASTRecordWriter(ASTContext &Context, ASTWriter &W,
+                  ASTWriter::RecordDataImpl &Record)
+      : DataStreamBasicWriter(Context), Writer(&W), Record(&Record) {}
 
   /// Construct a ASTRecordWriter that uses the same encoding scheme as another
   /// ASTRecordWriter.
@@ -208,7 +209,7 @@ class ASTRecordWriter
 
   /// Emit a reference to a type.
   void AddTypeRef(QualType T) {
-    return Writer->AddTypeRef(T, *Record);
+    return Writer->AddTypeRef(getASTContext(), T, *Record);
   }
   void writeQualType(QualType T) {
     AddTypeRef(T);
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index d0e841f367c1e0..dc9fcd3c33726e 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -119,9 +119,6 @@ class ASTWriter : public ASTDeserializationListener,
   /// The PCM manager which manages memory buffers for pcm files.
   InMemoryModuleCache &ModuleCache;
 
-  /// The ASTContext we're writing.
-  ASTContext *Context = nullptr;
-
   /// The preprocessor we're writing.
   Preprocessor *PP = nullptr;
 
@@ -545,7 +542,7 @@ class ASTWriter : public ASTDeserializationListener,
   unsigned getSubmoduleID(Module *Mod);
 
   /// Write the given subexpression to the bitstream.
-  void WriteSubStmt(Stmt *S);
+  void WriteSubStmt(ASTContext &Context, Stmt *S);
 
   void WriteBlockInfoBlock();
   void WriteControlBlock(Preprocessor &PP, StringRef isysroot);
@@ -564,25 +561,25 @@ class ASTWriter : public ASTDeserializationListener,
   void WriteHeaderSearch(const HeaderSearch &HS);
   void WritePreprocessorDetail(PreprocessingRecord &PPRec,
                                uint64_t MacroOffsetsBase);
-  void WriteSubmodules(Module *WritingModule);
+  void WriteSubmodules(Module *WritingModule, ASTContext &Context);
 
   void WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag,
                                      bool isModule);
 
   unsigned TypeExtQualAbbrev = 0;
   void WriteTypeAbbrevs();
-  void WriteType(QualType T);
+  void WriteType(ASTContext &Context, QualType T);
 
   bool isLookupResultExternal(StoredDeclsList &Result, DeclContext *DC);
 
-  void GenerateNameLookupTable(const DeclContext *DC,
+  void GenerateNameLookupTable(ASTContext &Context, const DeclContext *DC,
                                llvm::SmallVectorImpl<char> &LookupTable);
   uint64_t WriteDeclContextLexicalBlock(ASTContext &Context,
                                         const DeclContext *DC);
   uint64_t WriteDeclContextVisibleBlock(ASTContext &Context, DeclContext *DC);
   void WriteTypeDeclOffsets();
   void WriteFileDeclIDsMap();
-  void WriteComments();
+  void WriteComments(ASTContext &Context);
   void WriteSelectors(Sema &SemaRef);
   void WriteReferencedSelectorsPool(Sema &SemaRef);
   void WriteIdentifierTable(Preprocessor &PP, IdentifierResolver &IdResolver,
@@ -590,8 +587,10 @@ class ASTWriter : public ASTDeserializationListener,
   void WriteDeclAndTypes(ASTContext &Context);
   void PrepareWritingSpecialDecls(Sema &SemaRef);
   void WriteSpecialDeclRecords(Sema &SemaRef);
-  void WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord);
-  void WriteDeclContextVisibleUpdate(const DeclContext *DC);
+  void WriteDeclUpdatesBlocks(ASTContext &Context,
+                              RecordDataImpl &OffsetsRecord);
+  void WriteDeclContextVisibleUpdate(ASTContext &Context,
+                                     const DeclContext *DC);
   void WriteFPPragmaOptions(const FPOptionsOverride &Opts);
   void WriteOpenCLExtensions(Sema &SemaRef);
   void WriteCUDAPragmas(Sema &SemaRef);
@@ -653,11 +652,6 @@ class ASTWriter : public ASTDeserializationListener,
             bool GeneratingReducedBMI = false);
   ~ASTWriter() override;
 
-  ASTContext &getASTContext() const {
-    assert(Context && "requested AST context when not writing AST");
-    return *Context;
-  }
-
   const LangOptions &getLangOpts() const;
 
   /// Get a timestamp for output into the AST file. The actual timestamp
@@ -723,10 +717,10 @@ class ASTWriter : public ASTDeserializationListener,
   uint32_t getMacroDirectivesOffset(const IdentifierInfo *Name);
 
   /// Emit a reference to a type.
-  void AddTypeRef(QualType T, RecordDataImpl &Record);
+  void AddTypeRef(ASTContext &Context, QualType T, RecordDataImpl &Record);
 
   /// Force a type to be emitted and get its ID.
-  serialization::TypeID GetOrCreateTypeID(QualType T);
+  serialization::TypeID GetOrCreateTypeID(ASTContext &Context, QualType T);
 
   /// Find the first local declaration of a given local redeclarable
   /// decl.
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index b95e29cbc02515..d1af80b3243ba9 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -277,8 +277,8 @@ class ASTTypeWriter {
   ASTRecordWriter BasicWriter;
 
 public:
-  ASTTypeWriter(ASTWriter &Writer)
-    : Writer(Writer), BasicWriter(Writer, Record) {}
+  ASTTypeWriter(ASTContext &Context, ASTWriter &Writer)
+    : Writer(Writer), BasicWriter(Context, Writer, Record) {}
 
   uint64_t write(QualType T) {
     if (T.hasLocalNonFastQualifiers()) {
@@ -2872,7 +2872,7 @@ static unsigned getNumberOfModules(Module *Mod) {
   return ChildModules + 1;
 }
 
-void ASTWriter::WriteSubmodules(Module *WritingModule) {
+void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext &Context) {
   // Enter the submodule description block.
   Stream.EnterSubblock(SUBMODULE_BLOCK_ID, /*bits for abbreviations*/5);
 
@@ -3124,7 +3124,7 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) {
     // Emit the reachable initializers.
     // The initializer may only be unreachable in reduced BMI.
     RecordData Inits;
-    for (Decl *D : Context->getModuleInitializers(Mod))
+    for (Decl *D : Context.getModuleInitializers(Mod))
       if (wasDeclEmitted(D))
         AddDeclRef(D, Inits);
     if (!Inits.empty())
@@ -3259,7 +3259,7 @@ void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag,
 //===----------------------------------------------------------------------===//
 
 /// Write the representation of a type to the AST stream.
-void ASTWriter::WriteType(QualType T) {
+void ASTWriter::WriteType(ASTContext &Context, QualType T) {
   TypeIdx &IdxRef = TypeIdxs[T];
   if (IdxRef.getValue() == 0) // we haven't seen this type before.
     IdxRef = TypeIdx(0, NextTypeID++);
@@ -3269,7 +3269,8 @@ void ASTWriter::WriteType(QualType T) {
   assert(Idx.getValue() >= FirstTypeID && "Writing predefined type");
 
   // Emit the type's representation.
-  uint64_t Offset = ASTTypeWriter(*this).write(T) - DeclTypesBlockStartOffset;
+  uint64_t Offset =
+      ASTTypeWriter(Context, *this).write(T) - DeclTypesBlockStartOffset;
 
   // Record the offset for this type.
   uint64_t Index = Idx.getValue() - FirstTypeID;
@@ -3393,7 +3394,7 @@ void ASTWriter::WriteFileDeclIDsMap() {
   Stream.EmitRecordWithBlob(AbbrevCode, Record, bytes(FileGroupedDeclIDs));
 }
 
-void ASTWriter::WriteComments() {
+void ASTWriter::WriteComments(ASTContext &Context) {
   Stream.EnterSubblock(COMMENTS_BLOCK_ID, 3);
   auto _ = llvm::make_scope_exit([this] { Stream.ExitBlock(); });
   if (!PP->getPreprocessorOpts().WriteCommentListToPCH)
@@ -3406,7 +3407,7 @@ void ASTWriter::WriteComments() {
     return;
 
   RecordData Record;
-  for (const auto &FO : Context->Comments.OrderedComments) {
+  for (const auto &FO : Context.Comments.OrderedComments) {
     for (const auto &OC : FO.second) {
       const RawComment *I = OC.second;
       Record.clear();
@@ -3656,7 +3657,7 @@ void ASTWriter::WriteReferencedSelectorsPool(Sema &SemaRef) {
     return;
 
   RecordData Record;
-  ASTRecordWriter Writer(*this, Record);
+  ASTRecordWriter Writer(SemaRef.Context, *this, Record);
 
   // Note: this writes out all references even for a dependent AST. But it is
   // very tricky to fix, and given that @selector shouldn't really appear in
@@ -4137,9 +4138,9 @@ static bool isLookupResultNotInteresting(ASTWriter &Writer,
   return true;
 }
 
-void
-ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC,
-                                   llvm::SmallVectorImpl<char> &LookupTable) {
+void ASTWriter::GenerateNameLookupTable(
+    ASTContext &Context, const DeclContext *ConstDC,
+    llvm::SmallVectorImpl<char> &LookupTable) {
   assert(!ConstDC->hasLazyLocalLexicalLookups() &&
          !ConstDC->hasLazyExternalLexicalLookups() &&
          "must call buildLookups first");
@@ -4234,8 +4235,8 @@ ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC,
     // another declaration in the redecl chain. Any non-implicit constructor or
     // conversion function which doesn't occur in all the lexical contexts
     // would be an ODR violation.
-    auto ImplicitCtorName = Context->DeclarationNames.getCXXConstructorName(
-        Context->getCanonicalType(Context->getRecordType(D)));
+    auto ImplicitCtorName = Context.DeclarationNames.getCXXConstructorName(
+        Context.getCanonicalType(Context.getRecordType(D)));
     if (ConstructorNameSet.erase(ImplicitCtorName))
       Names.push_back(ImplicitCtorName);
 
@@ -4415,7 +4416,7 @@ uint64_t ASTWriter::WriteDeclContextVisibleBlock(ASTContext &Context,
 
   // Create the on-disk hash table in a buffer.
   SmallString<4096> LookupTable;
-  GenerateNameLookupTable(DC, LookupTable);
+  GenerateNameLookupTable(Context, DC, LookupTable);
 
   // Write the lookup table
   RecordData::value_type Record[] = {DECL_CONTEXT_VISIBLE};
@@ -4431,14 +4432,15 @@ uint64_t ASTWriter::WriteDeclContextVisibleBlock(ASTContext &Context,
 /// DeclContext in a dependent AST file. As such, they only exist for the TU
 /// (in C++), for namespaces, and for classes with forward-declared unscoped
 /// enumeration members (in C++11).
-void ASTWriter::WriteDeclContextVisibleUpdate(const DeclContext *DC) {
+void ASTWriter::WriteDeclContextVisibleUpdate(ASTContext &Context,
+                                              const DeclContext *DC) {
   StoredDeclsMap *Map = DC->getLookupPtr();
   if (!Map || Map->empty())
     return;
 
   // Create the on-disk hash table in a buffer.
   SmallString<4096> LookupTable;
-  GenerateNameLookupTable(DC, LookupTable);
+  GenerateNameLookupTable(Context, DC, LookupTable);
 
   // If we're updating a namespace, select a key declaration as the key for the
   // update record; those are the only ones that will be checked on reload.
@@ -4753,15 +4755,12 @@ void ASTWriter::AddString(StringRef Str, RecordDataImpl &Record) {
 }
 
 bool ASTWriter::PreparePathForOutput(SmallVectorImpl<char> &Path) {
-  assert(Context && "should have context when outputting path");
-
   // Leave special file names as they are.
   StringRef PathStr(Path.data(), Path.size());
   if (PathStr == "<built-in>" || PathStr == "<command line>")
     return false;
 
-  bool Changed =
-      cleanPathForOutput(Context->getSourceManager().getFileManager(), Path);
+  bool Changed = cleanPathForOutput(PP->getFileManager(), Path);
 
   // Remove a prefix to make the path relative, if relevant.
   const char *PathBegin = Path.data();
@@ -4850,7 +4849,7 @@ ASTWriter::~ASTWriter() = default;
 
 const LangOptions &ASTWriter::getLangOpts() const {
   assert(WritingAST && "can't determine lang opts when not writing AST");
-  return Context->getLangOpts();
+  return PP->getLangOpts();
 }
 
 time_t ASTWriter::getTimestampForOutput(const FileEntry *E) const {
@@ -4874,11 +4873,9 @@ ASTFileSignature ASTWriter::WriteAST(Sema &SemaRef, StringRef OutputFile,
 
   WriteBlockInfoBlock();
 
-  Context = &SemaRef.Context;
   PP = &SemaRef.PP;
   this->WritingModule = WritingModule;
   ASTFileSignature Signature = WriteASTCore(SemaRef, isysroot, WritingModule);
-  Context = nullptr;
   PP = nullptr;
   this->WritingModule = nullptr;
   this->BaseDirectory.clear();
@@ -5417,14 +5414,14 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
 
   // Form the record of special types.
   RecordData SpecialTypes;
-  AddTypeRef(Context.getRawCFConstantStringType(), SpecialTypes);
-  AddTypeRef(Context.getFILEType(), SpecialTypes);
-  AddTypeRef(Context.getjmp_bufType(), SpecialTypes);
-  AddTypeRef(Context.getsigjmp_bufType(), SpecialTypes);
-  AddTypeRef(Context.ObjCIdRedefinitionType, SpecialTypes);
-  AddTypeRef(Context.ObjCClassRedefinitionType, SpecialTypes);
-  AddTypeRef(Context.ObjCSelRedefinitionType, SpecialTypes);
-  AddTypeRef(Context.getucontext_tType(), SpecialTypes);
+  AddTypeRef(Context, Context.getRawCFConstantStringType(), SpecialTypes);
+  AddTypeRef(Context, Context.getFILEType(), SpecialTypes);
+  AddTypeRef(Context, Context.getjmp_bufType(), SpecialTypes);
+  AddTypeRef(Context, Context.getsigjmp_bufType(), SpecialTypes);
+  AddTypeRef(Context, Context.ObjCIdRedefinitionType, SpecialTypes);
+  AddTypeRef(Context, Context.ObjCClassRedefinitionType, SpecialTypes);
+  AddTypeRef(Context, Context.ObjCSelRedefinitionType, SpecialTypes);
+  AddTypeRef(Context, Context.getucontext_tType(), SpecialTypes);
 
   PrepareWritingSpecialDecls(SemaRef);
 
@@ -5523,7 +5520,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
 
   WriteFileDeclIDsMap();
   WriteSourceManagerBlock(PP.getSourceManager());
-  WriteComments();
+  WriteComments(Context);
   WritePreprocessor(PP, isModule);
   WriteHeaderSearch(PP.getHeaderSearchInfo());
   WriteSelectors(SemaRef);
@@ -5536,7 +5533,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
 
   // If we're emitting a module, write out the submodule information.
   if (WritingModule)
-    WriteSubmodules(WritingModule);
+    WriteSubmodules(WritingModule, SemaRef.Context);
 
   Stream.EmitRecord(SPECIAL_TYPES, SpecialTypes);
 
@@ -5656,12 +5653,12 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) {
   WriteTypeAbbrevs();
   WriteDeclAbbrevs();
   do {
-    WriteDeclUpdatesBlocks(DeclUpdatesOffsetsRecord);
+    WriteDeclUpdatesBlocks(Context, DeclUpdatesOffsetsRecord);
     while (!DeclTypesToEmit.empty()) {
       DeclOrType DOT = DeclTypesToEmit.front();
       DeclTypesToEmit.pop();
       if (DOT.isType())
-        WriteType(DOT.getType());
+        WriteType(Context, DOT.getType());
       else
         WriteDecl(Context, DOT.getDecl());
     }
@@ -5757,18 +5754,19 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) {
   UpdateVisibleAbbrev = Stream.EmitAbbrev(std::move(Abv));
 
   // And a visible updates block for the translation unit.
-  WriteDeclContextVisibleUpdate(TU);
+  WriteDeclContextVisibleUpdate(Context, TU);
 
   // If we have any extern "C" names, write out a visible update for them.
   if (Context.ExternCContext)
-    WriteDeclContextVisibleUpdate(Context.ExternCContext);
+    WriteDeclContextVisibleUpdate(Context, Context.ExternCContext);
 
   // Write the visible updates to DeclContexts.
   for (auto *DC : UpdatedDeclContexts)
-    WriteDeclContextVisibleUpdate(DC);
+    WriteDeclContextVisibleUpdate(Context, DC);
 }
 
-void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
+void ASTWriter::WriteDeclUpdatesBlocks(ASTContext &Context,
+                                       RecordDataImpl &OffsetsRecord) {
   if (DeclUpdates.empty())
     return;
 
@@ -5781,7 +5779,7 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
     bool HasUpdatedBody = false;
     bool HasAddedVarDefinition = false;
     RecordData RecordData;
-    ASTRecordWriter Record(*this, RecordData);
+    ASTRecordWriter Record(Context, *this, RecordData);
     for (auto &Update : DeclUpdate.second) {
       DeclUpdateKind Kind = (DeclUpdateKind)Update.getKind();
 
@@ -5827,7 +5825,7 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
         Record.push_back(RD->isParamDestroyedInCallee());
         Record.push_back(llvm::to_underlying(RD->getArgPassingRestrictions()));
         Record.AddCXXDefinitionData(RD);
-        Record.AddOffset(WriteDeclContextLexicalBlock(*Context, RD));
+        Record.AddOffset(WriteDeclContextLexicalBlock(Context, RD));
 
         // This state is sometimes updated by template instantiation, when we
         // switch from the specialization referring to the template declaration
@@ -5880,7 +5878,7 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
       }
 
       case UPD_CXX_DEDUCED_RETURN_TYPE:
-        Record.push_back(GetOrCreateTypeID(Update.getType()));
+        Record.push_back(GetOrCreateTypeID(Context, Update.getType()));
         break;
 
       case UPD_DECL_MARKED_USED:
@@ -6022,8 +6020,7 @@ ASTWriter::getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq) {
   unsigned ModuleFileIndex = 0;
 
   // See SourceLocationEncoding.h for the encoding details.
-  if (Context->getSourceManager().isLoadedSourceLocation(Loc) &&
-      Loc.isValid()) {
+  if (PP->getSourceManager().isLoadedSourceLocation(Loc) && Loc.isValid()) {
     assert(getChain());
     auto SLocMapI = getChain()->GlobalSLocOffsetMap.find(
         SourceManager::MaxLoadedOffset - Loc.getOffset() - 1);
@@ -6184,8 +6181,9 @@ void ASTRecordWriter::AddTypeLoc(TypeLoc TL, LocSeq *OuterSeq) {
     TLW.Visit(TL);
 }
 
-void ASTWriter::AddTypeRef(QualType T, RecordDataImpl &Record) {
-  Record.push_back(GetOrCreateTypeID(T));
+void ASTWriter::AddTypeRef(ASTContext &Context, QualType T,
+                           RecordDataImpl &Record) {
+  Record.push_back(GetOrCreateTypeID(Context, T));
 }
 
 template <typename IdxForTypeTy>
@@ -6213,9 +6211,8 @@ static TypeID MakeTypeID(ASTContext &Context, QualType T,
   return IdxForType(T).asTypeID(FastQuals);
 }
 
-TypeID ASTWriter::GetOrCreateTypeID(QualType T) {
-  assert(Context);
-  return MakeTypeID(*Context, T, [&](QualType T) -> TypeIdx {
+TypeID ASTWriter::GetOrCreateTypeID(ASTContext &Context, QualType T) {
+  return MakeTypeID(Context, T, [&](QualType T) -> TypeIdx {
     if (T.isNull())
       return TypeIdx();
     assert(!T.getLocalFastQualifiers());
@@ -6335,7 +6332,7 @@ void ASTWriter::associateDeclWithFile(const Decl *D, LocalDeclID ID) {
   if (isa<ParmVarDecl, TemplateTemplateParmDecl>(D))
     return;
 
-  SourceManager &SM = Context->getSourceManager();
+  SourceManager &SM = PP->getSourceManager();
   SourceLocation FileLoc = SM.getFileLoc(Loc);
   assert(SM.isLocalSourceLocation(FileLoc));
   FileID FID;
@@ -6530,10 +6527,10 @@ void ASTRecordWriter::AddCXXBaseSpecifier(const CXXBaseSpecifier &Base) {
                                           : SourceLocation());
 }
 
-static uint64_t EmitCXXBaseSpecifiers(ASTWriter &W,
+static uint64_t EmitCXXBaseSpecifiers(ASTContext &Context, ASTWriter &W,
                                       ArrayRef<CXXBaseSpecifier> Bases) {
   ASTWriter::RecordData Record;
-  ASTRecordWriter Writer(W, Record);
+  ASTRecordWriter Writer(Context, W, Record);
   Writer.push_back(Bases.size());
 
   for (auto &Base : Bases)
@@ -6544,14 +6541,14 @@ static uint64_t EmitCXXBaseSpecifiers(ASTWriter &W,
 
 // FIXME: Move this out of the main ASTRecordWriter interface.
 void ASTRecordWriter::AddCXXBaseSpecifiers(ArrayRef<CXXBaseSpecifier> Bases) {
-  AddOffset(EmitCXXBaseSpecifiers(*Writer, Bases));
+  AddOffset(EmitCXXBaseSpecifiers(getASTContext(), *Writer, Bases));
 }
 
 static uint64_t
-EmitCXXCtorInitializers(ASTWriter &W,
+EmitCXXCtorInitializers(ASTContext &Context, ASTWriter &W,
                         ArrayRef<CXXCtorInitializer *> CtorInits) {
   ASTWriter::RecordData Record;
-  ASTRecordWriter Writer(W, Record);
+  ASTRecordWriter Writer(Context, W, Record);
   Writer.push_back(CtorInits.size());
 
   for (auto *Init : CtorInits) {
@@ -6585,7 +6582,7 @@ EmitCXXCtorInitializers(ASTWriter &W,
 // FIXME: Move this out of the main ASTRecordWriter interface.
 void ASTRecordWriter::AddCXXCtorInitializers(
     ArrayRef<CXXCtorInitializer *> CtorInits) {
-  AddOffset(EmitCXXCtorInitializers(*Writer, CtorInits));
+  AddOffset(EmitCXXCtorInitializers(getASTContext(), *Writer, CtorInits));
 }
 
 void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
@@ -6613,18 +6610,17 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
 
   bool ModulesCodegen =
       !D->isDependentType() &&
-     (Writer->Context->getLangOpts().ModulesDebugInfo ||
-      D->isInNamedModule());
+      (Writer->getLangOpts().ModulesDebugInfo || D->isInNamedModule());
   Record->push_back(ModulesCodegen);
   if (ModulesCodegen)
     Writer->AddDeclRef(D, Writer->ModularCodegenDecls);
 
   // IsLambda bit is already saved.
 
-  AddUnresolvedSet(Data.Conversions.get(*Writer->Context));
+  AddUnresolvedSet(Data.Conversions.get(getASTContext()));
   Record->push_back(Data.ComputedVisibleConversions);
   if (Data.ComputedVisibleConversions)
-    AddUnresolvedSet(Data.VisibleConversions.get(*Writer->Context));
+    AddUnresolvedSet(Data.VisibleConversions.get(getASTContext()));
   // Data.Definition is the owning decl, no need to write it.
 
   if (!Data.IsLambda) {
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index b9ce3db41ef916..ad357e30d57529 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -34,7 +34,6 @@ using namespace serialization;
 namespace clang {
   class ASTDeclWriter : public DeclVisitor<ASTDeclWriter, void> {
     ASTWriter &Writer;
-    ASTContext &Context;
     ASTRecordWriter Record;
 
     serialization::DeclCode Code;
@@ -45,7 +44,7 @@ namespace clang {
   public:
     ASTDeclWriter(ASTWriter &Writer, ASTContext &Context,
                   ASTWriter::RecordDataImpl &Record, bool GeneratingReducedBMI)
-        : Writer(Writer), Context(Context), Record(Writer, Record),
+        : Writer(Writer), Record(Context, Writer, Record),
           Code((serialization::DeclCode)0), AbbrevToUse(0),
           GeneratingReducedBMI(GeneratingReducedBMI) {}
 
@@ -217,7 +216,7 @@ namespace clang {
       // If we have any lazy specializations, and the external AST source is
       // our chained AST reader, we can just write out the DeclIDs. Otherwise,
       // we need to resolve them to actual declarations.
-      if (Writer.Chain != Writer.Context->getExternalSource() &&
+      if (Writer.Chain != Record.getASTContext().getExternalSource() &&
           Common->LazySpecializations) {
         D->LoadLazySpecializations();
         assert(!Common->LazySpecializations);
@@ -811,8 +810,8 @@ void ASTDeclWriter::VisitObjCMethodDecl(ObjCMethodDecl *D) {
   Record.push_back(D->isRedeclaration());
   Record.push_back(D->hasRedeclaration());
   if (D->hasRedeclaration()) {
-    assert(Context.getObjCMethodRedeclaration(D));
-    Record.AddDeclRef(Context.getObjCMethodRedeclaration(D));
+    assert(Record.getASTContext().getObjCMethodRedeclaration(D));
+    Record.AddDeclRef(Record.getASTContext().getObjCMethodRedeclaration(D));
   }
 
   // FIXME: stable encoding for @required/@optional
@@ -1039,7 +1038,8 @@ void ASTDeclWriter::VisitFieldDecl(FieldDecl *D) {
     Record.AddStmt(D->getBitWidth());
 
   if (!D->getDeclName() || D->isPlaceholderVar(Writer.getLangOpts()))
-    Record.AddDeclRef(Context.getInstantiatedFromUnnamedFieldDecl(D));
+    Record.AddDeclRef(
+        Record.getASTContext().getInstantiatedFromUnnamedFieldDecl(D));
 
   if (D->getDeclContext() == D->getLexicalDeclContext() &&
       !D->hasAttrs() &&
@@ -1118,11 +1118,11 @@ void ASTDeclWriter::VisitVarDecl(VarDecl *D) {
     // strong definition in the module interface is provided by the
     // compilation of that unit, not by its users. (Inline variables are still
     // emitted in module users.)
-    ModulesCodegen =
-        (Writer.WritingModule->isInterfaceOrPartition() ||
-         (D->hasAttr<DLLExportAttr>() &&
-          Writer.Context->getLangOpts().BuildingPCHWithObjectFile)) &&
-        Writer.Context->GetGVALinkageForVariable(D) >= GVA_StrongExternal;
+    ModulesCodegen = (Writer.WritingModule->isInterfaceOrPartition() ||
+                      (D->hasAttr<DLLExportAttr>() &&
+                       Writer.getLangOpts().BuildingPCHWithObjectFile)) &&
+                     Record.getASTContext().GetGVALinkageForVariable(D) >=
+                         GVA_StrongExternal;
   }
   VarDeclBits.addBit(ModulesCodegen);
 
@@ -1163,7 +1163,7 @@ void ASTDeclWriter::VisitVarDecl(VarDecl *D) {
     Writer.AddDeclRef(D, Writer.ModularCodegenDecls);
 
   if (D->hasAttr<BlocksAttr>()) {
-    BlockVarCopyInit Init = Writer.Context->getBlockVarCopyInit(D);
+    BlockVarCopyInit Init = Record.getASTContext().getBlockVarCopyInit(D);
     Record.AddStmt(Init.getCopyExpr());
     if (Init.getCopyExpr())
       Record.push_back(Init.canThrow());
@@ -1411,7 +1411,7 @@ void ASTDeclWriter::VisitUsingDecl(UsingDecl *D) {
   Record.AddDeclarationNameLoc(D->DNLoc, D->getDeclName());
   Record.AddDeclRef(D->FirstUsingShadow.getPointer());
   Record.push_back(D->hasTypename());
-  Record.AddDeclRef(Context.getInstantiatedFromUsingDecl(D));
+  Record.AddDeclRef(Record.getASTContext().getInstantiatedFromUsingDecl(D));
   Code = serialization::DECL_USING;
 }
 
@@ -1421,7 +1421,7 @@ void ASTDeclWriter::VisitUsingEnumDecl(UsingEnumDecl *D) {
   Record.AddSourceLocation(D->getEnumLoc());
   Record.AddTypeSourceInfo(D->getEnumType());
   Record.AddDeclRef(D->FirstUsingShadow.getPointer());
-  Record.AddDeclRef(Context.getInstantiatedFromUsingEnumDecl(D));
+  Record.AddDeclRef(Record.getASTContext().getInstantiatedFromUsingEnumDecl(D));
   Code = serialization::DECL_USING_ENUM;
 }
 
@@ -1440,7 +1440,8 @@ void ASTDeclWriter::VisitUsingShadowDecl(UsingShadowDecl *D) {
   Record.AddDeclRef(D->getTargetDecl());
   Record.push_back(D->getIdentifierNamespace());
   Record.AddDeclRef(D->UsingOrNextShadow);
-  Record.AddDeclRef(Context.getInstantiatedFromUsingShadowDecl(D));
+  Record.AddDeclRef(
+      Record.getASTContext().getInstantiatedFromUsingShadowDecl(D));
 
   if (D->getDeclContext() == D->getLexicalDeclContext() &&
       D->getFirstDecl() == D->getMostRecentDecl() && !D->hasAttrs() &&
@@ -1544,7 +1545,7 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) {
   // FIXME: Avoid adding the key function if the class is defined in
   // module purview since in that case the key function is meaningless.
   if (D->isCompleteDefinition())
-    Record.AddDeclRef(Context.getCurrentKeyFunction(D));
+    Record.AddDeclRef(Record.getASTContext().getCurrentKeyFunction(D));
 
   Code = serialization::DECL_CXX_RECORD;
 }
@@ -1735,7 +1736,8 @@ void ASTDeclWriter::VisitClassTemplateDecl(ClassTemplateDecl *D) {
   // Force emitting the corresponding deduction guide in reduced BMI mode.
   // Otherwise, the deduction guide may be optimized out incorrectly.
   if (Writer.isGeneratingReducedBMI()) {
-    auto Name = Context.DeclarationNames.getCXXDeductionGuideName(D);
+    auto Name =
+        Record.getASTContext().DeclarationNames.getCXXDeductionGuideName(D);
     for (auto *DG : D->getDeclContext()->noload_lookup(Name))
       Writer.GetDeclRef(DG->getCanonicalDecl());
   }
@@ -1791,8 +1793,9 @@ void ASTDeclWriter::VisitClassTemplateSpecializationDecl(
   // FIXME: Would it be more efficient to add a callback register function
   // in sema to register the deduction guide?
   if (Writer.isWritingStdCXXNamedModules()) {
-    auto Name = Context.DeclarationNames.getCXXDeductionGuideName(
-        D->getSpecializedTemplate());
+    auto Name =
+        Record.getASTContext().DeclarationNames.getCXXDeductionGuideName(
+            D->getSpecializedTemplate());
     for (auto *DG : D->getDeclContext()->noload_lookup(Name))
       Writer.GetDeclRef(DG->getCanonicalDecl());
   }
@@ -2022,8 +2025,10 @@ void ASTDeclWriter::VisitDeclContext(DeclContext *DC) {
     // details.
     Writer.DelayedNamespace.push_back(cast<NamespaceDecl>(DC));
   } else {
-    LexicalOffset = Writer.WriteDeclContextLexicalBlock(Context, DC);
-    VisibleOffset = Writer.WriteDeclContextVisibleBlock(Context, DC);
+    LexicalOffset =
+        Writer.WriteDeclContextLexicalBlock(Record.getASTContext(), DC);
+    VisibleOffset =
+        Writer.WriteDeclContextVisibleBlock(Record.getASTContext(), DC);
   }
 
   Record.AddOffset(LexicalOffset);
@@ -2880,18 +2885,18 @@ void ASTRecordWriter::AddFunctionDefinition(const FunctionDecl *FD) {
       // strong definition in the module interface is provided by the
       // compilation of that unit, not by its users. (Inline functions are still
       // emitted in module users.)
-      Linkage = Writer->Context->GetGVALinkageForFunction(FD);
+      Linkage = getASTContext().GetGVALinkageForFunction(FD);
       ModulesCodegen = *Linkage >= GVA_StrongExternal;
     }
-    if (Writer->Context->getLangOpts().ModulesCodegen ||
+    if (Writer->getLangOpts().ModulesCodegen ||
         (FD->hasAttr<DLLExportAttr>() &&
-         Writer->Context->getLangOpts().BuildingPCHWithObjectFile)) {
+         Writer->getLangOpts().BuildingPCHWithObjectFile)) {
 
       // Under -fmodules-codegen, codegen is performed for all non-internal,
       // non-always_inline functions, unless they are available elsewhere.
       if (!FD->hasAttr<AlwaysInlineAttr>()) {
         if (!Linkage)
-          Linkage = Writer->Context->GetGVALinkageForFunction(FD);
+          Linkage = getASTContext().GetGVALinkageForFunction(FD);
         ModulesCodegen =
             *Linkage != GVA_Internal && *Linkage != GVA_AvailableExternally;
       }
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 321e0031661ee2..7f700c2977e09c 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -91,8 +91,9 @@ namespace clang {
     PakedBitsWriter CurrentPackingBits;
 
   public:
-    ASTStmtWriter(ASTWriter &Writer, ASTWriter::RecordData &Record)
-        : Writer(Writer), Record(Writer, Record),
+    ASTStmtWriter(ASTContext &Context, ASTWriter &Writer,
+                  ASTWriter::RecordData &Record)
+        : Writer(Writer), Record(Context, Writer, Record),
           Code(serialization::STMT_NULL_PTR), AbbrevToUse(0),
           CurrentPackingBits(this->Record) {}
 
@@ -2112,7 +2113,7 @@ void ASTStmtWriter::VisitUnresolvedLookupExpr(UnresolvedLookupExpr *E) {
     // propagted.
     DeclarationName Name = E->getName();
     for (auto *Found :
-         Writer.getASTContext().getTranslationUnitDecl()->lookup(Name))
+         Record.getASTContext().getTranslationUnitDecl()->lookup(Name))
       if (Found->isFromASTFile())
         Writer.GetDeclRef(Found);
 
@@ -2952,9 +2953,9 @@ void ASTWriter::ClearSwitchCaseIDs() {
 
 /// Write the given substatement or subexpression to the
 /// bitstream.
-void ASTWriter::WriteSubStmt(Stmt *S) {
+void ASTWriter::WriteSubStmt(ASTContext &Context, Stmt *S) {
   RecordData Record;
-  ASTStmtWriter Writer(*this, Record);
+  ASTStmtWriter Writer(Context, *this, Record);
   ++NumStatements;
 
   if (!S) {
@@ -3003,7 +3004,7 @@ void ASTRecordWriter::FlushStmts() {
   assert(Writer->ParentStmts.empty() && "unexpected entries in parent stmt map");
 
   for (unsigned I = 0, N = StmtsToEmit.size(); I != N; ++I) {
-    Writer->WriteSubStmt(StmtsToEmit[I]);
+    Writer->WriteSubStmt(getASTContext(), StmtsToEmit[I]);
 
     assert(N == StmtsToEmit.size() && "record modified while being written!");
 
@@ -3024,7 +3025,7 @@ void ASTRecordWriter::FlushSubStmts() {
   // that a simple stack machine can be used when loading), and don't emit a
   // STMT_STOP after each one.
   for (unsigned I = 0, N = StmtsToEmit.size(); I != N; ++I) {
-    Writer->WriteSubStmt(StmtsToEmit[N - I - 1]);
+    Writer->WriteSubStmt(getASTContext(), StmtsToEmit[N - I - 1]);
     assert(N == StmtsToEmit.size() && "record modified while being written!");
   }
 

>From 5e66ce95bfb7cb401b5757f8491a89d369ad2010 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Wed, 6 Nov 2024 11:18:09 -0800
Subject: [PATCH 2/4] [clang][serialization] Make `ASTWriter` accept
 `Preprocessor` only

---
 clang/include/clang/Serialization/ASTWriter.h |  15 +-
 clang/lib/Frontend/ASTUnit.cpp                |   2 +-
 clang/lib/Serialization/ASTWriter.cpp         | 182 ++++++++++--------
 clang/lib/Serialization/GeneratePCH.cpp       |   2 +-
 4 files changed, 114 insertions(+), 87 deletions(-)

diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index dc9fcd3c33726e..7b5b0117874d7a 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -561,7 +561,7 @@ class ASTWriter : public ASTDeserializationListener,
   void WriteHeaderSearch(const HeaderSearch &HS);
   void WritePreprocessorDetail(PreprocessingRecord &PPRec,
                                uint64_t MacroOffsetsBase);
-  void WriteSubmodules(Module *WritingModule, ASTContext &Context);
+  void WriteSubmodules(Module *WritingModule, ASTContext *Context);
 
   void WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag,
                                      bool isModule);
@@ -582,7 +582,7 @@ class ASTWriter : public ASTDeserializationListener,
   void WriteComments(ASTContext &Context);
   void WriteSelectors(Sema &SemaRef);
   void WriteReferencedSelectorsPool(Sema &SemaRef);
-  void WriteIdentifierTable(Preprocessor &PP, IdentifierResolver &IdResolver,
+  void WriteIdentifierTable(Preprocessor &PP, IdentifierResolver *IdResolver,
                             bool IsModule);
   void WriteDeclAndTypes(ASTContext &Context);
   void PrepareWritingSpecialDecls(Sema &SemaRef);
@@ -639,7 +639,7 @@ class ASTWriter : public ASTDeserializationListener,
   void WriteDeclAbbrevs();
   void WriteDecl(ASTContext &Context, Decl *D);
 
-  ASTFileSignature WriteASTCore(Sema &SemaRef, StringRef isysroot,
+  ASTFileSignature WriteASTCore(Sema *SemaPtr, StringRef isysroot,
                                 Module *WritingModule);
 
 public:
@@ -661,8 +661,8 @@ class ASTWriter : public ASTDeserializationListener,
 
   /// Write a precompiled header for the given semantic analysis.
   ///
-  /// \param SemaRef a reference to the semantic analysis object that processed
-  /// the AST to be written into the precompiled header.
+  /// \param Subject The object that processed the input to be written into the
+  /// AST file.
   ///
   /// \param WritingModule The module that we are writing. If null, we are
   /// writing a precompiled header.
@@ -673,8 +673,9 @@ class ASTWriter : public ASTDeserializationListener,
   ///
   /// \return the module signature, which eventually will be a hash of
   /// the module but currently is merely a random 32-bit number.
-  ASTFileSignature WriteAST(Sema &SemaRef, StringRef OutputFile,
-                            Module *WritingModule, StringRef isysroot,
+  ASTFileSignature WriteAST(llvm::PointerUnion<Sema *, Preprocessor*> Subject,
+                            StringRef OutputFile, Module *WritingModule,
+                            StringRef isysroot,
                             bool ShouldCacheASTInMemory = false);
 
   /// Emit a token.
diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp
index 4aec928f9eb0a5..f58e27a7979d1e 100644
--- a/clang/lib/Frontend/ASTUnit.cpp
+++ b/clang/lib/Frontend/ASTUnit.cpp
@@ -2359,7 +2359,7 @@ bool ASTUnit::Save(StringRef File) {
 
 static bool serializeUnit(ASTWriter &Writer, SmallVectorImpl<char> &Buffer,
                           Sema &S, raw_ostream &OS) {
-  Writer.WriteAST(S, std::string(), nullptr, "");
+  Writer.WriteAST(&S, std::string(), nullptr, "");
 
   // Write the generated bitstream to "Out".
   if (!Buffer.empty())
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index d1af80b3243ba9..43fac29f32609c 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -2872,7 +2872,7 @@ static unsigned getNumberOfModules(Module *Mod) {
   return ChildModules + 1;
 }
 
-void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext &Context) {
+void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) {
   // Enter the submodule description block.
   Stream.EnterSubblock(SUBMODULE_BLOCK_ID, /*bits for abbreviations*/5);
 
@@ -3123,12 +3123,14 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext &Context) {
 
     // Emit the reachable initializers.
     // The initializer may only be unreachable in reduced BMI.
-    RecordData Inits;
-    for (Decl *D : Context.getModuleInitializers(Mod))
-      if (wasDeclEmitted(D))
-        AddDeclRef(D, Inits);
-    if (!Inits.empty())
-      Stream.EmitRecord(SUBMODULE_INITIALIZERS, Inits);
+    if (Context) {
+      RecordData Inits;
+      for (Decl *D : Context->getModuleInitializers(Mod))
+        if (wasDeclEmitted(D))
+          AddDeclRef(D, Inits);
+      if (!Inits.empty())
+        Stream.EmitRecord(SUBMODULE_INITIALIZERS, Inits);
+    }
 
     // Emit the name of the re-exported module, if any.
     if (!Mod->ExportAsModule.empty()) {
@@ -3743,7 +3745,7 @@ bool IsInterestingNonMacroIdentifier(const IdentifierInfo *II,
 class ASTIdentifierTableTrait {
   ASTWriter &Writer;
   Preprocessor &PP;
-  IdentifierResolver &IdResolver;
+  IdentifierResolver *IdResolver;
   bool IsModule;
   bool NeedDecls;
   ASTWriter::RecordData *InterestingIdentifierOffsets;
@@ -3768,7 +3770,7 @@ class ASTIdentifierTableTrait {
   using offset_type = unsigned;
 
   ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP,
-                          IdentifierResolver &IdResolver, bool IsModule,
+                          IdentifierResolver *IdResolver, bool IsModule,
                           ASTWriter::RecordData *InterestingIdentifierOffsets)
       : Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule),
         NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus),
@@ -3807,8 +3809,8 @@ class ASTIdentifierTableTrait {
       if (MacroOffset)
         DataLen += 4; // MacroDirectives offset.
 
-      if (NeedDecls)
-        DataLen += std::distance(IdResolver.begin(II), IdResolver.end()) *
+      if (NeedDecls && IdResolver)
+        DataLen += std::distance(IdResolver->begin(II), IdResolver->end()) *
                    sizeof(DeclID);
     }
     return emitULEBKeyDataLength(KeyLen, DataLen, Out);
@@ -3846,14 +3848,14 @@ class ASTIdentifierTableTrait {
     if (HadMacroDefinition)
       LE.write<uint32_t>(MacroOffset);
 
-    if (NeedDecls) {
+    if (NeedDecls && IdResolver) {
       // Emit the declaration IDs in reverse order, because the
       // IdentifierResolver provides the declarations as they would be
       // visible (e.g., the function "stat" would come before the struct
       // "stat"), but the ASTReader adds declarations to the end of the list
       // (so we need to see the struct "stat" before the function "stat").
       // Only emit declarations that aren't from a chained PCH, though.
-      SmallVector<NamedDecl *, 16> Decls(IdResolver.decls(II));
+      SmallVector<NamedDecl *, 16> Decls(IdResolver->decls(II));
       for (NamedDecl *D : llvm::reverse(Decls))
         LE.write<DeclID>((DeclID)Writer.getDeclID(
             getDeclForLocalLookup(PP.getLangOpts(), D)));
@@ -3873,7 +3875,7 @@ static bool isLocalIdentifierID(IdentifierID ID) { return !(ID >> 32); }
 /// (the actual identifiers themselves) and a separate "offsets" index
 /// that maps identifier IDs to locations within the blob.
 void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
-                                     IdentifierResolver &IdResolver,
+                                     IdentifierResolver *IdResolver,
                                      bool IsModule) {
   using namespace llvm;
 
@@ -4856,14 +4858,21 @@ time_t ASTWriter::getTimestampForOutput(const FileEntry *E) const {
   return IncludeTimestamps ? E->getModificationTime() : 0;
 }
 
-ASTFileSignature ASTWriter::WriteAST(Sema &SemaRef, StringRef OutputFile,
-                                     Module *WritingModule, StringRef isysroot,
-                                     bool ShouldCacheASTInMemory) {
+ASTFileSignature
+ASTWriter::WriteAST(llvm::PointerUnion<Sema *, Preprocessor *> Subject,
+                    StringRef OutputFile, Module *WritingModule,
+                    StringRef isysroot, bool ShouldCacheASTInMemory) {
   llvm::TimeTraceScope scope("WriteAST", OutputFile);
   WritingAST = true;
 
-  ASTHasCompilerErrors =
-      SemaRef.PP.getDiagnostics().hasUncompilableErrorOccurred();
+  Sema *SemaPtr = Subject.dyn_cast<Sema *>();
+  Preprocessor &PPRef = [&]() -> Preprocessor & {
+    if (SemaPtr)
+      return SemaPtr->getPreprocessor();
+    return *Subject.get<Preprocessor *>();
+  }();
+
+  ASTHasCompilerErrors = PPRef.getDiagnostics().hasUncompilableErrorOccurred();
 
   // Emit the file header.
   Stream.Emit((unsigned)'C', 8);
@@ -4873,16 +4882,16 @@ ASTFileSignature ASTWriter::WriteAST(Sema &SemaRef, StringRef OutputFile,
 
   WriteBlockInfoBlock();
 
-  PP = &SemaRef.PP;
+  PP = &PPRef;
   this->WritingModule = WritingModule;
-  ASTFileSignature Signature = WriteASTCore(SemaRef, isysroot, WritingModule);
+  ASTFileSignature Signature = WriteASTCore(SemaPtr, isysroot, WritingModule);
   PP = nullptr;
   this->WritingModule = nullptr;
   this->BaseDirectory.clear();
 
   WritingAST = false;
 
-  if (WritingModule && SemaRef.PP.getHeaderSearchInfo()
+  if (WritingModule && PPRef.getHeaderSearchInfo()
                            .getHeaderSearchOpts()
                            .ModulesValidateOncePerBuildSession)
     updateModuleTimestamp(OutputFile);
@@ -5347,7 +5356,7 @@ void ASTWriter::WriteSpecialDeclRecords(Sema &SemaRef) {
     Stream.EmitRecord(VTABLES_TO_EMIT, VTablesToEmit);
 }
 
-ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
+ASTFileSignature ASTWriter::WriteASTCore(Sema *SemaPtr, StringRef isysroot,
                                          Module *WritingModule) {
   using namespace llvm;
 
@@ -5357,14 +5366,11 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
   if (Chain)
     Chain->finalizeForWriting();
 
-  ASTContext &Context = SemaRef.Context;
-  Preprocessor &PP = SemaRef.PP;
-
   // This needs to be done very early, since everything that writes
   // SourceLocations or FileIDs depends on it.
   computeNonAffectingInputFiles();
 
-  writeUnhashedControlBlock(PP);
+  writeUnhashedControlBlock(*PP);
 
   // Don't reuse type ID and Identifier ID from readers for C++ standard named
   // modules since we want to support no-transitive-change model for named
@@ -5389,7 +5395,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
   // We do this before emitting any Decl and Types to make sure the
   // Identifier ID is stable.
   SmallVector<const IdentifierInfo *, 128> IIs;
-  for (const auto &ID : PP.getIdentifierTable())
+  for (const auto &ID : PP->getIdentifierTable())
     if (IsInterestingNonMacroIdentifier(ID.second, *this))
       IIs.push_back(ID.second);
   // Sort the identifiers lexicographically before getting the references so
@@ -5402,31 +5408,37 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
   // entire table, since later PCH files in a PCH chain are only interested in
   // the results at the end of the chain.
   RecordData WeakUndeclaredIdentifiers;
-  for (const auto &WeakUndeclaredIdentifierList :
-       SemaRef.WeakUndeclaredIdentifiers) {
-    const IdentifierInfo *const II = WeakUndeclaredIdentifierList.first;
-    for (const auto &WI : WeakUndeclaredIdentifierList.second) {
-      AddIdentifierRef(II, WeakUndeclaredIdentifiers);
-      AddIdentifierRef(WI.getAlias(), WeakUndeclaredIdentifiers);
-      AddSourceLocation(WI.getLocation(), WeakUndeclaredIdentifiers);
+  if (SemaPtr) {
+    for (const auto &WeakUndeclaredIdentifierList :
+         SemaPtr->WeakUndeclaredIdentifiers) {
+      const IdentifierInfo *const II = WeakUndeclaredIdentifierList.first;
+      for (const auto &WI : WeakUndeclaredIdentifierList.second) {
+        AddIdentifierRef(II, WeakUndeclaredIdentifiers);
+        AddIdentifierRef(WI.getAlias(), WeakUndeclaredIdentifiers);
+        AddSourceLocation(WI.getLocation(), WeakUndeclaredIdentifiers);
+      }
     }
   }
 
   // Form the record of special types.
   RecordData SpecialTypes;
-  AddTypeRef(Context, Context.getRawCFConstantStringType(), SpecialTypes);
-  AddTypeRef(Context, Context.getFILEType(), SpecialTypes);
-  AddTypeRef(Context, Context.getjmp_bufType(), SpecialTypes);
-  AddTypeRef(Context, Context.getsigjmp_bufType(), SpecialTypes);
-  AddTypeRef(Context, Context.ObjCIdRedefinitionType, SpecialTypes);
-  AddTypeRef(Context, Context.ObjCClassRedefinitionType, SpecialTypes);
-  AddTypeRef(Context, Context.ObjCSelRedefinitionType, SpecialTypes);
-  AddTypeRef(Context, Context.getucontext_tType(), SpecialTypes);
+  if (SemaPtr) {
+    ASTContext &Context = SemaPtr->Context;
+    AddTypeRef(Context, Context.getRawCFConstantStringType(), SpecialTypes);
+    AddTypeRef(Context, Context.getFILEType(), SpecialTypes);
+    AddTypeRef(Context, Context.getjmp_bufType(), SpecialTypes);
+    AddTypeRef(Context, Context.getsigjmp_bufType(), SpecialTypes);
+    AddTypeRef(Context, Context.ObjCIdRedefinitionType, SpecialTypes);
+    AddTypeRef(Context, Context.ObjCClassRedefinitionType, SpecialTypes);
+    AddTypeRef(Context, Context.ObjCSelRedefinitionType, SpecialTypes);
+    AddTypeRef(Context, Context.getucontext_tType(), SpecialTypes);
+  }
 
-  PrepareWritingSpecialDecls(SemaRef);
+  if (SemaPtr)
+    PrepareWritingSpecialDecls(*SemaPtr);
 
   // Write the control block
-  WriteControlBlock(PP, isysroot);
+  WriteControlBlock(*PP, isysroot);
 
   // Write the remaining AST contents.
   Stream.FlushToWord();
@@ -5448,11 +5460,13 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
 
   // It's possible that updateOutOfDateSelector can update SelectorIDs. To be
   // safe, we copy all selectors out.
-  llvm::SmallVector<Selector, 256> AllSelectors;
-  for (auto &SelectorAndID : SelectorIDs)
-    AllSelectors.push_back(SelectorAndID.first);
-  for (auto &Selector : AllSelectors)
-    SemaRef.ObjC().updateOutOfDateSelector(Selector);
+  if (SemaPtr) {
+    llvm::SmallVector<Selector, 256> AllSelectors;
+    for (auto &SelectorAndID : SelectorIDs)
+      AllSelectors.push_back(SelectorAndID.first);
+    for (auto &Selector : AllSelectors)
+      SemaPtr->ObjC().updateOutOfDateSelector(Selector);
+  }
 
   if (Chain) {
     // Write the mapping information describing our module dependencies and how
@@ -5516,28 +5530,35 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
                               Buffer.data(), Buffer.size());
   }
 
-  WriteDeclAndTypes(Context);
+  if (SemaPtr)
+    WriteDeclAndTypes(SemaPtr->Context);
 
   WriteFileDeclIDsMap();
-  WriteSourceManagerBlock(PP.getSourceManager());
-  WriteComments(Context);
-  WritePreprocessor(PP, isModule);
-  WriteHeaderSearch(PP.getHeaderSearchInfo());
-  WriteSelectors(SemaRef);
-  WriteReferencedSelectorsPool(SemaRef);
-  WriteLateParsedTemplates(SemaRef);
-  WriteIdentifierTable(PP, SemaRef.IdResolver, isModule);
-  WriteFPPragmaOptions(SemaRef.CurFPFeatureOverrides());
-  WriteOpenCLExtensions(SemaRef);
-  WriteCUDAPragmas(SemaRef);
+  WriteSourceManagerBlock(PP->getSourceManager());
+  if (SemaPtr)
+    WriteComments(SemaPtr->Context);
+  WritePreprocessor(*PP, isModule);
+  WriteHeaderSearch(PP->getHeaderSearchInfo());
+  if (SemaPtr) {
+    WriteSelectors(*SemaPtr);
+    WriteReferencedSelectorsPool(*SemaPtr);
+    WriteLateParsedTemplates(*SemaPtr);
+  }
+  WriteIdentifierTable(*PP, SemaPtr ? &SemaPtr->IdResolver : nullptr, isModule);
+  if (SemaPtr) {
+    WriteFPPragmaOptions(SemaPtr->CurFPFeatureOverrides());
+    WriteOpenCLExtensions(*SemaPtr);
+    WriteCUDAPragmas(*SemaPtr);
+  }
 
   // If we're emitting a module, write out the submodule information.
   if (WritingModule)
-    WriteSubmodules(WritingModule, SemaRef.Context);
+    WriteSubmodules(WritingModule, SemaPtr ? &SemaPtr->Context : nullptr);
 
   Stream.EmitRecord(SPECIAL_TYPES, SpecialTypes);
 
-  WriteSpecialDeclRecords(SemaRef);
+  if (SemaPtr)
+    WriteSpecialDeclRecords(*SemaPtr);
 
   // Write the record containing weak undeclared identifiers.
   if (!WeakUndeclaredIdentifiers.empty())
@@ -5552,10 +5573,12 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
       ModuleInfo(uint64_t ID, Module *M) : ID(ID), M(M) {}
     };
     llvm::SmallVector<ModuleInfo, 64> Imports;
-    for (const auto *I : Context.local_imports()) {
-      assert(SubmoduleIDs.contains(I->getImportedModule()));
-      Imports.push_back(ModuleInfo(SubmoduleIDs[I->getImportedModule()],
-                         I->getImportedModule()));
+    if (SemaPtr) {
+      for (const auto *I : SemaPtr->Context.local_imports()) {
+        assert(SubmoduleIDs.contains(I->getImportedModule()));
+        Imports.push_back(ModuleInfo(SubmoduleIDs[I->getImportedModule()],
+                                     I->getImportedModule()));
+      }
     }
 
     if (!Imports.empty()) {
@@ -5577,7 +5600,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
         // FIXME: If the module has macros imported then later has declarations
         // imported, this location won't be the right one as a location for the
         // declaration imports.
-        AddSourceLocation(PP.getModuleImportLoc(Import.M), ImportedModules);
+        AddSourceLocation(PP->getModuleImportLoc(Import.M), ImportedModules);
       }
 
       Stream.EmitRecord(IMPORTED_MODULES, ImportedModules);
@@ -5585,14 +5608,16 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
   }
 
   WriteObjCCategories();
-  if(!WritingModule) {
-    WriteOptimizePragmaOptions(SemaRef);
-    WriteMSStructPragmaOptions(SemaRef);
-    WriteMSPointersToMembersPragmaOptions(SemaRef);
+  if (SemaPtr) {
+    if (!WritingModule) {
+      WriteOptimizePragmaOptions(*SemaPtr);
+      WriteMSStructPragmaOptions(*SemaPtr);
+      WriteMSPointersToMembersPragmaOptions(*SemaPtr);
+    }
+    WritePackPragmaOptions(*SemaPtr);
+    WriteFloatControlPragmaOptions(*SemaPtr);
+    WriteDeclsWithEffectsToVerify(*SemaPtr);
   }
-  WritePackPragmaOptions(SemaRef);
-  WriteFloatControlPragmaOptions(SemaRef);
-  WriteDeclsWithEffectsToVerify(SemaRef);
 
   // Some simple statistics
   RecordData::value_type Record[] = {
@@ -5603,8 +5628,9 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
   ASTBlockRange.second = Stream.GetCurrentBitNo() >> 3;
 
   // Write the module file extension blocks.
-  for (const auto &ExtWriter : ModuleFileExtensionWriters)
-    WriteModuleFileExtension(SemaRef, *ExtWriter);
+  if (SemaPtr)
+    for (const auto &ExtWriter : ModuleFileExtensionWriters)
+      WriteModuleFileExtension(*SemaPtr, *ExtWriter);
 
   return backpatchSignature();
 }
diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp
index cc06106a47708e..fdd240b03fd8df 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -74,7 +74,7 @@ void PCHGenerator::HandleTranslationUnit(ASTContext &Ctx) {
 
   // Emit the PCH file to the Buffer.
   assert(SemaPtr && "No Sema?");
-  Buffer->Signature = Writer.WriteAST(*SemaPtr, OutputFile, Module, isysroot,
+  Buffer->Signature = Writer.WriteAST(SemaPtr, OutputFile, Module, isysroot,
                                       ShouldCacheASTInMemory);
 
   Buffer->IsComplete = true;

>From dff27355c6bcb0dd3729ba94331ebde64d477acb Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Wed, 6 Nov 2024 14:16:51 -0800
Subject: [PATCH 3/4] [clang][deps] Only write preprocessor info into PCMs

---
 clang/include/clang/Lex/HeaderSearchOptions.h |  5 +++++
 clang/include/clang/Serialization/ASTWriter.h |  8 +++-----
 clang/lib/Serialization/GeneratePCH.cpp       | 19 ++++++++++++++-----
 .../DependencyScanningWorker.cpp              |  1 +
 4 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h
index c85e3d27281701..7a16926c186d2c 100644
--- a/clang/include/clang/Lex/HeaderSearchOptions.h
+++ b/clang/include/clang/Lex/HeaderSearchOptions.h
@@ -255,6 +255,10 @@ class HeaderSearchOptions {
   LLVM_PREFERRED_TYPE(bool)
   unsigned ModulesHashContent : 1;
 
+  /// Whether AST files should only contain the preprocessor information.
+  LLVM_PREFERRED_TYPE(bool)
+  unsigned ModulesSerializeOnlyPreprocessor : 1;
+
   /// Whether we should include all things that could impact the module in the
   /// hash.
   ///
@@ -288,6 +292,7 @@ class HeaderSearchOptions {
         ModulesSkipHeaderSearchPaths(false),
         ModulesSkipPragmaDiagnosticMappings(false),
         ModulesPruneNonAffectingModuleMaps(true), ModulesHashContent(false),
+        ModulesSerializeOnlyPreprocessor(false),
         ModulesStrictContextHash(false), ModulesIncludeVFSUsage(false),
         AllowModuleMapSubdirectorySearch(true) {}
 
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 7b5b0117874d7a..51253ea3c63ec7 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -923,9 +923,9 @@ class PCHGenerator : public SemaConsumer {
   void anchor() override;
 
   Preprocessor &PP;
+  llvm::PointerUnion<Sema *, Preprocessor *> Subject;
   std::string OutputFile;
   std::string isysroot;
-  Sema *SemaPtr;
   std::shared_ptr<PCHBuffer> Buffer;
   llvm::BitstreamWriter Stream;
   ASTWriter Writer;
@@ -940,9 +940,7 @@ class PCHGenerator : public SemaConsumer {
   bool isComplete() const { return Buffer->IsComplete; }
   PCHBuffer *getBufferPtr() { return Buffer.get(); }
   StringRef getOutputFile() const { return OutputFile; }
-  DiagnosticsEngine &getDiagnostics() const {
-    return SemaPtr->getDiagnostics();
-  }
+  DiagnosticsEngine &getDiagnostics() const;
   Preprocessor &getPreprocessor() { return PP; }
 
   virtual Module *getEmittingModule(ASTContext &Ctx);
@@ -958,7 +956,7 @@ class PCHGenerator : public SemaConsumer {
                bool GeneratingReducedBMI = false);
   ~PCHGenerator() override;
 
-  void InitializeSema(Sema &S) override { SemaPtr = &S; }
+  void InitializeSema(Sema &S) override;
   void HandleTranslationUnit(ASTContext &Ctx) override;
   void HandleVTable(CXXRecordDecl *RD) override { Writer.handleVTable(RD); }
   ASTMutationListener *GetASTMutationListener() override;
diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp
index fdd240b03fd8df..7a8a951b34f251 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -29,8 +29,8 @@ PCHGenerator::PCHGenerator(
     bool AllowASTWithErrors, bool IncludeTimestamps,
     bool BuildingImplicitModule, bool ShouldCacheASTInMemory,
     bool GeneratingReducedBMI)
-    : PP(PP), OutputFile(OutputFile), isysroot(isysroot.str()),
-      SemaPtr(nullptr), Buffer(std::move(Buffer)), Stream(this->Buffer->Data),
+    : PP(PP), Subject(&PP), OutputFile(OutputFile), isysroot(isysroot.str()),
+      Buffer(std::move(Buffer)), Stream(this->Buffer->Data),
       Writer(Stream, this->Buffer->Data, ModuleCache, Extensions,
              IncludeTimestamps, BuildingImplicitModule, GeneratingReducedBMI),
       AllowASTWithErrors(AllowASTWithErrors),
@@ -56,6 +56,17 @@ Module *PCHGenerator::getEmittingModule(ASTContext &) {
   return M;
 }
 
+DiagnosticsEngine &PCHGenerator::getDiagnostics() const {
+  return PP.getDiagnostics();
+}
+
+void PCHGenerator::InitializeSema(Sema &S) {
+  if (!PP.getHeaderSearchInfo()
+           .getHeaderSearchOpts()
+           .ModulesSerializeOnlyPreprocessor)
+    Subject = &S;
+}
+
 void PCHGenerator::HandleTranslationUnit(ASTContext &Ctx) {
   // Don't create a PCH if there were fatal failures during module loading.
   if (PP.getModuleLoader().HadFatalFailure)
@@ -72,9 +83,7 @@ void PCHGenerator::HandleTranslationUnit(ASTContext &Ctx) {
   if (AllowASTWithErrors)
     PP.getDiagnostics().getClient()->clear();
 
-  // Emit the PCH file to the Buffer.
-  assert(SemaPtr && "No Sema?");
-  Buffer->Signature = Writer.WriteAST(SemaPtr, OutputFile, Module, isysroot,
+  Buffer->Signature = Writer.WriteAST(Subject, OutputFile, Module, isysroot,
                                       ShouldCacheASTInMemory);
 
   Buffer->IsComplete = true;
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index d77187bfb1f2b8..55d116c15bdf9e 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -420,6 +420,7 @@ class DependencyScanningAction : public tooling::ToolAction {
     // TODO: Implement diagnostic bucketing to reduce the impact of strict
     // context hashing.
     ScanInstance.getHeaderSearchOpts().ModulesStrictContextHash = true;
+    ScanInstance.getHeaderSearchOpts().ModulesSerializeOnlyPreprocessor = true;
     ScanInstance.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
     ScanInstance.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
     ScanInstance.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings =

>From 446f239b8080e22403fa8c648e25c61b2d76bf9c Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Wed, 6 Nov 2024 14:21:51 -0800
Subject: [PATCH 4/4] [clang][serialization] Allow special types to be empty

---
 clang/lib/Serialization/ASTReader.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 79615dc3c018ea..8d2debf84f5ad8 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3529,6 +3529,9 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
         break;
       }
 
+      if (Record.empty())
+        break;
+
       if (SpecialTypes.size() != Record.size())
         return llvm::createStringError(std::errc::illegal_byte_sequence,
                                        "invalid special-types record");



More information about the cfe-commits mailing list