[clang] 25d1ac1 - [clang][deps] Only write preprocessor info into PCMs (#115239)

via cfe-commits cfe-commits at lists.llvm.org
Mon Nov 11 13:07:11 PST 2024


Author: Jan Svoboda
Date: 2024-11-11T13:07:08-08:00
New Revision: 25d1ac11d537debb217c65c2bcdd087a60cff58e

URL: https://github.com/llvm/llvm-project/commit/25d1ac11d537debb217c65c2bcdd087a60cff58e
DIFF: https://github.com/llvm/llvm-project/commit/25d1ac11d537debb217c65c2bcdd087a60cff58e.diff

LOG: [clang][deps] Only write preprocessor info into PCMs (#115239)

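This patch builds on top of
https://github.com/llvm/llvm-project/pull/115237 and
https://github.com/llvm/llvm-project/pull/115235. When the new
`ModulesSerializeOnlyPreprocessor` header search option is set (the
dependency scanner now enables it), `PCHGenerator` passes only the
`Preprocessor` object to `ASTWriter` instead of `Sema`. This reduces the
size of scanning PCM files by 1/3 and speeds up scans by 16%.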

Added: 
    

Modified: 
    clang/include/clang/Lex/HeaderSearchOptions.h
    clang/include/clang/Serialization/ASTWriter.h
    clang/lib/Serialization/ASTReader.cpp
    clang/lib/Serialization/GeneratePCH.cpp
    clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h
index c85e3d27281701..7a16926c186d2c 100644
--- a/clang/include/clang/Lex/HeaderSearchOptions.h
+++ b/clang/include/clang/Lex/HeaderSearchOptions.h
@@ -255,6 +255,10 @@ class HeaderSearchOptions {
   LLVM_PREFERRED_TYPE(bool)
   unsigned ModulesHashContent : 1;
 
+  /// Whether AST files should only contain the preprocessor information.
+  LLVM_PREFERRED_TYPE(bool)
+  unsigned ModulesSerializeOnlyPreprocessor : 1;
+
   /// Whether we should include all things that could impact the module in the
   /// hash.
   ///
@@ -288,6 +292,7 @@ class HeaderSearchOptions {
         ModulesSkipHeaderSearchPaths(false),
         ModulesSkipPragmaDiagnosticMappings(false),
         ModulesPruneNonAffectingModuleMaps(true), ModulesHashContent(false),
+        ModulesSerializeOnlyPreprocessor(false),
         ModulesStrictContextHash(false), ModulesIncludeVFSUsage(false),
         AllowModuleMapSubdirectorySearch(true) {}
 

diff  --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index da35b32699811a..161b2ef7c86a49 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -929,9 +929,9 @@ class PCHGenerator : public SemaConsumer {
   void anchor() override;
 
   Preprocessor &PP;
+  llvm::PointerUnion<Sema *, Preprocessor *> Subject;
   std::string OutputFile;
   std::string isysroot;
-  Sema *SemaPtr;
   std::shared_ptr<PCHBuffer> Buffer;
   llvm::BitstreamWriter Stream;
   ASTWriter Writer;
@@ -946,9 +946,7 @@ class PCHGenerator : public SemaConsumer {
   bool isComplete() const { return Buffer->IsComplete; }
   PCHBuffer *getBufferPtr() { return Buffer.get(); }
   StringRef getOutputFile() const { return OutputFile; }
-  DiagnosticsEngine &getDiagnostics() const {
-    return SemaPtr->getDiagnostics();
-  }
+  DiagnosticsEngine &getDiagnostics() const;
   Preprocessor &getPreprocessor() { return PP; }
 
   virtual Module *getEmittingModule(ASTContext &Ctx);
@@ -964,7 +962,7 @@ class PCHGenerator : public SemaConsumer {
                bool GeneratingReducedBMI = false);
   ~PCHGenerator() override;
 
-  void InitializeSema(Sema &S) override { SemaPtr = &S; }
+  void InitializeSema(Sema &S) override;
   void HandleTranslationUnit(ASTContext &Ctx) override;
   void HandleVTable(CXXRecordDecl *RD) override { Writer.handleVTable(RD); }
   ASTMutationListener *GetASTMutationListener() override;

diff  --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index e14061caccd63c..cf987df0e2154d 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3522,6 +3522,9 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
         break;
       }
 
+      if (Record.empty())
+        break;
+
       if (SpecialTypes.size() != Record.size())
         return llvm::createStringError(std::errc::illegal_byte_sequence,
                                        "invalid special-types record");

diff  --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp
index fdd240b03fd8df..7a8a951b34f251 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -29,8 +29,8 @@ PCHGenerator::PCHGenerator(
     bool AllowASTWithErrors, bool IncludeTimestamps,
     bool BuildingImplicitModule, bool ShouldCacheASTInMemory,
     bool GeneratingReducedBMI)
-    : PP(PP), OutputFile(OutputFile), isysroot(isysroot.str()),
-      SemaPtr(nullptr), Buffer(std::move(Buffer)), Stream(this->Buffer->Data),
+    : PP(PP), Subject(&PP), OutputFile(OutputFile), isysroot(isysroot.str()),
+      Buffer(std::move(Buffer)), Stream(this->Buffer->Data),
       Writer(Stream, this->Buffer->Data, ModuleCache, Extensions,
              IncludeTimestamps, BuildingImplicitModule, GeneratingReducedBMI),
       AllowASTWithErrors(AllowASTWithErrors),
@@ -56,6 +56,17 @@ Module *PCHGenerator::getEmittingModule(ASTContext &) {
   return M;
 }
 
+DiagnosticsEngine &PCHGenerator::getDiagnostics() const {
+  return PP.getDiagnostics();
+}
+
+void PCHGenerator::InitializeSema(Sema &S) {
+  if (!PP.getHeaderSearchInfo()
+           .getHeaderSearchOpts()
+           .ModulesSerializeOnlyPreprocessor)
+    Subject = &S;
+}
+
 void PCHGenerator::HandleTranslationUnit(ASTContext &Ctx) {
   // Don't create a PCH if there were fatal failures during module loading.
   if (PP.getModuleLoader().HadFatalFailure)
@@ -72,9 +83,7 @@ void PCHGenerator::HandleTranslationUnit(ASTContext &Ctx) {
   if (AllowASTWithErrors)
     PP.getDiagnostics().getClient()->clear();
 
-  // Emit the PCH file to the Buffer.
-  assert(SemaPtr && "No Sema?");
-  Buffer->Signature = Writer.WriteAST(SemaPtr, OutputFile, Module, isysroot,
+  Buffer->Signature = Writer.WriteAST(Subject, OutputFile, Module, isysroot,
                                       ShouldCacheASTInMemory);
 
   Buffer->IsComplete = true;

diff  --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 1deffe68003804..fd1b7af0600da7 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -422,6 +422,7 @@ class DependencyScanningAction : public tooling::ToolAction {
     // TODO: Implement diagnostic bucketing to reduce the impact of strict
     // context hashing.
     ScanInstance.getHeaderSearchOpts().ModulesStrictContextHash = true;
+    ScanInstance.getHeaderSearchOpts().ModulesSerializeOnlyPreprocessor = true;
     ScanInstance.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
     ScanInstance.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
     ScanInstance.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings =


        


More information about the cfe-commits mailing list