[clang] 6c465a2 - [clang][deps] Skip slow `UNHASHED_CONTROL_BLOCK` records (#69975)

via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 2 15:08:02 PDT 2023


Author: Jan Svoboda
Date: 2023-11-02T15:07:58-07:00
New Revision: 6c465a201b02f3316efc55eef6909080866f6fb0

URL: https://github.com/llvm/llvm-project/commit/6c465a201b02f3316efc55eef6909080866f6fb0
DIFF: https://github.com/llvm/llvm-project/commit/6c465a201b02f3316efc55eef6909080866f6fb0.diff

LOG: [clang][deps] Skip slow `UNHASHED_CONTROL_BLOCK` records (#69975)

Deserialization of the `DIAGNOSTIC_OPTIONS` and `HEADER_SEARCH_PATHS`
records is slow and done for every transitively loaded PCM.
Deserialization of these records cannot be skipped, because the words
are VBR6-encoded and we don't store the length of the entire record. We
could either turn them into binary blobs that can be skipped during
deserialization, or skip writing them altogether. This patch takes the
latter approach, since these records are not necessary in scanning PCMs.
The scanner doesn't make any guarantees about the accuracy of
diagnostics, and we always have the same header search paths due to
strict context hashing.

The commit that makes the `DIAGNOSTIC_OPTIONS` record skippable was
originally implemented by @benlangmuir in a downstream repo.

Added: 
    clang/test/Modules/diagnostic-options-mismatch.c
    clang/test/Modules/header-search-paths-mismatch.c

Modified: 
    clang/include/clang/Driver/Options.td
    clang/include/clang/Lex/HeaderSearchOptions.h
    clang/lib/Frontend/FrontendActions.cpp
    clang/lib/Serialization/ASTWriter.cpp
    clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index fcf6a4b2ccb2369..9c0dcaa8b3f6276 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2947,6 +2947,14 @@ def fno_modules_validate_textual_header_includes :
   MarshallingInfoNegativeFlag<LangOpts<"ModulesValidateTextualHeaderIncludes">>,
   HelpText<"Do not enforce -fmodules-decluse and private header restrictions for textual headers. "
            "This flag will be removed in a future Clang release.">;
+defm modules_skip_diagnostic_options : BoolFOption<"modules-skip-diagnostic-options",
+    HeaderSearchOpts<"ModulesSkipDiagnosticOptions">, DefaultFalse,
+    PosFlag<SetTrue, [], [], "Disable writing diagnostic options">,
+    NegFlag<SetFalse>, BothFlags<[], [CC1Option]>>;
+defm modules_skip_header_search_paths : BoolFOption<"modules-skip-header-search-paths",
+    HeaderSearchOpts<"ModulesSkipHeaderSearchPaths">, DefaultFalse,
+    PosFlag<SetTrue, [], [], "Disable writing header search paths">,
+    NegFlag<SetFalse>, BothFlags<[], [CC1Option]>>;
 
 def fincremental_extensions :
   Flag<["-"], "fincremental-extensions">,

diff  --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h
index c7d95006bb779ad..7f162efe4f0d4b3 100644
--- a/clang/include/clang/Lex/HeaderSearchOptions.h
+++ b/clang/include/clang/Lex/HeaderSearchOptions.h
@@ -219,6 +219,14 @@ class HeaderSearchOptions {
 
   unsigned ModulesValidateDiagnosticOptions : 1;
 
+  /// Whether to entirely skip writing diagnostic options.
+  /// Primarily used to speed up deserialization during dependency scanning.
+  unsigned ModulesSkipDiagnosticOptions : 1;
+
+  /// Whether to entirely skip writing header search paths.
+  /// Primarily used to speed up deserialization during dependency scanning.
+  unsigned ModulesSkipHeaderSearchPaths : 1;
+
   unsigned ModulesHashContent : 1;
 
   /// Whether we should include all things that could impact the module in the
@@ -238,7 +246,9 @@ class HeaderSearchOptions {
         ModulesValidateSystemHeaders(false),
         ValidateASTInputFilesContent(false),
         ForceCheckCXX20ModulesInputFiles(false), UseDebugInfo(false),
-        ModulesValidateDiagnosticOptions(true), ModulesHashContent(false),
+        ModulesValidateDiagnosticOptions(true),
+        ModulesSkipDiagnosticOptions(false),
+        ModulesSkipHeaderSearchPaths(false), ModulesHashContent(false),
         ModulesStrictContextHash(false) {}
 
   /// AddPath - Add the \p Path path to the specified \p Group list.

diff  --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index ce6e0b7126b635a..2afcf1cf9f68c81 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -661,6 +661,21 @@ namespace {
       return false;
     }
 
+    bool ReadHeaderSearchPaths(const HeaderSearchOptions &HSOpts,
+                               bool Complain) override {
+      Out.indent(2) << "Header search paths:\n";
+      Out.indent(4) << "User entries:\n";
+      for (const auto &Entry : HSOpts.UserEntries)
+        Out.indent(6) << Entry.Path << "\n";
+      Out.indent(4) << "System header prefixes:\n";
+      for (const auto &Prefix : HSOpts.SystemHeaderPrefixes)
+        Out.indent(6) << Prefix.Prefix << "\n";
+      Out.indent(4) << "VFS overlay files:\n";
+      for (const auto &Overlay : HSOpts.VFSOverlayFiles)
+        Out.indent(6) << Overlay << "\n";
+      return false;
+    }
+
     bool ReadPreprocessorOptions(const PreprocessorOptions &PPOpts,
                                  bool ReadMacros, bool Complain,
                                  std::string &SuggestedPredefines) override {

diff  --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index ab70594607530f4..99358004ffd21d3 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1212,52 +1212,54 @@ void ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
     Record.clear();
   }
 
+  const auto &HSOpts = PP.getHeaderSearchInfo().getHeaderSearchOpts();
+
   // Diagnostic options.
   const auto &Diags = Context.getDiagnostics();
   const DiagnosticOptions &DiagOpts = Diags.getDiagnosticOptions();
+  if (!HSOpts.ModulesSkipDiagnosticOptions) {
 #define DIAGOPT(Name, Bits, Default) Record.push_back(DiagOpts.Name);
 #define ENUM_DIAGOPT(Name, Type, Bits, Default)                                \
   Record.push_back(static_cast<unsigned>(DiagOpts.get##Name()));
 #include "clang/Basic/DiagnosticOptions.def"
-  Record.push_back(DiagOpts.Warnings.size());
-  for (unsigned I = 0, N = DiagOpts.Warnings.size(); I != N; ++I)
-    AddString(DiagOpts.Warnings[I], Record);
-  Record.push_back(DiagOpts.Remarks.size());
-  for (unsigned I = 0, N = DiagOpts.Remarks.size(); I != N; ++I)
-    AddString(DiagOpts.Remarks[I], Record);
-  // Note: we don't serialize the log or serialization file names, because they
-  // are generally transient files and will almost always be overridden.
-  Stream.EmitRecord(DIAGNOSTIC_OPTIONS, Record);
-  Record.clear();
+    Record.push_back(DiagOpts.Warnings.size());
+    for (unsigned I = 0, N = DiagOpts.Warnings.size(); I != N; ++I)
+      AddString(DiagOpts.Warnings[I], Record);
+    Record.push_back(DiagOpts.Remarks.size());
+    for (unsigned I = 0, N = DiagOpts.Remarks.size(); I != N; ++I)
+      AddString(DiagOpts.Remarks[I], Record);
+    // Note: we don't serialize the log or serialization file names, because
+    // they are generally transient files and will almost always be overridden.
+    Stream.EmitRecord(DIAGNOSTIC_OPTIONS, Record);
+    Record.clear();
+  }
 
   // Header search paths.
-  Record.clear();
-  const HeaderSearchOptions &HSOpts =
-      PP.getHeaderSearchInfo().getHeaderSearchOpts();
-
-  // Include entries.
-  Record.push_back(HSOpts.UserEntries.size());
-  for (unsigned I = 0, N = HSOpts.UserEntries.size(); I != N; ++I) {
-    const HeaderSearchOptions::Entry &Entry = HSOpts.UserEntries[I];
-    AddString(Entry.Path, Record);
-    Record.push_back(static_cast<unsigned>(Entry.Group));
-    Record.push_back(Entry.IsFramework);
-    Record.push_back(Entry.IgnoreSysRoot);
-  }
+  if (!HSOpts.ModulesSkipHeaderSearchPaths) {
+    // Include entries.
+    Record.push_back(HSOpts.UserEntries.size());
+    for (unsigned I = 0, N = HSOpts.UserEntries.size(); I != N; ++I) {
+      const HeaderSearchOptions::Entry &Entry = HSOpts.UserEntries[I];
+      AddString(Entry.Path, Record);
+      Record.push_back(static_cast<unsigned>(Entry.Group));
+      Record.push_back(Entry.IsFramework);
+      Record.push_back(Entry.IgnoreSysRoot);
+    }
 
-  // System header prefixes.
-  Record.push_back(HSOpts.SystemHeaderPrefixes.size());
-  for (unsigned I = 0, N = HSOpts.SystemHeaderPrefixes.size(); I != N; ++I) {
-    AddString(HSOpts.SystemHeaderPrefixes[I].Prefix, Record);
-    Record.push_back(HSOpts.SystemHeaderPrefixes[I].IsSystemHeader);
-  }
+    // System header prefixes.
+    Record.push_back(HSOpts.SystemHeaderPrefixes.size());
+    for (unsigned I = 0, N = HSOpts.SystemHeaderPrefixes.size(); I != N; ++I) {
+      AddString(HSOpts.SystemHeaderPrefixes[I].Prefix, Record);
+      Record.push_back(HSOpts.SystemHeaderPrefixes[I].IsSystemHeader);
+    }
 
-  // VFS overlay files.
-  Record.push_back(HSOpts.VFSOverlayFiles.size());
-  for (StringRef VFSOverlayFile : HSOpts.VFSOverlayFiles)
-    AddString(VFSOverlayFile, Record);
+    // VFS overlay files.
+    Record.push_back(HSOpts.VFSOverlayFiles.size());
+    for (StringRef VFSOverlayFile : HSOpts.VFSOverlayFiles)
+      AddString(VFSOverlayFile, Record);
 
-  Stream.EmitRecord(HEADER_SEARCH_PATHS, Record);
+    Stream.EmitRecord(HEADER_SEARCH_PATHS, Record);
+  }
 
   // Write out the diagnostic/pragma mappings.
   WritePragmaDiagnosticMappings(Diags, /* isModule = */ WritingModule);

diff  --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 29df0c3a0afdb5c..dc7bb289cf8f905 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -252,6 +252,8 @@ class DependencyScanningAction : public tooling::ToolAction {
     // TODO: Implement diagnostic bucketing to reduce the impact of strict
     // context hashing.
     ScanInstance.getHeaderSearchOpts().ModulesStrictContextHash = true;
+    ScanInstance.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
+    ScanInstance.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
 
     // Avoid some checks and module map parsing when loading PCM files.
     ScanInstance.getPreprocessorOpts().ModulesCheckRelocated = false;

diff  --git a/clang/test/Modules/diagnostic-options-mismatch.c b/clang/test/Modules/diagnostic-options-mismatch.c
new file mode 100644
index 000000000000000..cdd6f897729a9d8
--- /dev/null
+++ b/clang/test/Modules/diagnostic-options-mismatch.c
@@ -0,0 +1,41 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: split-file %s %t
+
+//--- module.modulemap
+module Mod { header "mod.h" }
+//--- mod.h
+//--- tu.c
+#include "mod.h"
+
+// Without any extra compiler flags, mismatched diagnostic options trigger recompilation of modules.
+//
+// RUN: %clang_cc1 -fmodules -fmodule-map-file=%t/module.modulemap -fmodules-cache-path=%t/cache1 -fdisable-module-hash \
+// RUN:   -fsyntax-only %t/tu.c -Wnon-modular-include-in-module
+// RUN: %clang_cc1 -fmodules -fmodule-map-file=%t/module.modulemap -fmodules-cache-path=%t/cache1 -fdisable-module-hash \
+// RUN:   -fsyntax-only %t/tu.c -Werror=non-modular-include-in-module -Rmodule-build 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=DID-REBUILD
+// DID-REBUILD: remark: building module 'Mod'
+
+// When skipping serialization of diagnostic options, mismatches cannot be detected, old PCM file gets reused.
+//
+// RUN: %clang_cc1 -fmodules -fmodule-map-file=%t/module.modulemap -fmodules-cache-path=%t/cache2 -fdisable-module-hash \
+// RUN:   -fsyntax-only %t/tu.c -fmodules-skip-diagnostic-options -Wnon-modular-include-in-module
+// RUN: %clang_cc1 -fmodules -fmodule-map-file=%t/module.modulemap -fmodules-cache-path=%t/cache2 -fdisable-module-hash \
+// RUN:   -fsyntax-only %t/tu.c -fmodules-skip-diagnostic-options -Werror=non-modular-include-in-module -Rmodule-build 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=DID-REUSE --allow-empty
+// DID-REUSE-NOT: remark: building module 'Mod'
+//
+// RUN: %clang_cc1 -module-file-info %t/cache2/Mod.pcm | FileCheck %s --check-prefix=NO-DIAG-OPTS
+// NO-DIAG-OPTS-NOT: Diagnostic flags:
+
+// When disabling validation of diagnostic options, mismatches are not checked for, old PCM file gets reused.
+//
+// RUN: %clang_cc1 -fmodules -fmodule-map-file=%t/module.modulemap -fmodules-cache-path=%t/cache3 -fdisable-module-hash \
+// RUN:   -fsyntax-only %t/tu.c -fmodules-disable-diagnostic-validation -Wnon-modular-include-in-module
+// RUN: %clang_cc1 -fmodules -fmodule-map-file=%t/module.modulemap -fmodules-cache-path=%t/cache3 -fdisable-module-hash \
+// RUN:   -fsyntax-only %t/tu.c -fmodules-disable-diagnostic-validation -Werror=non-modular-include-in-module -Rmodule-build 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=DID-REUSE --allow-empty
+//
+// RUN: %clang_cc1 -module-file-info %t/cache3/Mod.pcm | FileCheck %s --check-prefix=OLD-DIAG-OPTS
+// OLD-DIAG-OPTS: Diagnostic flags:
+// OLD-DIAG-OPTS-NEXT: -Wnon-modular-include-in-module

diff  --git a/clang/test/Modules/header-search-paths-mismatch.c b/clang/test/Modules/header-search-paths-mismatch.c
new file mode 100644
index 000000000000000..8f980fdc5d9240b
--- /dev/null
+++ b/clang/test/Modules/header-search-paths-mismatch.c
@@ -0,0 +1,36 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: split-file %s %t
+
+//--- module.modulemap
+module Mod { header "mod.h" }
+//--- mod.h
+//--- tu.c
+#include "mod.h"
+
+//--- one/foo.h
+//--- two/foo.h
+
+// By default, mismatched header search paths are ignored, old PCM file gets reused.
+//
+// RUN: %clang_cc1 -fmodules -fmodule-map-file=%t/module.modulemap -fmodules-cache-path=%t/cache1 -fdisable-module-hash \
+// RUN:   -fsyntax-only %t/tu.c -I %t/one
+// RUN: %clang_cc1 -fmodules -fmodule-map-file=%t/module.modulemap -fmodules-cache-path=%t/cache1 -fdisable-module-hash \
+// RUN:   -fsyntax-only %t/tu.c -I %t/two -Rmodule-build 2>&1 \
+// RUN:   | FileCheck %s --allow-empty --check-prefix=DID-REUSE
+// DID-REUSE-NOT: remark: building module 'Mod'
+//
+// RUN: %clang_cc1 -module-file-info %t/cache1/Mod.pcm | FileCheck %s --check-prefix=HS-PATHS
+// HS-PATHS:      Header search paths:
+// HS-PATHS-NEXT:   User entries:
+// HS-PATHS-NEXT:     one
+
+// When skipping serialization of header search paths, mismatches cannot be detected, old PCM file gets reused.
+//
+// RUN: %clang_cc1 -fmodules -fmodule-map-file=%t/module.modulemap -fmodules-cache-path=%t/cache2 -fdisable-module-hash \
+// RUN:   -fsyntax-only %t/tu.c -fmodules-skip-header-search-paths -I %t/one
+// RUN: %clang_cc1 -fmodules -fmodule-map-file=%t/module.modulemap -fmodules-cache-path=%t/cache2 -fdisable-module-hash \
+// RUN:   -fsyntax-only %t/tu.c -fmodules-skip-header-search-paths -I %t/two -Rmodule-build 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=DID-REUSE --allow-empty
+//
+// RUN: %clang_cc1 -module-file-info %t/cache2/Mod.pcm | FileCheck %s --check-prefix=NO-HS-PATHS
+// NO-HS-PATHS-NOT: Header search paths:


        


More information about the cfe-commits mailing list