[clang] 54acda2 - [clang module] Current Working Directory Pruning (#124786)

via cfe-commits cfe-commits at lists.llvm.org
Tue Feb 4 20:04:42 PST 2025


Author: Qiongsi Wu
Date: 2025-02-04T20:04:39-08:00
New Revision: 54acda2e0ebdf240deeef4d51fc3240c5548dbb7

URL: https://github.com/llvm/llvm-project/commit/54acda2e0ebdf240deeef4d51fc3240c5548dbb7
DIFF: https://github.com/llvm/llvm-project/commit/54acda2e0ebdf240deeef4d51fc3240c5548dbb7.diff

LOG: [clang module] Current Working Directory Pruning (#124786)

When computing the context hash, `clang` always includes the compiler's
working directory. As a result, when the only difference between two
compilations is the working directory, different module variants are
generated. These variants are redundant. This PR implements an
optimization that ignores the working directory when computing the
context hash, provided it is safe to do so.

Specifically, `clang` checks if it is safe to ignore the working
directory in `isSafeToIgnoreCWD`. The check involves going through
compile command options to see if any paths specified are relative. The
definition of relative path used here is that the input path is not
empty, and `llvm::sys::path::is_absolute` is false. If all the paths
examined are not relative, `clang` considers it safe to ignore the
current working directory and does not consider the working directory
when computing the context hash.

Added: 
    clang/test/ClangScanDeps/modules-context-hash-cwd.c

Modified: 
    clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
    clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
    clang/test/ClangScanDeps/working-dir.m
    clang/tools/clang-scan-deps/ClangScanDeps.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
index 4a343f2872d8d9..f002f8645d3f61 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
@@ -55,15 +55,18 @@ enum class ScanningOptimizations {
   HeaderSearch = 1,
 
   /// Remove warnings from system modules.
-  SystemWarnings = 2,
+  SystemWarnings = (1 << 1),
 
   /// Remove unused -ivfsoverlay arguments.
-  VFS = 4,
+  VFS = (1 << 2),
 
   /// Canonicalize -D and -U options.
-  Macros = 8,
+  Macros = (1 << 3),
 
-  DSS_LAST_BITMASK_ENUM(Macros),
+  /// Ignore the compiler's working directory if it is safe.
+  IgnoreCWD = (1 << 4),
+
+  DSS_LAST_BITMASK_ENUM(IgnoreCWD),
   Default = All
 };
 

diff  --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index 732de7b82475df..1c5f4c4b50ab6a 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -394,9 +394,91 @@ void ModuleDepCollector::applyDiscoveredDependencies(CompilerInvocation &CI) {
   }
 }
 
+static bool isSafeToIgnoreCWD(const CowCompilerInvocation &CI) {
+  // Check if the command line input uses relative paths.
+  // It is not safe to ignore the current working directory if any of the
+  // command line inputs use relative paths.
+#define IF_RELATIVE_RETURN_FALSE(PATH)                                         \
+  do {                                                                         \
+    if (!PATH.empty() && !llvm::sys::path::is_absolute(PATH))                  \
+      return false;                                                            \
+  } while (0)
+
+#define IF_ANY_RELATIVE_RETURN_FALSE(PATHS)                                    \
+  do {                                                                         \
+    if (llvm::any_of(PATHS, [](const auto &P) {                                \
+          return !P.empty() && !llvm::sys::path::is_absolute(P);               \
+        }))                                                                    \
+      return false;                                                            \
+  } while (0)
+
+  // Header search paths.
+  const auto &HeaderSearchOpts = CI.getHeaderSearchOpts();
+  IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.Sysroot);
+  for (auto &Entry : HeaderSearchOpts.UserEntries)
+    if (Entry.IgnoreSysRoot)
+      IF_RELATIVE_RETURN_FALSE(Entry.Path);
+  IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ResourceDir);
+  IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleCachePath);
+  IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleUserBuildPath);
+  for (auto I = HeaderSearchOpts.PrebuiltModuleFiles.begin(),
+            E = HeaderSearchOpts.PrebuiltModuleFiles.end();
+       I != E;) {
+    auto Current = I++;
+    IF_RELATIVE_RETURN_FALSE(Current->second);
+  }
+  IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.PrebuiltModulePaths);
+  IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.VFSOverlayFiles);
+
+  // Preprocessor options.
+  const auto &PPOpts = CI.getPreprocessorOpts();
+  IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.MacroIncludes);
+  IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.Includes);
+  IF_RELATIVE_RETURN_FALSE(PPOpts.ImplicitPCHInclude);
+
+  // Frontend options.
+  const auto &FrontendOpts = CI.getFrontendOpts();
+  for (const FrontendInputFile &Input : FrontendOpts.Inputs) {
+    if (Input.isBuffer())
+      continue; // FIXME: Can this happen when parsing command-line?
+
+    IF_RELATIVE_RETURN_FALSE(Input.getFile());
+  }
+  IF_RELATIVE_RETURN_FALSE(FrontendOpts.CodeCompletionAt.FileName);
+  IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleMapFiles);
+  IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleFiles);
+  IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModulesEmbedFiles);
+  IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ASTMergeFiles);
+  IF_RELATIVE_RETURN_FALSE(FrontendOpts.OverrideRecordLayoutsFile);
+  IF_RELATIVE_RETURN_FALSE(FrontendOpts.StatsFile);
+
+  // Filesystem options.
+  const auto &FileSystemOpts = CI.getFileSystemOpts();
+  IF_RELATIVE_RETURN_FALSE(FileSystemOpts.WorkingDir);
+
+  // Codegen options.
+  const auto &CodeGenOpts = CI.getCodeGenOpts();
+  IF_RELATIVE_RETURN_FALSE(CodeGenOpts.DebugCompilationDir);
+  IF_RELATIVE_RETURN_FALSE(CodeGenOpts.CoverageCompilationDir);
+
+  // Sanitizer options.
+  IF_ANY_RELATIVE_RETURN_FALSE(CI.getLangOpts().NoSanitizeFiles);
+
+  // Coverage mappings.
+  IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileInstrumentUsePath);
+  IF_RELATIVE_RETURN_FALSE(CodeGenOpts.SampleProfileFile);
+  IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileRemappingFile);
+
+  // Dependency output options.
+  for (auto &ExtraDep : CI.getDependencyOutputOpts().ExtraDeps)
+    IF_RELATIVE_RETURN_FALSE(ExtraDep.first);
+
+  return true;
+}
+
 static std::string getModuleContextHash(const ModuleDeps &MD,
                                         const CowCompilerInvocation &CI,
-                                        bool EagerLoadModules,
+                                        bool EagerLoadModules, bool IgnoreCWD,
                                         llvm::vfs::FileSystem &VFS) {
   llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native>
       HashBuilder;
@@ -407,8 +489,11 @@ static std::string getModuleContextHash(const ModuleDeps &MD,
   HashBuilder.add(getClangFullRepositoryVersion());
   HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
   llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory();
-  if (CWD)
+  auto &FSOpts = const_cast<FileSystemOptions &>(CI.getFileSystemOpts());
+  if (CWD && !IgnoreCWD)
     HashBuilder.add(*CWD);
+  else
+    FSOpts.WorkingDir.clear();
 
   // Hash the BuildInvocation without any input files.
   SmallString<0> ArgVec;
@@ -440,8 +525,11 @@ static std::string getModuleContextHash(const ModuleDeps &MD,
 
 void ModuleDepCollector::associateWithContextHash(
     const CowCompilerInvocation &CI, ModuleDeps &Deps) {
-  Deps.ID.ContextHash = getModuleContextHash(
-      Deps, CI, EagerLoadModules, ScanInstance.getVirtualFileSystem());
+  bool IgnoreCWD = any(OptimizeArgs & ScanningOptimizations::IgnoreCWD) &&
+                   isSafeToIgnoreCWD(CI);
+  Deps.ID.ContextHash =
+      getModuleContextHash(Deps, CI, EagerLoadModules, IgnoreCWD,
+                           ScanInstance.getVirtualFileSystem());
   bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second;
   (void)Inserted;
   assert(Inserted && "duplicate module mapping");

diff  --git a/clang/test/ClangScanDeps/modules-context-hash-cwd.c b/clang/test/ClangScanDeps/modules-context-hash-cwd.c
new file mode 100644
index 00000000000000..459d2c90debe67
--- /dev/null
+++ b/clang/test/ClangScanDeps/modules-context-hash-cwd.c
@@ -0,0 +1,188 @@
+// Test current directory pruning when computing the context hash.
+
+// REQUIRES: shell
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb0.json.in > %t/cdb0.json
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb1.json.in > %t/cdb1.json
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb3.json.in > %t/cdb3.json
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb4.json.in > %t/cdb4.json
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb5.json.in > %t/cdb5.json
+// RUN: clang-scan-deps -compilation-database %t/cdb0.json -format experimental-full > %t/result0.json
+// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full > %t/result1.json
+// It is not a typo to use cdb1.json for result2. We intend to use the same
+// compilation database, but different clang-scan-deps optimize-args options.
+// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full -optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/result2.json
+// RUN: clang-scan-deps -compilation-database %t/cdb3.json -format experimental-full > %t/result3.json
+// RUN: clang-scan-deps -compilation-database %t/cdb4.json -format experimental-full > %t/result4.json
+// RUN: clang-scan-deps -compilation-database %t/cdb5.json -format experimental-full > %t/result5.json
+// RUN: cat %t/result0.json %t/result1.json | FileCheck %s
+// RUN: cat %t/result0.json %t/result2.json | FileCheck %s -check-prefix=SKIPOPT
+// RUN: cat %t/result3.json %t/result4.json | FileCheck %s -check-prefix=RELPATH
+// RUN: cat %t/result0.json %t/result5.json | FileCheck %s
+
+//--- cdb0.json.in
+[{
+  "directory": "DIR",
+  "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o",
+  "file": "DIR/tu.c"
+}]
+
+//--- cdb1.json.in
+[{
+  "directory": "DIR/a",
+  "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o",
+  "file": "DIR/tu.c"
+}]
+
+// cdb2 is skipped because we reuse cdb1.
+
+//--- cdb3.json.in
+[{
+  "directory": "DIR",
+  "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -fprebuilt-module-path=.././module -IDIR/include/ -o DIR/tu.o ",
+  "file": "DIR/tu.c"
+}]
+
+//--- cdb4.json.in
+[{
+  "directory": "DIR/a/",
+  "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -fprebuilt-module-path=.././module -IDIR/include/ -o DIR/tu.o ",
+  "file": "DIR/tu.c"
+}]
+
+//--- cdb5.json.in
+[{
+  "directory": "DIR",
+  "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -Xclang -working-directory=DIR/a/ -o DIR/tu.o",
+  "file": "DIR/tu.c"
+}]
+
+//--- include/module.modulemap
+module mod {
+  header "mod.h"
+}
+
+//--- include/mod.h
+
+//--- tu.c
+#include "mod.h"
+
+// Check that result0 and result1/result5 compute the same hash with
+// optimization on. The only difference between result0 and result1/result5 is
+// the compiler's working directory.
+// CHECK:     {
+// CHECK-NEXT:  "modules": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:     "clang-module-deps": [],
+// CHECK:          "context-hash": "[[HASH:.*]]",
+// CHECK:        }
+// CHECK:       "translation-units": [
+// CHECK:        {
+// CHECK:          "commands": [
+// CHECK:          {
+// CHECK-NEXT:        "clang-context-hash": "{{.*}}",
+// CHECK-NEXT:        "clang-module-deps": [
+// CHECK-NEXT:          {
+// CHECK-NEXT:            "context-hash": "[[HASH]]",
+// CHECK-NEXT:            "module-name": "mod"
+// CHECK:               }
+// CHECK:             ],
+// CHECK:     {
+// CHECK-NEXT:   "modules": [
+// CHECK-NEXT:    {
+// CHECK-NEXT:      "clang-module-deps": [],
+// CHECK:           "context-hash": "[[HASH]]",
+// CHECK:         }
+// CHECK:        "translation-units": [
+// CHECK:         {
+// CHECK:           "commands": [
+// CHECK:           {
+// CHECK-NEXT:         "clang-context-hash": "{{.*}}",
+// CHECK-NEXT:         "clang-module-deps": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:             "context-hash": "[[HASH]]",
+// CHECK-NEXT:             "module-name": "mod"
+// CHECK:               }
+// CHECK:              ],
+
+// Check that result0 and result2 compute different hashes because
+// the working directory optimization is turned off for result2.
+// SKIPOPT:      {
+// SKIPOPT-NEXT:   "modules": [
+// SKIPOPT-NEXT:    {
+// SKIPOPT-NEXT:      "clang-module-deps": [],
+// SKIPOPT:           "context-hash": "[[HASH0:.*]]",
+// SKIPOPT:         }
+// SKIPOPT:        "translation-units": [
+// SKIPOPT:         {
+// SKIPOPT:            "commands": [
+// SKIPOPT:             {
+// SKIPOPT-NEXT:          "clang-context-hash": "{{.*}}",
+// SKIPOPT-NEXT:          "clang-module-deps": [
+// SKIPOPT-NEXT:            {
+// SKIPOPT-NEXT:              "context-hash": "[[HASH0]]",
+// SKIPOPT-NEXT:              "module-name": "mod"
+// SKIPOPT:            }
+// SKIPOPT:          ],
+// SKIPOPT:      {
+// SKIPOPT-NEXT:   "modules": [
+// SKIPOPT-NEXT:     {
+// SKIPOPT-NEXT:       "clang-module-deps": [],
+// SKIPOPT-NOT:        "context-hash": "[[HASH0]]",
+// SKIPOPT:            "context-hash": "[[HASH2:.*]]",
+// SKIPOPT:          }
+// SKIPOPT:       "translation-units": [
+// SKIPOPT:         {
+// SKIPOPT:           "commands": [
+// SKIPOPT:             {
+// SKIPOPT-NEXT:          "clang-context-hash": "{{.*}}",
+// SKIPOPT-NEXT:          "clang-module-deps": [
+// SKIPOPT-NEXT:            {
+// SKIPOPT-NOT:              "context-hash": "[[HASH0]]",
+// SKIPOPT-NEXT:             "context-hash": "[[HASH2]]"
+// SKIPOPT-NEXT:              "module-name": "mod"
+// SKIPOPT:            }
+// SKIPOPT:          ],
+
+// Check that result3 and result4 contain different hashes because
+// both have the same relative path as a command line input, and
+// they are produced using different compiler working directories.
+// RELPATH:      {
+// RELPATH-NEXT:   "modules": [
+// RELPATH-NEXT:    {
+// RELPATH-NEXT:      "clang-module-deps": [],
+// RELPATH:           "context-hash": "[[HASH3:.*]]",
+// RELPATH:         }
+// RELPATH:        "translation-units": [
+// RELPATH:         {
+// RELPATH:            "commands": [
+// RELPATH:             {
+// RELPATH-NEXT:          "clang-context-hash": "{{.*}}",
+// RELPATH-NEXT:          "clang-module-deps": [
+// RELPATH-NEXT:            {
+// RELPATH-NEXT:              "context-hash": "[[HASH3]]",
+// RELPATH-NEXT:              "module-name": "mod"
+// RELPATH:            }
+// RELPATH:          ],
+// RELPATH:      {
+// RELPATH-NEXT:   "modules": [
+// RELPATH-NEXT:     {
+// RELPATH-NEXT:       "clang-module-deps": [],
+// RELPATH-NOT:        "context-hash": "[[HASH3]]",
+// RELPATH:            "context-hash": "[[HASH4:.*]]",
+// RELPATH:          }
+// RELPATH:       "translation-units": [
+// RELPATH:         {
+// RELPATH:           "commands": [
+// RELPATH:             {
+// RELPATH-NEXT:          "clang-context-hash": "{{.*}}",
+// RELPATH-NEXT:          "clang-module-deps": [
+// RELPATH-NEXT:            {
+// RELPATH-NOT:              "context-hash": "[[HASH3]]",
+// RELPATH-NEXT:             "context-hash": "[[HASH4]]"
+// RELPATH-NEXT:              "module-name": "mod"
+// RELPATH:            }
+// RELPATH:          ],
+

diff  --git a/clang/test/ClangScanDeps/working-dir.m b/clang/test/ClangScanDeps/working-dir.m
index a04f8c2486b98d..c6b7b1988d3cf7 100644
--- a/clang/test/ClangScanDeps/working-dir.m
+++ b/clang/test/ClangScanDeps/working-dir.m
@@ -2,7 +2,7 @@
 // RUN: split-file %s %t
 // RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json
 // RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \
-// RUN:   -j 1 -format experimental-full --optimize-args=all > %t/deps.db
+// RUN:   -j 1 -format experimental-full --optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/deps.db
 // RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
 
 // Check that there are two separate modules hashes. One for each working dir.

diff  --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 9e637d8872f79d..4b39ae9c35c045 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -167,6 +167,8 @@ static void ParseArgs(int argc, char **argv) {
             .Case("system-warnings", ScanningOptimizations::SystemWarnings)
             .Case("vfs", ScanningOptimizations::VFS)
             .Case("canonicalize-macros", ScanningOptimizations::Macros)
+            .Case("ignore-current-working-dir",
+                  ScanningOptimizations::IgnoreCWD)
             .Case("all", ScanningOptimizations::All)
             .Default(std::nullopt);
     if (!Optimization) {


        


More information about the cfe-commits mailing list