[clang] [clang][DependencyScanner] Include the working directory in the context hash (PR #73719)

Michael Spencer via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 28 15:51:19 PST 2023


https://github.com/Bigcheese created https://github.com/llvm/llvm-project/pull/73719

The working directory is included in the PCM, but is not currently part of the context hash. This causes problems because different builds of a PCM with exactly the same command line can end up with different binary content for a PCM. If a build system tracks tasks by both working directory and command line, it may build a given PCM multiple times, causing a "module file out of date" error when loading the PCM due to different sizes.

>From 4122b4a5d394bc7b6923e33b1e7a10606b52c83c Mon Sep 17 00:00:00 2001
From: Michael Spencer <michael_spencer at apple.com>
Date: Mon, 20 Mar 2023 18:48:10 -0700
Subject: [PATCH] [clang][DependencyScanner] Include the working directory in
 the context hash

The working directory is included in the PCM, but is not currently
part of the context hash. This causes problems because different
builds of a PCM with exactly the same command line can end up with
different binary content for a PCM. If a build system tracks tasks by
both working directory and command line, it may build a given PCM
multiple times, causing a "module file out of date" error when loading
the PCM due to different sizes.
---
 .../DependencyScanning/ModuleDepCollector.cpp |  13 ++-
 clang/test/ClangScanDeps/working-dir.m        | 107 ++++++++++++++++++
 2 files changed, 118 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/ClangScanDeps/working-dir.m

diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index 9099c18391e4d29..3684ac46fbd4717 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -316,7 +316,8 @@ void ModuleDepCollector::applyDiscoveredDependencies(CompilerInvocation &CI) {
 
 static std::string getModuleContextHash(const ModuleDeps &MD,
                                         const CowCompilerInvocation &CI,
-                                        bool EagerLoadModules) {
+                                        bool EagerLoadModules,
+                                        llvm::vfs::FileSystem &VFS) {
   llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native>
       HashBuilder;
   SmallString<32> Scratch;
@@ -325,6 +326,13 @@ static std::string getModuleContextHash(const ModuleDeps &MD,
   // will be readable.
   HashBuilder.add(getClangFullRepositoryVersion());
   HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
+  if (CI.getFileSystemOpts().WorkingDir.empty()) {
+    llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory();
+    if (CWD)
+      HashBuilder.add(*CWD);
+  }
+  // If any of the above options are set, then there must have been a command
+  // line argument to set them, which will already be hashed.
 
   // Hash the BuildInvocation without any input files.
   SmallString<0> ArgVec;
@@ -356,7 +364,8 @@ static std::string getModuleContextHash(const ModuleDeps &MD,
 
 void ModuleDepCollector::associateWithContextHash(
     const CowCompilerInvocation &CI, ModuleDeps &Deps) {
-  Deps.ID.ContextHash = getModuleContextHash(Deps, CI, EagerLoadModules);
+  Deps.ID.ContextHash = getModuleContextHash(
+      Deps, CI, EagerLoadModules, ScanInstance.getVirtualFileSystem());
   bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second;
   (void)Inserted;
   assert(Inserted && "duplicate module mapping");
diff --git a/clang/test/ClangScanDeps/working-dir.m b/clang/test/ClangScanDeps/working-dir.m
new file mode 100644
index 000000000000000..a43df2351f67f88
--- /dev/null
+++ b/clang/test/ClangScanDeps/working-dir.m
@@ -0,0 +1,107 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json
+// RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \
+// RUN:   -j 1 -format experimental-full --optimize-args=all > %t/deps.db
+// RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
+
+// Check that there are two separate modules hashes. One for each working dir.
+
+// CHECK:      {
+// CHECK-NEXT:   "modules": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:       "clang-module-deps":
+// CHECK-NEXT:       "clang-modulemap-file":
+// CHECK-NEXT:       "command-line": [
+// CHECK:            ],
+// CHECK-NEXT:       "context-hash": "{{.*}}",
+// CHECK-NEXT:       "file-deps": [
+// CHECK:            ],
+// CHECK-NEXT:       "name": "A"
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:       "clang-module-deps":
+// CHECK-NEXT:       "clang-modulemap-file":
+// CHECK-NEXT:       "command-line": [
+// CHECK:            ],
+// CHECK-NEXT:       "context-hash": "{{.*}}",
+// CHECK-NEXT:       "file-deps": [
+// CHECK:            ],
+// CHECK-NEXT:       "name": "A"
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:       "clang-module-deps":
+// CHECK:            ],
+// CHECK-NEXT:       "clang-modulemap-file":
+// CHECK-NEXT:       "command-line": [
+// CHECK:            ],
+// CHECK-NEXT:       "context-hash": "{{.*}}",
+// CHECK-NEXT:       "file-deps": [
+// CHECK:            ],
+// CHECK-NEXT:       "name": "B"
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:       "clang-module-deps":
+// CHECK:            ],
+// CHECK-NEXT:       "clang-modulemap-file":
+// CHECK-NEXT:       "command-line": [
+// CHECK:            ],
+// CHECK-NEXT:       "context-hash": "{{.*}}",
+// CHECK-NEXT:       "file-deps": [
+// CHECK:            ],
+// CHECK-NEXT:       "name": "B"
+// CHECK-NEXT:     }
+// CHECK-NEXT:   ],
+// CHECK-NEXT:   "translation-units": [
+// CHECK:        ]
+// CHECK:      }
+
+//--- build/compile-commands.json.in
+
+[
+{
+  "directory": "DIR/build",
+  "command": "clang -c DIR/A.m -IDIR/modules/A -IDIR/modules/B -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
+  "file": "DIR/A.m"
+},
+{
+  "directory": "DIR",
+  "command": "clang -c DIR/B.m -IDIR/modules/A -IDIR/modules/B -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
+  "file": "DIR/B.m"
+}
+]
+
+
+//--- modules/A/module.modulemap
+
+module A {
+  umbrella header "A.h"
+}
+
+//--- modules/A/A.h
+
+typedef int A_t;
+
+//--- modules/B/module.modulemap
+
+module B {
+  umbrella header "B.h"
+}
+
+//--- modules/B/B.h
+
+#include <A.h>
+
+typedef int B_t;
+
+//--- A.m
+
+#include <B.h>
+
+A_t a = 0;
+
+//--- B.m
+
+#include <B.h>
+
+B_t b = 0;



More information about the cfe-commits mailing list