[clang] 8cc2a13 - [clang][deps] Handle symlinks in minimizing FS

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Fri Jan 21 04:04:33 PST 2022


Author: Jan Svoboda
Date: 2022-01-21T13:04:25+01:00
New Revision: 8cc2a137270462bc191377dbab97c739583814dd

URL: https://github.com/llvm/llvm-project/commit/8cc2a137270462bc191377dbab97c739583814dd
DIFF: https://github.com/llvm/llvm-project/commit/8cc2a137270462bc191377dbab97c739583814dd.diff

LOG: [clang][deps] Handle symlinks in minimizing FS

The minimizing and caching filesystem used by the dependency scanner can be configured to **not** minimize some files. That's necessary when scanning a TU with prebuilt inputs (i.e. PCH) that refer to the original (non-minimized) files. Minimizing such files in the dependency scanner would cause discrepancy between the current perceived state of the filesystem and the file sizes stored in the AST file. By not minimizing such files, we avoid creating the discrepancy.

The problem with the current approach is that files that should not be minimized are identified by their path. This breaks down when the prebuilt input (PCH) and the current TU refer to the same file via different paths (i.e. symlinks). This patch switches from paths to `llvm::sys::fs::UniqueID` when identifying ignored files. This is consistent with how the rest of Clang treats files.

Depends on D114966.

Reviewed By: dexonsmith, arphaman

Differential Revision: https://reviews.llvm.org/D114971

Added: 
    clang/test/ClangScanDeps/modules-symlink.c

Modified: 
    clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
    clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index 70e9c4a3ffea2..7c830d3f27333 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -11,8 +11,8 @@
 
 #include "clang/Basic/LLVM.h"
 #include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringSet.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/VirtualFileSystem.h"
@@ -308,13 +308,15 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
 
 private:
   /// Check whether the file should be minimized.
-  bool shouldMinimize(StringRef Filename);
+  bool shouldMinimize(StringRef Filename, llvm::sys::fs::UniqueID UID);
 
   /// Returns entry for the given filename.
   ///
   /// Attempts to use the local and shared caches first, then falls back to
   /// using the underlying filesystem.
-  llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename);
+  llvm::ErrorOr<EntryRef>
+  getOrCreateFileSystemEntry(StringRef Filename,
+                             bool DisableMinimization = false);
 
   /// For a filename that's not yet associated with any entry in the caches,
   /// uses the underlying filesystem to either look up the entry based in the
@@ -325,7 +327,7 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
   /// Minimizes the given entry if necessary and returns a wrapper object with
   /// reference semantics.
   EntryRef minimizeIfNecessary(const CachedFileSystemEntry &Entry,
-                               StringRef Filename);
+                               StringRef Filename, bool Disable);
 
   /// Represents a filesystem entry that has been stat-ed (and potentially read)
   /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
@@ -401,7 +403,7 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
   /// currently active preprocessor.
   ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings;
   /// The set of files that should not be minimized.
-  llvm::StringSet<> NotToBeMinimized;
+  llvm::DenseSet<llvm::sys::fs::UniqueID> NotToBeMinimized;
 };
 
 } // end namespace dependencies

diff  --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index cc8968a7b680f..80a70252721d8 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -42,8 +42,9 @@ DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
 }
 
 EntryRef DependencyScanningWorkerFilesystem::minimizeIfNecessary(
-    const CachedFileSystemEntry &Entry, StringRef Filename) {
-  if (Entry.isError() || Entry.isDirectory() || !shouldMinimize(Filename))
+    const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
+  if (Entry.isError() || Entry.isDirectory() || Disable ||
+      !shouldMinimize(Filename, Entry.getUniqueID()))
     return EntryRef(/*Minimized=*/false, Filename, Entry);
 
   CachedFileContents *Contents = Entry.getContents();
@@ -210,19 +211,18 @@ static bool shouldCacheStatFailures(StringRef Filename) {
 }
 
 void DependencyScanningWorkerFilesystem::disableMinimization(
-    StringRef RawFilename) {
-  llvm::SmallString<256> Filename;
-  llvm::sys::path::native(RawFilename, Filename);
-  NotToBeMinimized.insert(Filename);
+    StringRef Filename) {
+  // Since we're not done setting up `NotToBeMinimized` yet, we need to disable
+  // minimization explicitly.
+  if (llvm::ErrorOr<EntryRef> Result =
+          getOrCreateFileSystemEntry(Filename, /*DisableMinimization=*/true))
+    NotToBeMinimized.insert(Result->getStatus().getUniqueID());
 }
 
-bool DependencyScanningWorkerFilesystem::shouldMinimize(StringRef RawFilename) {
-  if (!shouldMinimizeBasedOnExtension(RawFilename))
-    return false;
-
-  llvm::SmallString<256> Filename;
-  llvm::sys::path::native(RawFilename, Filename);
-  return !NotToBeMinimized.contains(Filename);
+bool DependencyScanningWorkerFilesystem::shouldMinimize(
+    StringRef Filename, llvm::sys::fs::UniqueID UID) {
+  return shouldMinimizeBasedOnExtension(Filename) &&
+         !NotToBeMinimized.contains(UID);
 }
 
 const CachedFileSystemEntry &
@@ -275,13 +275,15 @@ DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
 
 llvm::ErrorOr<EntryRef>
 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
-    StringRef Filename) {
+    StringRef Filename, bool DisableMinimization) {
   if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
-    return minimizeIfNecessary(*Entry, Filename).unwrapError();
+    return minimizeIfNecessary(*Entry, Filename, DisableMinimization)
+        .unwrapError();
   auto MaybeEntry = computeAndStoreResult(Filename);
   if (!MaybeEntry)
     return MaybeEntry.getError();
-  return minimizeIfNecessary(*MaybeEntry, Filename).unwrapError();
+  return minimizeIfNecessary(*MaybeEntry, Filename, DisableMinimization)
+      .unwrapError();
 }
 
 llvm::ErrorOr<llvm::vfs::Status>

diff  --git a/clang/test/ClangScanDeps/modules-symlink.c b/clang/test/ClangScanDeps/modules-symlink.c
new file mode 100644
index 0000000000000..1a2fe2d9f5123
--- /dev/null
+++ b/clang/test/ClangScanDeps/modules-symlink.c
@@ -0,0 +1,54 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+//--- cdb_pch.json
+[
+  {
+    "directory": "DIR",
+    "command": "clang -x c-header DIR/pch.h -fmodules -gmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache -o DIR/pch.h.gch",
+    "file": "DIR/pch.h"
+  }
+]
+
+//--- cdb_tu.json
+[
+  {
+    "directory": "DIR",
+    "command": "clang -c DIR/tu.c -fmodules -gmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache -include DIR/pch.h -o DIR/tu.o",
+    "file": "DIR/tu.c"
+  }
+]
+
+//--- module.modulemap
+module mod { header "symlink.h" }
+
+//--- pch.h
+#include "symlink.h"
+
+//--- original.h
+// Comment that will be stripped by the minimizer.
+#define MACRO 1
+
+//--- tu.c
+#include "original.h"
+static int foo = MACRO; // Macro usage that will trigger
+                        // input file consistency checks.
+
+// RUN: ln -s %t/original.h %t/symlink.h
+
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb_pch.json > %t/cdb.json
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full \
+// RUN:   -generate-modules-path-args -module-files-dir %t/build > %t/result_pch.json
+//
+// RUN: %python %S/../../utils/module-deps-to-rsp.py %t/result_pch.json \
+// RUN:   --module-name=mod > %t/mod.cc1.rsp
+// RUN: %python %S/../../utils/module-deps-to-rsp.py %t/result_pch.json \
+// RUN:   --tu-index=0 > %t/pch.rsp
+//
+// RUN: %clang @%t/mod.cc1.rsp
+// RUN: %clang -x c-header %t/pch.h -fmodules -gmodules -fimplicit-module-maps \
+// RUN:   -fmodules-cache-path=%t/cache -o %t/pch.h.gch -I %t @%t/pch.rsp
+
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb_tu.json > %t/cdb.json
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full \
+// RUN:   -generate-modules-path-args -module-files-dir %t/build > %t/result_tu.json


        


More information about the cfe-commits mailing list