[clang] [clang][deps] Only bypass scanning VFS for the module cache (PR #88800)

via cfe-commits cfe-commits at lists.llvm.org
Mon Apr 15 14:32:43 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Jan Svoboda (jansvoboda11)

<details>
<summary>Changes</summary>

The scanning VFS doesn't cache stat failures of paths with no extension. This was originally implemented to avoid caching the non-existence of the modules cache directory that the modular scanner will eventually create if it does not exist.

However, this prevents caching of the non-existence of all directories and notably also header files from the standard C++ library, which can lead to sub-par performance.

This patch adds an API to the scanning VFS that allows clients to configure path prefix for which to bypass the scanning VFS and use the underlying VFS directly.

---
Full diff: https://github.com/llvm/llvm-project/pull/88800.diff


4 Files Affected:

- (modified) clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h (+13) 
- (modified) clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp (+7-9) 
- (modified) clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp (+20-12) 
- (modified) clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp (+27) 


``````````diff
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index f7b4510d7f7beb..d12814e7c9253e 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -353,6 +353,12 @@ class DependencyScanningWorkerFilesystem
 
   std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
 
+  /// Make it so that no paths bypass this VFS.
+  void resetBypassedPathPrefix() { BypassedPathPrefix.reset(); }
+  /// Set the prefix for paths that should bypass this VFS and go straight to
+  /// the underlying VFS.
+  void setBypassedPathPrefix(StringRef Prefix) { BypassedPathPrefix = Prefix; }
+
   /// Returns entry for the given filename.
   ///
   /// Attempts to use the local and shared caches first, then falls back to
@@ -450,12 +456,19 @@ class DependencyScanningWorkerFilesystem
     getUnderlyingFS().print(OS, Type, IndentLevel + 1);
   }
 
+  /// Whether this path should bypass this VFS and go straight to the underlying
+  /// VFS.
+  bool shouldBypass(StringRef Path) const;
+
   /// The global cache shared between worker threads.
   DependencyScanningFilesystemSharedCache &SharedCache;
   /// The local cache is used by the worker thread to cache file system queries
   /// locally instead of querying the global cache every time.
   DependencyScanningFilesystemLocalCache LocalCache;
 
+  /// Prefix of paths that should go straight to the underlying VFS.
+  std::optional<std::string> BypassedPathPrefix;
+
   /// The working directory to use for making relative paths absolute before
   /// using them for cache lookups.
   llvm::ErrorOr<std::string> WorkingDirForCacheLookup;
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 0cab17a3424406..4d738e4bea41a6 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -201,11 +201,8 @@ const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard::
   return *StoredRealPath;
 }
 
-static bool shouldCacheStatFailures(StringRef Filename) {
-  StringRef Ext = llvm::sys::path::extension(Filename);
-  if (Ext.empty())
-    return false; // This may be the module cache directory.
-  return true;
+bool DependencyScanningWorkerFilesystem::shouldBypass(StringRef Path) const {
+  return BypassedPathPrefix && Path.starts_with(*BypassedPathPrefix);
 }
 
 DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem(
@@ -244,8 +241,6 @@ DependencyScanningWorkerFilesystem::computeAndStoreResult(
   llvm::ErrorOr<llvm::vfs::Status> Stat =
       getUnderlyingFS().status(OriginalFilename);
   if (!Stat) {
-    if (!shouldCacheStatFailures(OriginalFilename))
-      return Stat.getError();
     const auto &Entry =
         getOrEmplaceSharedEntryForFilename(FilenameForLookup, Stat.getError());
     return insertLocalEntryForFilename(FilenameForLookup, Entry);
@@ -291,7 +286,7 @@ DependencyScanningWorkerFilesystem::status(const Twine &Path) {
   SmallString<256> OwnedFilename;
   StringRef Filename = Path.toStringRef(OwnedFilename);
 
-  if (Filename.ends_with(".pcm"))
+  if (shouldBypass(Filename))
     return getUnderlyingFS().status(Path);
 
   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
@@ -362,7 +357,7 @@ DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
   SmallString<256> OwnedFilename;
   StringRef Filename = Path.toStringRef(OwnedFilename);
 
-  if (Filename.ends_with(".pcm"))
+  if (shouldBypass(Filename))
     return getUnderlyingFS().openFileForRead(Path);
 
   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
@@ -377,6 +372,9 @@ DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path,
   SmallString<256> OwnedFilename;
   StringRef OriginalFilename = Path.toStringRef(OwnedFilename);
 
+  if (shouldBypass(OriginalFilename))
+    return getUnderlyingFS().getRealPath(Path, Output);
+
   SmallString<256> PathBuf;
   auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
   if (!FilenameForLookup)
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 32850f5eea92a9..4fad825ca242ba 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -347,6 +347,26 @@ class DependencyScanningAction : public tooling::ToolAction {
         ScanInstance.getInvocation(), ScanInstance.getDiagnostics(),
         DriverFileMgr->getVirtualFileSystemPtr());
 
+    // Use the dependency scanning optimized file system if requested to do so.
+    if (DepFS) {
+      StringRef ModulesCachePath =
+          ScanInstance.getHeaderSearchOpts().ModuleCachePath;
+
+      DepFS->resetBypassedPathPrefix();
+      if (!ModulesCachePath.empty())
+        DepFS->setBypassedPathPrefix(ModulesCachePath);
+
+      ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile =
+          [LocalDepFS = DepFS](FileEntryRef File)
+          -> std::optional<ArrayRef<dependency_directives_scan::Directive>> {
+        if (llvm::ErrorOr<EntryRef> Entry =
+                LocalDepFS->getOrCreateFileSystemEntry(File.getName()))
+          if (LocalDepFS->ensureDirectiveTokensArePopulated(*Entry))
+            return Entry->getDirectiveTokens();
+        return std::nullopt;
+      };
+    }
+
     // Create a new FileManager to match the invocation's FileSystemOptions.
     auto *FileMgr = ScanInstance.createFileManager(FS);
     ScanInstance.createSourceManager(*FileMgr);
@@ -363,18 +383,6 @@ class DependencyScanningAction : public tooling::ToolAction {
               PrebuiltModuleVFSMap, ScanInstance.getDiagnostics()))
         return false;
 
-    // Use the dependency scanning optimized file system if requested to do so.
-    if (DepFS)
-      ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile =
-          [LocalDepFS = DepFS](FileEntryRef File)
-          -> std::optional<ArrayRef<dependency_directives_scan::Directive>> {
-        if (llvm::ErrorOr<EntryRef> Entry =
-                LocalDepFS->getOrCreateFileSystemEntry(File.getName()))
-          if (LocalDepFS->ensureDirectiveTokensArePopulated(*Entry))
-            return Entry->getDirectiveTokens();
-        return std::nullopt;
-      };
-
     // Create the dependency collector that will collect the produced
     // dependencies.
     //
diff --git a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
index 87bb67cfd9327c..69843d3ca50b0e 100644
--- a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
+++ b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
@@ -174,3 +174,30 @@ TEST(DependencyScanningFilesystem, CacheStatOnExists) {
   EXPECT_EQ(InstrumentingFS->NumStatusCalls, 2u);
   EXPECT_EQ(InstrumentingFS->NumExistsCalls, 0u);
 }
+
+TEST(DependencyScanningFilesystem, CacheStatFailures) {
+  auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  InMemoryFS->setCurrentWorkingDirectory("/");
+  InMemoryFS->addFile("/dir/vector", 0, llvm::MemoryBuffer::getMemBuffer(""));
+  InMemoryFS->addFile("/cache/a.pcm", 0, llvm::MemoryBuffer::getMemBuffer(""));
+
+  auto InstrumentingFS =
+      llvm::makeIntrusiveRefCnt<InstrumentingFilesystem>(InMemoryFS);
+
+  DependencyScanningFilesystemSharedCache SharedCache;
+  DependencyScanningWorkerFilesystem DepFS(SharedCache, InstrumentingFS);
+
+  DepFS.status("/dir");
+  DepFS.status("/dir");
+  EXPECT_EQ(InstrumentingFS->NumStatusCalls, 1u);
+
+  DepFS.status("/dir/vector");
+  DepFS.status("/dir/vector");
+  EXPECT_EQ(InstrumentingFS->NumStatusCalls, 2u);
+
+  DepFS.setBypassedPathPrefix("/cache");
+  DepFS.exists("/cache/a.pcm");
+  EXPECT_EQ(InstrumentingFS->NumStatusCalls, 3u);
+  DepFS.exists("/cache/a.pcm");
+  EXPECT_EQ(InstrumentingFS->NumStatusCalls, 4u);
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/88800


More information about the cfe-commits mailing list