[clang] bc1a297 - [clang][deps] Separate filesystem caches for minimized and original files

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Tue Jul 20 03:08:53 PDT 2021


Author: Jan Svoboda
Date: 2021-07-20T12:08:46+02:00
New Revision: bc1a2979fc70d954ae97122205c71c8404a1b17e

URL: https://github.com/llvm/llvm-project/commit/bc1a2979fc70d954ae97122205c71c8404a1b17e
DIFF: https://github.com/llvm/llvm-project/commit/bc1a2979fc70d954ae97122205c71c8404a1b17e.diff

LOG: [clang][deps] Separate filesystem caches for minimized and original files

This patch separates the local and global caches of `DependencyScanningFilesystem` into two buckets: minimized files and original files. This is necessary to deal with precompiled modules/headers.

Consider a single worker with its instance of filesystem:
1. Build system uses the worker to scan dependencies of module A => filesystem cache gets populated with minimized input files.
2. Build system uses the results to explicitly build module A => explicitly built module captures the state of the real filesystem (containing non-minimized input files).
3. Build system uses the prebuilt module A as an explicit precompiled dependency for another compile job B.
4. Build system uses the same worker to scan dependencies for job B => worker uses implicit modular build to discover dependencies, which validates the filesystem state embedded in the prebuilt module (non-minimized files) to the current view of the filesystem (minimized files), resulting in validation failures.

This problem can be avoided in step 4 by collecting input files from the precompiled module and marking them as "ignored" in the minimizing filesystem. This way, the validation should succeed, since we should be always dealing with the original (non-minized) input files. However, the filesystem already minimized the input files in step 1 and put it in the cache, which gets used in step 4 as well even though it's marked ignored (do not minimize). This patch essentially fixes this oversight by making the `"file is minimized"` part of the cache key (from high level).

Depends on D106064.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D106146

Added: 
    

Modified: 
    clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
    clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
    clang/unittests/Tooling/CMakeLists.txt
    clang/unittests/Tooling/DependencyScannerTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index b213f388dadc0..c52da3305f7c1 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -103,7 +103,8 @@ class CachedFileSystemEntry {
 };
 
 /// This class is a shared cache, that caches the 'stat' and 'open' calls to the
-/// underlying real file system.
+/// underlying real file system. It distinguishes between minimized and original
+/// files.
 ///
 /// It is sharded based on the hash of the key to reduce the lock contention for
 /// the worker threads.
@@ -114,21 +115,62 @@ class DependencyScanningFilesystemSharedCache {
     CachedFileSystemEntry Value;
   };
 
-  DependencyScanningFilesystemSharedCache();
-
   /// Returns a cache entry for the corresponding key.
   ///
   /// A new cache entry is created if the key is not in the cache. This is a
   /// thread safe call.
-  SharedFileSystemEntry &get(StringRef Key);
+  SharedFileSystemEntry &get(StringRef Key, bool Minimized);
 
 private:
-  struct CacheShard {
-    std::mutex CacheLock;
-    llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
+  class SingleCache {
+  public:
+    SingleCache();
+
+    SharedFileSystemEntry &get(StringRef Key);
+
+  private:
+    struct CacheShard {
+      std::mutex CacheLock;
+      llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
+    };
+    std::unique_ptr<CacheShard[]> CacheShards;
+    unsigned NumShards;
   };
-  std::unique_ptr<CacheShard[]> CacheShards;
-  unsigned NumShards;
+
+  SingleCache CacheMinimized;
+  SingleCache CacheOriginal;
+};
+
+/// This class is a local cache, that caches the 'stat' and 'open' calls to the
+/// underlying real file system. It distinguishes between minimized and original
+/// files.
+class DependencyScanningFilesystemLocalCache {
+private:
+  using SingleCache =
+      llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>;
+
+  SingleCache CacheMinimized;
+  SingleCache CacheOriginal;
+
+  SingleCache &selectCache(bool Minimized) {
+    return Minimized ? CacheMinimized : CacheOriginal;
+  }
+
+public:
+  void setCachedEntry(StringRef Filename, bool Minimized,
+                      const CachedFileSystemEntry *Entry) {
+    SingleCache &Cache = selectCache(Minimized);
+    bool IsInserted = Cache.try_emplace(Filename, Entry).second;
+    (void)IsInserted;
+    assert(IsInserted && "local cache is updated more than once");
+  }
+
+  const CachedFileSystemEntry *getCachedEntry(StringRef Filename,
+                                              bool Minimized) {
+    SingleCache &Cache = selectCache(Minimized);
+    auto It = Cache.find(Filename);
+    return It == Cache.end() ? nullptr : It->getValue();
+  }
 };
 
 /// A virtual file system optimized for the dependency discovery.
@@ -159,24 +201,14 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
 private:
   bool shouldIgnoreFile(StringRef Filename);
 
-  void setCachedEntry(StringRef Filename, const CachedFileSystemEntry *Entry) {
-    bool IsInserted = Cache.try_emplace(Filename, Entry).second;
-    (void)IsInserted;
-    assert(IsInserted && "local cache is updated more than once");
-  }
-
-  const CachedFileSystemEntry *getCachedEntry(StringRef Filename) {
-    auto It = Cache.find(Filename);
-    return It == Cache.end() ? nullptr : It->getValue();
-  }
-
   llvm::ErrorOr<const CachedFileSystemEntry *>
   getOrCreateFileSystemEntry(const StringRef Filename);
 
+  /// The global cache shared between worker threads.
   DependencyScanningFilesystemSharedCache &SharedCache;
   /// The local cache is used by the worker thread to cache file system queries
   /// locally instead of querying the global cache every time.
-  llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
+  DependencyScanningFilesystemLocalCache Cache;
   /// The optional mapping structure which records information about the
   /// excluded conditional directive skip mappings that are used by the
   /// currently active preprocessor.

diff  --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 88b835e159f3c..166d4a25363cf 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -99,8 +99,7 @@ CachedFileSystemEntry::createDirectoryEntry(llvm::vfs::Status &&Stat) {
   return Result;
 }
 
-DependencyScanningFilesystemSharedCache::
-    DependencyScanningFilesystemSharedCache() {
+DependencyScanningFilesystemSharedCache::SingleCache::SingleCache() {
   // This heuristic was chosen using a empirical testing on a
   // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
   // sharding gives a performance edge by reducing the lock contention.
@@ -111,18 +110,20 @@ DependencyScanningFilesystemSharedCache::
   CacheShards = std::make_unique<CacheShard[]>(NumShards);
 }
 
-/// Returns a cache entry for the corresponding key.
-///
-/// A new cache entry is created if the key is not in the cache. This is a
-/// thread safe call.
 DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
-DependencyScanningFilesystemSharedCache::get(StringRef Key) {
+DependencyScanningFilesystemSharedCache::SingleCache::get(StringRef Key) {
   CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards];
   std::unique_lock<std::mutex> LockGuard(Shard.CacheLock);
   auto It = Shard.Cache.try_emplace(Key);
   return It.first->getValue();
 }
 
+DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
+DependencyScanningFilesystemSharedCache::get(StringRef Key, bool Minimized) {
+  SingleCache &Cache = Minimized ? CacheMinimized : CacheOriginal;
+  return Cache.get(Key);
+}
+
 /// Whitelist file extensions that should be minimized, treating no extension as
 /// a source file that should be minimized.
 ///
@@ -165,17 +166,17 @@ bool DependencyScanningWorkerFilesystem::shouldIgnoreFile(
 llvm::ErrorOr<const CachedFileSystemEntry *>
 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
     const StringRef Filename) {
-  if (const CachedFileSystemEntry *Entry = getCachedEntry(Filename)) {
+  bool ShouldMinimize =
+      !IgnoredFiles.count(Filename) && shouldMinimize(Filename);
+
+  if (const auto *Entry = Cache.getCachedEntry(Filename, ShouldMinimize))
     return Entry;
-  }
 
   // FIXME: Handle PCM/PCH files.
   // FIXME: Handle module map files.
 
-  bool KeepOriginalSource = shouldIgnoreFile(Filename) ||
-                            !shouldMinimize(Filename);
   DependencyScanningFilesystemSharedCache::SharedFileSystemEntry
-      &SharedCacheEntry = SharedCache.get(Filename);
+      &SharedCacheEntry = SharedCache.get(Filename, ShouldMinimize);
   const CachedFileSystemEntry *Result;
   {
     std::unique_lock<std::mutex> LockGuard(SharedCacheEntry.ValueLock);
@@ -197,15 +198,15 @@ DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
         CacheEntry = CachedFileSystemEntry::createDirectoryEntry(
             std::move(*MaybeStatus));
       else
-        CacheEntry = CachedFileSystemEntry::createFileEntry(
-            Filename, FS, !KeepOriginalSource);
+        CacheEntry = CachedFileSystemEntry::createFileEntry(Filename, FS,
+                                                            ShouldMinimize);
     }
 
     Result = &CacheEntry;
   }
 
   // Store the result in the local cache.
-  setCachedEntry(Filename, Result);
+  Cache.setCachedEntry(Filename, ShouldMinimize, Result);
   return Result;
 }
 

diff  --git a/clang/unittests/Tooling/CMakeLists.txt b/clang/unittests/Tooling/CMakeLists.txt
index 3ed6992d81f50..01eb8d67dadac 100644
--- a/clang/unittests/Tooling/CMakeLists.txt
+++ b/clang/unittests/Tooling/CMakeLists.txt
@@ -68,6 +68,7 @@ clang_target_link_libraries(ToolingTests
   clangAST
   clangASTMatchers
   clangBasic
+  clangDependencyScanning
   clangFormat
   clangFrontend
   clangLex

diff  --git a/clang/unittests/Tooling/DependencyScannerTest.cpp b/clang/unittests/Tooling/DependencyScannerTest.cpp
index 5a9c514c07489..538b36b4b2edc 100644
--- a/clang/unittests/Tooling/DependencyScannerTest.cpp
+++ b/clang/unittests/Tooling/DependencyScannerTest.cpp
@@ -15,6 +15,7 @@
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/Tooling/CompilationDatabase.h"
 #include "clang/Tooling/Tooling.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Path.h"
@@ -203,5 +204,35 @@ TEST(DependencyScanner, ScanDepsReuseFilemanagerHasInclude) {
   EXPECT_EQ(convert_to_slash(Deps[5]), "/root/symlink.h");
 }
 
+namespace dependencies {
+TEST(DependencyScanningFilesystem, IgnoredFilesHaveSeparateCache) {
+  auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  VFS->addFile("/mod.h", 0, llvm::MemoryBuffer::getMemBuffer("// hi there!\n"));
+
+  DependencyScanningFilesystemSharedCache SharedCache;
+  auto Mappings = std::make_unique<ExcludedPreprocessorDirectiveSkipMapping>();
+  DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings.get());
+
+  auto StatusMinimized0 = DepFS.status("/mod.h");
+  DepFS.ignoreFile("/mod.h");
+  auto StatusFull1 = DepFS.status("/mod.h");
+  DepFS.clearIgnoredFiles();
+
+  auto StatusMinimized2 = DepFS.status("/mod.h");
+  DepFS.ignoreFile("/mod.h");
+  auto StatusFull3 = DepFS.status("/mod.h");
+
+  EXPECT_TRUE(StatusMinimized0);
+  EXPECT_EQ(StatusMinimized0->getSize(), 0u);
+  EXPECT_TRUE(StatusFull1);
+  EXPECT_EQ(StatusFull1->getSize(), 13u);
+
+  EXPECT_TRUE(StatusMinimized2);
+  EXPECT_EQ(StatusMinimized2->getSize(), 0u);
+  EXPECT_TRUE(StatusFull3);
+  EXPECT_EQ(StatusFull3->getSize(), 13u);
+}
+
+} // end namespace dependencies
 } // end namespace tooling
 } // end namespace clang


        


More information about the cfe-commits mailing list