[clang] 51d1385 - [clang Dependency Scanning] Enhance File Caching Diagnostics (#144105)

via cfe-commits cfe-commits at lists.llvm.org
Tue Jun 24 15:48:42 PDT 2025


Author: Qiongsi Wu
Date: 2025-06-24T15:48:39-07:00
New Revision: 51d1385cdf5e9508dd02a8283443afdd01641a1a

URL: https://github.com/llvm/llvm-project/commit/51d1385cdf5e9508dd02a8283443afdd01641a1a
DIFF: https://github.com/llvm/llvm-project/commit/51d1385cdf5e9508dd02a8283443afdd01641a1a.diff

LOG: [clang Dependency Scanning] Enhance File Caching Diagnostics (#144105)

`DependencyScanningFileSystemSharedCache` can currently report
out-of-date negatively stat cached paths. This PR enhances the reporting
with two modifications.

1. The reported path are now null terminated char arrays instead of
`StringRef`s. This way the API's user can avoid copying `StringRef`s to
other containers because the char arrays can be used directly.
2. The API now reports out-of-date cache entry due to file size changes.
Specifically, we check each file's cached size against the size of the
same file on the underlying FS. If the sizes are different, diagnostics
will be reported.

rdar://152247357

Added: 
    

Modified: 
    clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
    clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
    clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index a20a89a4c2b76..2b21be7712693 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -18,6 +18,7 @@
 #include "llvm/Support/VirtualFileSystem.h"
 #include <mutex>
 #include <optional>
+#include <variant>
 
 namespace clang {
 namespace tooling {
@@ -220,13 +221,34 @@ class DependencyScanningFilesystemSharedCache {
   CacheShard &getShardForFilename(StringRef Filename) const;
   CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
 
-  /// Visits all cached entries and re-stat an entry using FS if
-  /// it is negatively stat cached. If re-stat succeeds on a path,
-  /// the path is added to InvalidPaths, indicating that the cache
-  /// may have erroneously negatively cached it. The caller can then
-  /// use InvalidPaths to issue diagnostics.
-  std::vector<StringRef>
-  getInvalidNegativeStatCachedPaths(llvm::vfs::FileSystem &UnderlyingFS) const;
+  struct OutOfDateEntry {
+    // A null terminated string that contains a path.
+    const char *Path = nullptr;
+
+    struct NegativelyCachedInfo {};
+    struct SizeChangedInfo {
+      uint64_t CachedSize = 0;
+      uint64_t ActualSize = 0;
+    };
+
+    std::variant<NegativelyCachedInfo, SizeChangedInfo> Info;
+
+    OutOfDateEntry(const char *Path)
+        : Path(Path), Info(NegativelyCachedInfo{}) {}
+
+    OutOfDateEntry(const char *Path, uint64_t CachedSize, uint64_t ActualSize)
+        : Path(Path), Info(SizeChangedInfo{CachedSize, ActualSize}) {}
+  };
+
+  /// Visits all cached entries and re-stat an entry using UnderlyingFS to check
+  /// if the cache contains out-of-date entries. An entry can be out-of-date for
+  /// two reasons:
+  ///  1. The entry contains a stat error, indicating the file did not exist
+  ///     in the cache, but the file exists on the UnderlyingFS.
+  ///  2. The entry is associated with a file whose size is 
diff erent from the
+  ///     size of the file on the same path on the UnderlyingFS.
+  std::vector<OutOfDateEntry>
+  getOutOfDateEntries(llvm::vfs::FileSystem &UnderlyingFS) const;
 
 private:
   std::unique_ptr<CacheShard[]> CacheShards;

diff  --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 140833050f4e9..2437b2d3595e5 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -107,31 +107,39 @@ DependencyScanningFilesystemSharedCache::getShardForUID(
   return CacheShards[Hash % NumShards];
 }
 
-std::vector<StringRef>
-DependencyScanningFilesystemSharedCache::getInvalidNegativeStatCachedPaths(
+std::vector<DependencyScanningFilesystemSharedCache::OutOfDateEntry>
+DependencyScanningFilesystemSharedCache::getOutOfDateEntries(
     llvm::vfs::FileSystem &UnderlyingFS) const {
   // Iterate through all shards and look for cached stat errors.
-  std::vector<StringRef> InvalidPaths;
+  std::vector<OutOfDateEntry> InvalidDiagInfo;
   for (unsigned i = 0; i < NumShards; i++) {
     const CacheShard &Shard = CacheShards[i];
     std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
     for (const auto &[Path, CachedPair] : Shard.CacheByFilename) {
       const CachedFileSystemEntry *Entry = CachedPair.first;
 
-      if (Entry->getError()) {
-        // Only examine cached errors.
-        llvm::ErrorOr<llvm::vfs::Status> Stat = UnderlyingFS.status(Path);
-        if (Stat) {
+      llvm::ErrorOr<llvm::vfs::Status> Status = UnderlyingFS.status(Path);
+      if (Status) {
+        if (Entry->getError()) {
           // This is the case where we have cached the non-existence
-          // of the file at Path first, and a a file at the path is created
+          // of the file at Path first, and a file at the path is created
           // later. The cache entry is not invalidated (as we have no good
           // way to do it now), which may lead to missing file build errors.
-          InvalidPaths.push_back(Path);
+          InvalidDiagInfo.emplace_back(Path.data());
+        } else {
+          llvm::vfs::Status CachedStatus = Entry->getStatus();
+          uint64_t CachedSize = CachedStatus.getSize();
+          uint64_t ActualSize = Status->getSize();
+          if (CachedSize != ActualSize) {
+            // This is the case where the cached file has a 
diff erent size
+            // from the actual file that comes from the underlying FS.
+            InvalidDiagInfo.emplace_back(Path.data(), CachedSize, ActualSize);
+          }
         }
       }
     }
   }
-  return InvalidPaths;
+  return InvalidDiagInfo;
 }
 
 const CachedFileSystemEntry *

diff  --git a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
index 7420743c97a2a..b461d9109271c 100644
--- a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
+++ b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
@@ -187,18 +187,49 @@ TEST(DependencyScanningFilesystem, DiagnoseStaleStatFailures) {
   DependencyScanningWorkerFilesystem DepFS(SharedCache, InMemoryFS);
 
   bool Path1Exists = DepFS.exists("/path1.suffix");
-  EXPECT_EQ(Path1Exists, false);
+  ASSERT_EQ(Path1Exists, false);
 
   // Adding a file that has been stat-ed,
   InMemoryFS->addFile("/path1.suffix", 0, llvm::MemoryBuffer::getMemBuffer(""));
   Path1Exists = DepFS.exists("/path1.suffix");
   // Due to caching in SharedCache, path1 should not exist in
   // DepFS's eyes.
-  EXPECT_EQ(Path1Exists, false);
+  ASSERT_EQ(Path1Exists, false);
 
-  std::vector<llvm::StringRef> InvalidPaths =
-      SharedCache.getInvalidNegativeStatCachedPaths(*InMemoryFS);
+  auto InvalidEntries = SharedCache.getOutOfDateEntries(*InMemoryFS);
 
-  EXPECT_EQ(InvalidPaths.size(), 1u);
-  ASSERT_STREQ("/path1.suffix", InvalidPaths[0].str().c_str());
+  EXPECT_EQ(InvalidEntries.size(), 1u);
+  ASSERT_STREQ("/path1.suffix", InvalidEntries[0].Path);
+}
+
+TEST(DependencyScanningFilesystem, DiagnoseCachedFileSizeChange) {
+  auto InMemoryFS1 = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  auto InMemoryFS2 = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  InMemoryFS1->setCurrentWorkingDirectory("/");
+  InMemoryFS2->setCurrentWorkingDirectory("/");
+
+  DependencyScanningFilesystemSharedCache SharedCache;
+  DependencyScanningWorkerFilesystem DepFS(SharedCache, InMemoryFS1);
+
+  InMemoryFS1->addFile("/path1.suffix", 0,
+                       llvm::MemoryBuffer::getMemBuffer(""));
+  bool Path1Exists = DepFS.exists("/path1.suffix");
+  ASSERT_EQ(Path1Exists, true);
+
+  // Add a file to a new FS that has the same path but 
diff erent content.
+  InMemoryFS2->addFile("/path1.suffix", 1,
+                       llvm::MemoryBuffer::getMemBuffer("        "));
+
+  // Check against the new file system. InMemoryFS2 could be the underlying
+  // physical system in the real world.
+  auto InvalidEntries = SharedCache.getOutOfDateEntries(*InMemoryFS2);
+
+  ASSERT_EQ(InvalidEntries.size(), 1u);
+  ASSERT_STREQ("/path1.suffix", InvalidEntries[0].Path);
+  auto SizeInfo = std::get_if<
+      DependencyScanningFilesystemSharedCache::OutOfDateEntry::SizeChangedInfo>(
+      &InvalidEntries[0].Info);
+  ASSERT_TRUE(SizeInfo);
+  ASSERT_EQ(SizeInfo->CachedSize, 0u);
+  ASSERT_EQ(SizeInfo->ActualSize, 8u);
 }


        


More information about the cfe-commits mailing list