[clang] [clang Dependency Scanning] Enhance File Caching Diagnostics (PR #144105)

Qiongsi Wu via cfe-commits cfe-commits at lists.llvm.org
Mon Jun 23 10:33:41 PDT 2025


https://github.com/qiongsiwu updated https://github.com/llvm/llvm-project/pull/144105

>From 2187de3d30045a02c03ca009f2497608ab6bc6a4 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi_wu at apple.com>
Date: Thu, 12 Jun 2025 16:05:32 -0700
Subject: [PATCH 1/3] Enhancing DependencyScanningFilesystemSharedCache's API
 that reports invalid entries.

---
 .../DependencyScanningFilesystem.h            | 42 +++++++++++++++----
 .../DependencyScanningFilesystem.cpp          | 28 ++++++++-----
 .../DependencyScanningFilesystemTest.cpp      | 41 +++++++++++++++---
 3 files changed, 88 insertions(+), 23 deletions(-)

diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index a20a89a4c2b76..e0656676fefff 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -220,13 +220,41 @@ class DependencyScanningFilesystemSharedCache {
   CacheShard &getShardForFilename(StringRef Filename) const;
   CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
 
-  /// Visits all cached entries and re-stat an entry using FS if
-  /// it is negatively stat cached. If re-stat succeeds on a path,
-  /// the path is added to InvalidPaths, indicating that the cache
-  /// may have erroneously negatively cached it. The caller can then
-  /// use InvalidPaths to issue diagnostics.
-  std::vector<StringRef>
-  getInvalidNegativeStatCachedPaths(llvm::vfs::FileSystem &UnderlyingFS) const;
+  struct InvalidEntryDiagInfo {
+    // A null terminated string that contains a path.
+    const char *Path = nullptr;
+
+    enum class Type : unsigned char { NegativeCaching = 1, SizeChanged = 2 };
+
+    Type T;
+
+    struct SizeChangedInfo {
+      uint64_t CachedSize = 0;
+      uint64_t ActualSize = 0;
+    };
+
+    std::optional<SizeChangedInfo> SizeInfo;
+
+    InvalidEntryDiagInfo(const char *Path) : Path(Path) {
+      T = Type::NegativeCaching;
+    }
+
+    InvalidEntryDiagInfo(const char *Path, uint64_t CachedSize,
+                         uint64_t ActualSize)
+        : Path(Path), SizeInfo({CachedSize, ActualSize}) {
+      T = Type::SizeChanged;
+    }
+  };
+
+  /// Visits all cached entries and re-stat an entry using UnderlyingFS to check
+  /// if the cache contains invalid entries. An entry can be invalid for two
+  /// reasons:
+  ///  1. The entry contains a stat error, indicating the file did not exist
+  ///     in the cache, but the file exists on the UnderlyingFS.
+  ///  2. The entry is associated with a file whose size is different from the
+  ///     actual size on the UnderlyingFS.
+  std::vector<InvalidEntryDiagInfo>
+  getInvalidEntryDiagInfo(llvm::vfs::FileSystem &UnderlyingFS) const;
 
 private:
   std::unique_ptr<CacheShard[]> CacheShards;
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 140833050f4e9..64b41a482f921 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -107,31 +107,39 @@ DependencyScanningFilesystemSharedCache::getShardForUID(
   return CacheShards[Hash % NumShards];
 }
 
-std::vector<StringRef>
-DependencyScanningFilesystemSharedCache::getInvalidNegativeStatCachedPaths(
+std::vector<DependencyScanningFilesystemSharedCache::InvalidEntryDiagInfo>
+DependencyScanningFilesystemSharedCache::getInvalidEntryDiagInfo(
     llvm::vfs::FileSystem &UnderlyingFS) const {
   // Iterate through all shards and look for cached stat errors.
-  std::vector<StringRef> InvalidPaths;
+  std::vector<InvalidEntryDiagInfo> InvalidDiagInfo;
   for (unsigned i = 0; i < NumShards; i++) {
     const CacheShard &Shard = CacheShards[i];
     std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
     for (const auto &[Path, CachedPair] : Shard.CacheByFilename) {
       const CachedFileSystemEntry *Entry = CachedPair.first;
 
-      if (Entry->getError()) {
-        // Only examine cached errors.
-        llvm::ErrorOr<llvm::vfs::Status> Stat = UnderlyingFS.status(Path);
-        if (Stat) {
+      llvm::ErrorOr<llvm::vfs::Status> Status = UnderlyingFS.status(Path);
+      if (Status) {
+        if (Entry->getError()) {
           // This is the case where we have cached the non-existence
-          // of the file at Path first, and a a file at the path is created
+          // of the file at Path first, and a file at the path is created
           // later. The cache entry is not invalidated (as we have no good
           // way to do it now), which may lead to missing file build errors.
-          InvalidPaths.push_back(Path);
+          InvalidDiagInfo.emplace_back(Path.data());
+        } else {
+          llvm::vfs::Status CachedStatus = Entry->getStatus();
+          uint64_t CachedSize = CachedStatus.getSize();
+          uint64_t ActualSize = Status->getSize();
+          if (CachedSize != ActualSize) {
+            // This is the case where the cached file has a different size
+            // from the actual file that comes from the underlying FS.
+            InvalidDiagInfo.emplace_back(Path.data(), CachedSize, ActualSize);
+          }
         }
       }
     }
   }
-  return InvalidPaths;
+  return InvalidDiagInfo;
 }
 
 const CachedFileSystemEntry *
diff --git a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
index 7420743c97a2a..068212696943a 100644
--- a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
+++ b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
@@ -187,18 +187,47 @@ TEST(DependencyScanningFilesystem, DiagnoseStaleStatFailures) {
   DependencyScanningWorkerFilesystem DepFS(SharedCache, InMemoryFS);
 
   bool Path1Exists = DepFS.exists("/path1.suffix");
-  EXPECT_EQ(Path1Exists, false);
+  ASSERT_EQ(Path1Exists, false);
 
   // Adding a file that has been stat-ed,
   InMemoryFS->addFile("/path1.suffix", 0, llvm::MemoryBuffer::getMemBuffer(""));
   Path1Exists = DepFS.exists("/path1.suffix");
   // Due to caching in SharedCache, path1 should not exist in
   // DepFS's eyes.
-  EXPECT_EQ(Path1Exists, false);
+  ASSERT_EQ(Path1Exists, false);
 
-  std::vector<llvm::StringRef> InvalidPaths =
-      SharedCache.getInvalidNegativeStatCachedPaths(*InMemoryFS);
+  auto InvalidEntries = SharedCache.getInvalidEntryDiagInfo(*InMemoryFS);
 
-  EXPECT_EQ(InvalidPaths.size(), 1u);
-  ASSERT_STREQ("/path1.suffix", InvalidPaths[0].str().c_str());
+  EXPECT_EQ(InvalidEntries.size(), 1u);
+  ASSERT_STREQ("/path1.suffix", InvalidEntries[0].Path);
+}
+
+TEST(DependencyScanningFilesystem, DiagnoseCachedFileSizeChange) {
+  auto InMemoryFS1 = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  auto InMemoryFS2 = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  InMemoryFS1->setCurrentWorkingDirectory("/");
+  InMemoryFS2->setCurrentWorkingDirectory("/");
+
+  DependencyScanningFilesystemSharedCache SharedCache;
+  DependencyScanningWorkerFilesystem DepFS(SharedCache, InMemoryFS1);
+
+  InMemoryFS1->addFile("/path1.suffix", 0,
+                       llvm::MemoryBuffer::getMemBuffer(""));
+  bool Path1Exists = DepFS.exists("/path1.suffix");
+  ASSERT_EQ(Path1Exists, true);
+
+  // Add a file to a new FS that has the same path but different content.
+  InMemoryFS2->addFile("/path1.suffix", 1,
+                       llvm::MemoryBuffer::getMemBuffer("        "));
+
+  // Check against the new file system. InMemoryFS2 could be the underlying
+  // physical system in the real world.
+  auto InvalidEntries = SharedCache.getInvalidEntryDiagInfo(*InMemoryFS2);
+
+  ASSERT_EQ(InvalidEntries.size(), 1u);
+  ASSERT_STREQ("/path1.suffix", InvalidEntries[0].Path);
+  auto SizeInfo = InvalidEntries[0].SizeInfo;
+  ASSERT_TRUE(SizeInfo);
+  ASSERT_EQ(SizeInfo->CachedSize, 0u);
+  ASSERT_EQ(SizeInfo->ActualSize, 8u);
 }

>From 2133d120d5cb7df27ad8d05e8debbedf80fb3b61 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi_wu at apple.com>
Date: Mon, 23 Jun 2025 09:29:13 -0700
Subject: [PATCH 2/3] Address review comments.

---
 .../DependencyScanningFilesystem.h            | 29 +++++++------------
 .../DependencyScanningFilesystem.cpp          |  4 +--
 .../DependencyScanningFilesystemTest.cpp      |  4 ++-
 3 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index e0656676fefff..45f08985c09ab 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -220,40 +220,33 @@ class DependencyScanningFilesystemSharedCache {
   CacheShard &getShardForFilename(StringRef Filename) const;
   CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
 
-  struct InvalidEntryDiagInfo {
+  struct OutOfDateEntry {
     // A null terminated string that contains a path.
     const char *Path = nullptr;
 
-    enum class Type : unsigned char { NegativeCaching = 1, SizeChanged = 2 };
-
-    Type T;
-
+    struct NegativelyCachedInfo {};
     struct SizeChangedInfo {
       uint64_t CachedSize = 0;
       uint64_t ActualSize = 0;
     };
 
-    std::optional<SizeChangedInfo> SizeInfo;
+    std::variant<NegativelyCachedInfo, SizeChangedInfo> Info;
 
-    InvalidEntryDiagInfo(const char *Path) : Path(Path) {
-      T = Type::NegativeCaching;
-    }
+    OutOfDateEntry(const char *Path)
+        : Path(Path), Info(NegativelyCachedInfo{}) {}
 
-    InvalidEntryDiagInfo(const char *Path, uint64_t CachedSize,
-                         uint64_t ActualSize)
-        : Path(Path), SizeInfo({CachedSize, ActualSize}) {
-      T = Type::SizeChanged;
-    }
+    OutOfDateEntry(const char *Path, uint64_t CachedSize, uint64_t ActualSize)
+        : Path(Path), Info(SizeChangedInfo{CachedSize, ActualSize}) {}
   };
 
   /// Visits all cached entries and re-stat an entry using UnderlyingFS to check
-  /// if the cache contains invalid entries. An entry can be invalid for two
-  /// reasons:
+  /// if the cache contains out-of-date entries. An entry can be out-of-date for
+  /// two reasons:
   ///  1. The entry contains a stat error, indicating the file did not exist
   ///     in the cache, but the file exists on the UnderlyingFS.
   ///  2. The entry is associated with a file whose size is different from the
-  ///     actual size on the UnderlyingFS.
-  std::vector<InvalidEntryDiagInfo>
+  ///     size of the file on the same path on the UnderlyingFS.
+  std::vector<OutOfDateEntry>
   getInvalidEntryDiagInfo(llvm::vfs::FileSystem &UnderlyingFS) const;
 
 private:
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 64b41a482f921..aeb5ea5bf1668 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -107,11 +107,11 @@ DependencyScanningFilesystemSharedCache::getShardForUID(
   return CacheShards[Hash % NumShards];
 }
 
-std::vector<DependencyScanningFilesystemSharedCache::InvalidEntryDiagInfo>
+std::vector<DependencyScanningFilesystemSharedCache::OutOfDateEntry>
 DependencyScanningFilesystemSharedCache::getInvalidEntryDiagInfo(
     llvm::vfs::FileSystem &UnderlyingFS) const {
   // Iterate through all shards and look for cached stat errors.
-  std::vector<InvalidEntryDiagInfo> InvalidDiagInfo;
+  std::vector<OutOfDateEntry> InvalidDiagInfo;
   for (unsigned i = 0; i < NumShards; i++) {
     const CacheShard &Shard = CacheShards[i];
     std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
diff --git a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
index 068212696943a..ec76361ee205e 100644
--- a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
+++ b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
@@ -226,7 +226,9 @@ TEST(DependencyScanningFilesystem, DiagnoseCachedFileSizeChange) {
 
   ASSERT_EQ(InvalidEntries.size(), 1u);
   ASSERT_STREQ("/path1.suffix", InvalidEntries[0].Path);
-  auto SizeInfo = InvalidEntries[0].SizeInfo;
+  auto SizeInfo = std::get_if<
+      DependencyScanningFilesystemSharedCache::OutOfDateEntry::SizeChangedInfo>(
+      &InvalidEntries[0].Info);
   ASSERT_TRUE(SizeInfo);
   ASSERT_EQ(SizeInfo->CachedSize, 0u);
   ASSERT_EQ(SizeInfo->ActualSize, 8u);

>From 030078322e884152b02ddcb653cd9c9a00b1c4ae Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi_wu at apple.com>
Date: Mon, 23 Jun 2025 10:33:29 -0700
Subject: [PATCH 3/3] Fix build failures after merge.

---
 .../DependencyScanning/DependencyScanningFilesystem.h |  1 +
 .../DependencyScanningFilesystemTest.cpp              | 11 +++++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index f0c3cf6559d05..9ecc71b2043a9 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -18,6 +18,7 @@
 #include "llvm/Support/VirtualFileSystem.h"
 #include <mutex>
 #include <optional>
+#include <variant>
 
 namespace clang {
 namespace tooling {
diff --git a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
index b9e415af2994f..878e5c53c9afa 100644
--- a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
+++ b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
@@ -203,7 +203,8 @@ TEST(DependencyScanningFilesystem, DiagnoseStaleStatFailures) {
   // DepFS's eyes.
   ASSERT_EQ(Path1Exists, false);
 
-  auto InvalidEntries = SharedCache.getInvalidEntryDiagInfo(*InMemoryFS);
+  auto InvalidEntries =
+      Service.getSharedCache().getInvalidEntryDiagInfo(*InMemoryFS);
 
   EXPECT_EQ(InvalidEntries.size(), 1u);
   ASSERT_STREQ("/path1.suffix", InvalidEntries[0].Path);
@@ -215,8 +216,9 @@ TEST(DependencyScanningFilesystem, DiagnoseCachedFileSizeChange) {
   InMemoryFS1->setCurrentWorkingDirectory("/");
   InMemoryFS2->setCurrentWorkingDirectory("/");
 
-  DependencyScanningFilesystemSharedCache SharedCache;
-  DependencyScanningWorkerFilesystem DepFS(SharedCache, InMemoryFS1);
+  DependencyScanningService Service(ScanningMode::DependencyDirectivesScan,
+                                    ScanningOutputFormat::Make);
+  DependencyScanningWorkerFilesystem DepFS(Service, InMemoryFS1);
 
   InMemoryFS1->addFile("/path1.suffix", 0,
                        llvm::MemoryBuffer::getMemBuffer(""));
@@ -229,7 +231,8 @@ TEST(DependencyScanningFilesystem, DiagnoseCachedFileSizeChange) {
 
   // Check against the new file system. InMemoryFS2 could be the underlying
   // physical system in the real world.
-  auto InvalidEntries = SharedCache.getInvalidEntryDiagInfo(*InMemoryFS2);
+  auto InvalidEntries =
+      Service.getSharedCache().getInvalidEntryDiagInfo(*InMemoryFS2);
 
   ASSERT_EQ(InvalidEntries.size(), 1u);
   ASSERT_STREQ("/path1.suffix", InvalidEntries[0].Path);



More information about the cfe-commits mailing list