[clang] [clang Dependency Scanning] Enhance File Caching Diagnostics (PR #144105)
Qiongsi Wu via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 23 09:32:40 PDT 2025
https://github.com/qiongsiwu updated https://github.com/llvm/llvm-project/pull/144105
>From 2187de3d30045a02c03ca009f2497608ab6bc6a4 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi_wu at apple.com>
Date: Thu, 12 Jun 2025 16:05:32 -0700
Subject: [PATCH 1/2] Enhancing DependencyScanningFilesystemSharedCache's API
that reports invalid entries.
---
.../DependencyScanningFilesystem.h | 42 +++++++++++++++----
.../DependencyScanningFilesystem.cpp | 28 ++++++++-----
.../DependencyScanningFilesystemTest.cpp | 41 +++++++++++++++---
3 files changed, 88 insertions(+), 23 deletions(-)
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index a20a89a4c2b76..e0656676fefff 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -220,13 +220,41 @@ class DependencyScanningFilesystemSharedCache {
CacheShard &getShardForFilename(StringRef Filename) const;
CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
- /// Visits all cached entries and re-stat an entry using FS if
- /// it is negatively stat cached. If re-stat succeeds on a path,
- /// the path is added to InvalidPaths, indicating that the cache
- /// may have erroneously negatively cached it. The caller can then
- /// use InvalidPaths to issue diagnostics.
- std::vector<StringRef>
- getInvalidNegativeStatCachedPaths(llvm::vfs::FileSystem &UnderlyingFS) const;
+ struct InvalidEntryDiagInfo {
+ // A null terminated string that contains a path.
+ const char *Path = nullptr;
+
+ enum class Type : unsigned char { NegativeCaching = 1, SizeChanged = 2 };
+
+ Type T;
+
+ struct SizeChangedInfo {
+ uint64_t CachedSize = 0;
+ uint64_t ActualSize = 0;
+ };
+
+ std::optional<SizeChangedInfo> SizeInfo;
+
+ InvalidEntryDiagInfo(const char *Path) : Path(Path) {
+ T = Type::NegativeCaching;
+ }
+
+ InvalidEntryDiagInfo(const char *Path, uint64_t CachedSize,
+ uint64_t ActualSize)
+ : Path(Path), SizeInfo({CachedSize, ActualSize}) {
+ T = Type::SizeChanged;
+ }
+ };
+
+ /// Visits all cached entries and re-stat an entry using UnderlyingFS to check
+ /// if the cache contains invalid entries. An entry can be invalid for two
+ /// reasons:
+ /// 1. The entry contains a stat error, indicating the file did not exist
+ /// in the cache, but the file exists on the UnderlyingFS.
+ /// 2. The entry is associated with a file whose size is different from the
+ /// actual size on the UnderlyingFS.
+ std::vector<InvalidEntryDiagInfo>
+ getInvalidEntryDiagInfo(llvm::vfs::FileSystem &UnderlyingFS) const;
private:
std::unique_ptr<CacheShard[]> CacheShards;
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 140833050f4e9..64b41a482f921 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -107,31 +107,39 @@ DependencyScanningFilesystemSharedCache::getShardForUID(
return CacheShards[Hash % NumShards];
}
-std::vector<StringRef>
-DependencyScanningFilesystemSharedCache::getInvalidNegativeStatCachedPaths(
+std::vector<DependencyScanningFilesystemSharedCache::InvalidEntryDiagInfo>
+DependencyScanningFilesystemSharedCache::getInvalidEntryDiagInfo(
llvm::vfs::FileSystem &UnderlyingFS) const {
// Iterate through all shards and look for cached stat errors.
- std::vector<StringRef> InvalidPaths;
+ std::vector<InvalidEntryDiagInfo> InvalidDiagInfo;
for (unsigned i = 0; i < NumShards; i++) {
const CacheShard &Shard = CacheShards[i];
std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
for (const auto &[Path, CachedPair] : Shard.CacheByFilename) {
const CachedFileSystemEntry *Entry = CachedPair.first;
- if (Entry->getError()) {
- // Only examine cached errors.
- llvm::ErrorOr<llvm::vfs::Status> Stat = UnderlyingFS.status(Path);
- if (Stat) {
+ llvm::ErrorOr<llvm::vfs::Status> Status = UnderlyingFS.status(Path);
+ if (Status) {
+ if (Entry->getError()) {
// This is the case where we have cached the non-existence
- // of the file at Path first, and a a file at the path is created
+ // of the file at Path first, and a file at the path is created
// later. The cache entry is not invalidated (as we have no good
// way to do it now), which may lead to missing file build errors.
- InvalidPaths.push_back(Path);
+ InvalidDiagInfo.emplace_back(Path.data());
+ } else {
+ llvm::vfs::Status CachedStatus = Entry->getStatus();
+ uint64_t CachedSize = CachedStatus.getSize();
+ uint64_t ActualSize = Status->getSize();
+ if (CachedSize != ActualSize) {
+ // This is the case where the cached file has a different size
+ // from the actual file that comes from the underlying FS.
+ InvalidDiagInfo.emplace_back(Path.data(), CachedSize, ActualSize);
+ }
}
}
}
}
- return InvalidPaths;
+ return InvalidDiagInfo;
}
const CachedFileSystemEntry *
diff --git a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
index 7420743c97a2a..068212696943a 100644
--- a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
+++ b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
@@ -187,18 +187,47 @@ TEST(DependencyScanningFilesystem, DiagnoseStaleStatFailures) {
DependencyScanningWorkerFilesystem DepFS(SharedCache, InMemoryFS);
bool Path1Exists = DepFS.exists("/path1.suffix");
- EXPECT_EQ(Path1Exists, false);
+ ASSERT_EQ(Path1Exists, false);
// Adding a file that has been stat-ed,
InMemoryFS->addFile("/path1.suffix", 0, llvm::MemoryBuffer::getMemBuffer(""));
Path1Exists = DepFS.exists("/path1.suffix");
// Due to caching in SharedCache, path1 should not exist in
// DepFS's eyes.
- EXPECT_EQ(Path1Exists, false);
+ ASSERT_EQ(Path1Exists, false);
- std::vector<llvm::StringRef> InvalidPaths =
- SharedCache.getInvalidNegativeStatCachedPaths(*InMemoryFS);
+ auto InvalidEntries = SharedCache.getInvalidEntryDiagInfo(*InMemoryFS);
- EXPECT_EQ(InvalidPaths.size(), 1u);
- ASSERT_STREQ("/path1.suffix", InvalidPaths[0].str().c_str());
+ EXPECT_EQ(InvalidEntries.size(), 1u);
+ ASSERT_STREQ("/path1.suffix", InvalidEntries[0].Path);
+}
+
+TEST(DependencyScanningFilesystem, DiagnoseCachedFileSizeChange) {
+ auto InMemoryFS1 = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+ auto InMemoryFS2 = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+ InMemoryFS1->setCurrentWorkingDirectory("/");
+ InMemoryFS2->setCurrentWorkingDirectory("/");
+
+ DependencyScanningFilesystemSharedCache SharedCache;
+ DependencyScanningWorkerFilesystem DepFS(SharedCache, InMemoryFS1);
+
+ InMemoryFS1->addFile("/path1.suffix", 0,
+ llvm::MemoryBuffer::getMemBuffer(""));
+ bool Path1Exists = DepFS.exists("/path1.suffix");
+ ASSERT_EQ(Path1Exists, true);
+
+ // Add a file to a new FS that has the same path but different content.
+ InMemoryFS2->addFile("/path1.suffix", 1,
+ llvm::MemoryBuffer::getMemBuffer(" "));
+
+ // Check against the new file system. InMemoryFS2 could be the underlying
+ // physical system in the real world.
+ auto InvalidEntries = SharedCache.getInvalidEntryDiagInfo(*InMemoryFS2);
+
+ ASSERT_EQ(InvalidEntries.size(), 1u);
+ ASSERT_STREQ("/path1.suffix", InvalidEntries[0].Path);
+ auto SizeInfo = InvalidEntries[0].SizeInfo;
+ ASSERT_TRUE(SizeInfo);
+ ASSERT_EQ(SizeInfo->CachedSize, 0u);
+ ASSERT_EQ(SizeInfo->ActualSize, 8u);
}
>From 2133d120d5cb7df27ad8d05e8debbedf80fb3b61 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qiongsi_wu at apple.com>
Date: Mon, 23 Jun 2025 09:29:13 -0700
Subject: [PATCH 2/2] Address review comments.
---
.../DependencyScanningFilesystem.h | 29 +++++++------------
.../DependencyScanningFilesystem.cpp | 4 +--
.../DependencyScanningFilesystemTest.cpp | 4 ++-
3 files changed, 16 insertions(+), 21 deletions(-)
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index e0656676fefff..45f08985c09ab 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -220,40 +220,33 @@ class DependencyScanningFilesystemSharedCache {
CacheShard &getShardForFilename(StringRef Filename) const;
CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
- struct InvalidEntryDiagInfo {
+ struct OutOfDateEntry {
// A null terminated string that contains a path.
const char *Path = nullptr;
- enum class Type : unsigned char { NegativeCaching = 1, SizeChanged = 2 };
-
- Type T;
-
+ struct NegativelyCachedInfo {};
struct SizeChangedInfo {
uint64_t CachedSize = 0;
uint64_t ActualSize = 0;
};
- std::optional<SizeChangedInfo> SizeInfo;
+ std::variant<NegativelyCachedInfo, SizeChangedInfo> Info;
- InvalidEntryDiagInfo(const char *Path) : Path(Path) {
- T = Type::NegativeCaching;
- }
+ OutOfDateEntry(const char *Path)
+ : Path(Path), Info(NegativelyCachedInfo{}) {}
- InvalidEntryDiagInfo(const char *Path, uint64_t CachedSize,
- uint64_t ActualSize)
- : Path(Path), SizeInfo({CachedSize, ActualSize}) {
- T = Type::SizeChanged;
- }
+ OutOfDateEntry(const char *Path, uint64_t CachedSize, uint64_t ActualSize)
+ : Path(Path), Info(SizeChangedInfo{CachedSize, ActualSize}) {}
};
/// Visits all cached entries and re-stat an entry using UnderlyingFS to check
- /// if the cache contains invalid entries. An entry can be invalid for two
- /// reasons:
+ /// if the cache contains out-of-date entries. An entry can be out-of-date for
+ /// two reasons:
/// 1. The entry contains a stat error, indicating the file did not exist
/// in the cache, but the file exists on the UnderlyingFS.
/// 2. The entry is associated with a file whose size is different from the
- /// actual size on the UnderlyingFS.
- std::vector<InvalidEntryDiagInfo>
+ /// size of the file on the same path on the UnderlyingFS.
+ std::vector<OutOfDateEntry>
getInvalidEntryDiagInfo(llvm::vfs::FileSystem &UnderlyingFS) const;
private:
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 64b41a482f921..aeb5ea5bf1668 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -107,11 +107,11 @@ DependencyScanningFilesystemSharedCache::getShardForUID(
return CacheShards[Hash % NumShards];
}
-std::vector<DependencyScanningFilesystemSharedCache::InvalidEntryDiagInfo>
+std::vector<DependencyScanningFilesystemSharedCache::OutOfDateEntry>
DependencyScanningFilesystemSharedCache::getInvalidEntryDiagInfo(
llvm::vfs::FileSystem &UnderlyingFS) const {
// Iterate through all shards and look for cached stat errors.
- std::vector<InvalidEntryDiagInfo> InvalidDiagInfo;
+ std::vector<OutOfDateEntry> InvalidDiagInfo;
for (unsigned i = 0; i < NumShards; i++) {
const CacheShard &Shard = CacheShards[i];
std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
diff --git a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
index 068212696943a..ec76361ee205e 100644
--- a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
+++ b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
@@ -226,7 +226,9 @@ TEST(DependencyScanningFilesystem, DiagnoseCachedFileSizeChange) {
ASSERT_EQ(InvalidEntries.size(), 1u);
ASSERT_STREQ("/path1.suffix", InvalidEntries[0].Path);
- auto SizeInfo = InvalidEntries[0].SizeInfo;
+ auto SizeInfo = std::get_if<
+ DependencyScanningFilesystemSharedCache::OutOfDateEntry::SizeChangedInfo>(
+ &InvalidEntries[0].Info);
ASSERT_TRUE(SizeInfo);
ASSERT_EQ(SizeInfo->CachedSize, 0u);
ASSERT_EQ(SizeInfo->ActualSize, 8u);
More information about the cfe-commits
mailing list