[clang] [llvm] Reapply "[clang][deps] Add in-flight query caching to `DependencyScanningFilesystemSharedCache`" (#202804) (PR #202881)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 10 00:59:49 PDT 2026
llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-support
Author: Artem Chikin (artemcm)
<details>
<summary>Changes</summary>
Revert the revert in https://github.com/llvm/llvm-project/pull/202804, and add an additional guard for the test which is not applicable on all platforms.
---
Patch is 45.28 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/202881.diff
7 Files Affected:
- (modified) clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h (+72-143)
- (modified) clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp (+142-145)
- (modified) clang/unittests/DependencyScanning/CMakeLists.txt (+2)
- (modified) clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp (+216)
- (modified) llvm/include/llvm/Support/VirtualFileSystem.h (+56-13)
- (modified) llvm/lib/Support/VirtualFileSystem.cpp (-26)
- (modified) llvm/unittests/Support/VirtualFileSystemTest.cpp (+30)
``````````diff
diff --git a/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h
index c9ffc202ad694..ec95c5b38f102 100644
--- a/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h
@@ -16,6 +16,8 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/VirtualFileSystem.h"
+#include <condition_variable>
+#include <memory>
#include <mutex>
#include <optional>
#include <variant>
@@ -154,19 +156,53 @@ using CachedRealPath = llvm::ErrorOr<std::string>;
/// the worker threads.
class DependencyScanningFilesystemSharedCache {
public:
+ /// In-flight slot used to dedup concurrent producers for the same key.
+ /// The producer publishes via \c publish(); waiters block on \c Mutex/CV
+ /// until \c Done is set. \c Result holds the resolved entry, or an
+ /// uncached-negative error shared with overlapping waiters but not
+ /// persisted in the shard.
+ struct InProgressEntry {
+ std::mutex Mutex;
+ std::condition_variable CondVar;
+ bool Done = false;
+ llvm::ErrorOr<const CachedFileSystemEntry *> Result = std::error_code{};
+
+ /// Publishes the producer's outcome to this slot and wakes all waiters.
+ void publish(llvm::ErrorOr<const CachedFileSystemEntry *> R) {
+ {
+ std::lock_guard<std::mutex> EntryLock(Mutex);
+ assert(!Done && "slot already published");
+ Result = R;
+ Done = true;
+ }
+ CondVar.notify_all();
+ }
+ };
+
struct CacheShard {
/// The mutex that needs to be locked before mutation of any member.
mutable std::mutex CacheLock;
- /// Map from filenames to cached entries and real paths.
- llvm::StringMap<
- std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
- llvm::BumpPtrAllocator>
- CacheByFilename;
+ /// Cache state per filename: resolved entry, real path, and an in-flight
+ /// slot (if any). \c InProgress is reset on publish.
+ struct FilenameCacheState {
+ const CachedFileSystemEntry *Entry = nullptr;
+ const CachedRealPath *RealPath = nullptr;
+ std::shared_ptr<InProgressEntry> InProgress;
+ };
+
+ /// Cache state stored per unique ID; similar to
+ /// \c FilenameCacheState.
+ struct UIDCacheState {
+ const CachedFileSystemEntry *Entry = nullptr;
+ std::shared_ptr<InProgressEntry> InProgress;
+ };
+
+ /// Map from filenames to their cached state.
+ llvm::StringMap<FilenameCacheState, llvm::BumpPtrAllocator> CacheByFilename;
- /// Map from unique IDs to cached entries.
- llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
- EntriesByUID;
+ /// Map from unique IDs to their cached state.
+ llvm::DenseMap<llvm::sys::fs::UniqueID, UIDCacheState> EntriesByUID;
/// The backing storage for cached entries.
llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;
@@ -177,33 +213,6 @@ class DependencyScanningFilesystemSharedCache {
/// The backing storage for cached real paths.
llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
- /// Returns entry associated with the filename or nullptr if none is found.
- const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
-
- /// Returns entry associated with the unique ID or nullptr if none is found.
- const CachedFileSystemEntry *
- findEntryByUID(llvm::sys::fs::UniqueID UID) const;
-
- /// Returns entry associated with the filename if there is some. Otherwise,
- /// constructs new one with the given status, associates it with the
- /// filename and returns the result.
- const CachedFileSystemEntry &
- getOrEmplaceEntryForFilename(StringRef Filename,
- llvm::ErrorOr<llvm::vfs::Status> Stat);
-
- /// Returns entry associated with the unique ID if there is some. Otherwise,
- /// constructs new one with the given status and contents, associates it
- /// with the unique ID and returns the result.
- const CachedFileSystemEntry &
- getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
- std::unique_ptr<llvm::MemoryBuffer> Contents);
-
- /// Returns entry associated with the filename if there is some. Otherwise,
- /// associates the given entry with the filename and returns it.
- const CachedFileSystemEntry &
- getOrInsertEntryForFilename(StringRef Filename,
- const CachedFileSystemEntry &Entry);
-
/// Returns the real path associated with the filename or nullptr if none is
/// found.
const CachedRealPath *findRealPathByFilename(StringRef Filename) const;
@@ -256,65 +265,6 @@ class DependencyScanningFilesystemSharedCache {
unsigned NumShards;
};
-/// This class is a local cache, that caches the 'stat' and 'open' calls to the
-/// underlying real file system.
-class DependencyScanningFilesystemLocalCache {
- llvm::StringMap<
- std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
- llvm::BumpPtrAllocator>
- Cache;
-
-public:
- /// Returns entry associated with the filename or nullptr if none is found.
- const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
- assert(llvm::sys::path::is_absolute_gnu(Filename));
- auto It = Cache.find(Filename);
- return It == Cache.end() ? nullptr : It->getValue().first;
- }
-
- /// Associates the given entry with the filename and returns the given entry
- /// pointer (for convenience).
- const CachedFileSystemEntry &
- insertEntryForFilename(StringRef Filename,
- const CachedFileSystemEntry &Entry) {
- assert(llvm::sys::path::is_absolute_gnu(Filename));
- auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}});
- auto &[CachedEntry, CachedRealPath] = It->getValue();
- if (!Inserted) {
- // The file is already present in the local cache. If we got here, it only
- // contains the real path. Let's make sure the entry is populated too.
- assert((!CachedEntry && CachedRealPath) && "entry already present");
- CachedEntry = &Entry;
- }
- return *CachedEntry;
- }
-
- /// Returns real path associated with the filename or nullptr if none is
- /// found.
- const CachedRealPath *findRealPathByFilename(StringRef Filename) const {
- assert(llvm::sys::path::is_absolute_gnu(Filename));
- auto It = Cache.find(Filename);
- return It == Cache.end() ? nullptr : It->getValue().second;
- }
-
- /// Associates the given real path with the filename and returns the given
- /// entry pointer (for convenience).
- const CachedRealPath &
- insertRealPathForFilename(StringRef Filename,
- const CachedRealPath &RealPath) {
- assert(llvm::sys::path::is_absolute_gnu(Filename));
- auto [It, Inserted] = Cache.insert({Filename, {nullptr, &RealPath}});
- auto &[CachedEntry, CachedRealPath] = It->getValue();
- if (!Inserted) {
- // The file is already present in the local cache. If we got here, it only
- // contains the entry. Let's make sure the real path is populated too.
- assert((!CachedRealPath && CachedEntry) && "real path already present");
- CachedRealPath = &RealPath;
- }
- return *CachedRealPath;
- }
-};
-
/// Reference to a CachedFileSystemEntry.
/// If the underlying entry is an opened file, this wrapper returns the file
/// contents and the scanned preprocessor directives.
@@ -411,14 +361,23 @@ class DependencyScanningWorkerFilesystem
bool exists(const Twine &Path) override;
private:
- /// For a filename that's not yet associated with any entry in the caches,
- /// uses the underlying filesystem to either look up the entry based in the
- /// shared cache indexed by unique ID, or creates new entry from scratch.
- /// \p FilenameForLookup will always be an absolute path, and different than
- /// \p OriginalFilename if \p OriginalFilename is relative.
- llvm::ErrorOr<const CachedFileSystemEntry &>
- computeAndStoreResult(StringRef OriginalFilename,
- StringRef FilenameForLookup);
+ /// Resolves the cache entry for \p FilenameForLookup through the shared
+ /// cache: returns an entry already produced by another worker (a cache hit
+ /// or the result of an in-flight wait), or claims the producer slot and
+ /// computes the entry via the underlying filesystem.
+ /// \p FilenameForLookup is always absolute, and may differ from
+ /// \p OriginalFilename if the latter is relative.
+ llvm::ErrorOr<const CachedFileSystemEntry *>
+ resolveFilenameThroughSharedCache(StringRef OriginalFilename,
+ StringRef FilenameForLookup);
+
+ /// Resolves the cache entry for the on-disk file identified by \p Stat
+ /// through the UID-keyed shared cache. Reads the file's contents on the
+ /// producer path. Always returns a non-null entry (which may carry a
+ /// readFile error).
+ const CachedFileSystemEntry *
+ resolveUIDThroughSharedCache(StringRef OriginalFilename,
+ const llvm::vfs::Status &Stat);
/// Represents a filesystem entry that has been stat-ed (and potentially read)
/// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
@@ -435,46 +394,6 @@ class DependencyScanningWorkerFilesystem
/// in status and size of read contents.
llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);
- /// Returns entry associated with the unique ID of the given tentative entry
- /// if there is some in the shared cache. Otherwise, constructs new one,
- /// associates it with the unique ID and returns the result.
- const CachedFileSystemEntry &
- getOrEmplaceSharedEntryForUID(TentativeEntry TEntry);
-
- /// Returns entry associated with the filename or nullptr if none is found.
- ///
- /// Returns entry from local cache if there is some. Otherwise, if the entry
- /// is found in the shared cache, writes it through the local cache and
- /// returns it. Otherwise returns nullptr.
- const CachedFileSystemEntry *
- findEntryByFilenameWithWriteThrough(StringRef Filename);
-
- /// Returns entry associated with the unique ID in the shared cache or nullptr
- /// if none is found.
- const CachedFileSystemEntry *
- findSharedEntryByUID(llvm::vfs::Status Stat) const;
-
- /// Associates the given entry with the filename in the local cache and
- /// returns it.
- const CachedFileSystemEntry &
- insertLocalEntryForFilename(StringRef Filename,
- const CachedFileSystemEntry &Entry) {
- return LocalCache.insertEntryForFilename(Filename, Entry);
- }
-
- /// Returns entry associated with the filename in the shared cache if there is
- /// some. Otherwise, constructs new one with the given error code, associates
- /// it with the filename and returns the result.
- const CachedFileSystemEntry &
- getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC);
-
- /// Returns entry associated with the filename in the shared cache if there is
- /// some. Otherwise, associates the given entry with the filename and returns
- /// it.
- const CachedFileSystemEntry &
- getOrInsertSharedEntryForFilename(StringRef Filename,
- const CachedFileSystemEntry &Entry);
-
void printImpl(raw_ostream &OS, PrintType Type,
unsigned IndentLevel) const override {
printIndent(OS, IndentLevel);
@@ -484,9 +403,19 @@ class DependencyScanningWorkerFilesystem
/// The service associated with this VFS.
DependencyScanningService &Service;
+
+ /// Per-filename state cached locally by this worker thread, so repeated
+ /// queries can be served without touching the shared cache. The entry and
+ /// real path are arena-owned by the shared cache and outlive this worker, so
+ /// borrowing raw pointers here is safe.
+ struct LocalEntry {
+ const CachedFileSystemEntry *File = nullptr;
+ const CachedRealPath *RealPath = nullptr;
+ };
+
/// The local cache is used by the worker thread to cache file system queries
/// locally instead of querying the global cache every time.
- DependencyScanningFilesystemLocalCache LocalCache;
+ llvm::StringMap<LocalEntry, llvm::BumpPtrAllocator> LocalCache;
/// The working directory to use for making relative paths absolute before
/// using them for cache lookups.
diff --git a/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp
index 6ff6cd9040665..3b00411b7d2db 100644
--- a/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -115,8 +115,13 @@ DependencyScanningFilesystemSharedCache::getOutOfDateEntries(
for (unsigned i = 0; i < NumShards; i++) {
const CacheShard &Shard = CacheShards[i];
std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
- for (const auto &[Path, CachedPair] : Shard.CacheByFilename) {
- const CachedFileSystemEntry *Entry = CachedPair.first;
+ for (const auto &[Path, State] : Shard.CacheByFilename) {
+ const CachedFileSystemEntry *Entry = State.Entry;
+ // Skip slots without a resolved entry: real-path-only entries from
+ // getRealPath, or uncached negative stats. Runs post-scan, so no
+ // in-progress slots remain.
+ if (!Entry)
+ continue;
llvm::ErrorOr<llvm::vfs::Status> Status = UnderlyingFS.status(Path);
if (Status) {
if (Entry->getError()) {
@@ -151,69 +156,43 @@ DependencyScanningFilesystemSharedCache::getOutOfDateEntries(
return InvalidDiagInfo;
}
-const CachedFileSystemEntry *
-DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
- StringRef Filename) const {
- assert(llvm::sys::path::is_absolute_gnu(Filename));
- std::lock_guard<std::mutex> LockGuard(CacheLock);
- auto It = CacheByFilename.find(Filename);
- return It == CacheByFilename.end() ? nullptr : It->getValue().first;
-}
-
-const CachedFileSystemEntry *
-DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
- llvm::sys::fs::UniqueID UID) const {
- std::lock_guard<std::mutex> LockGuard(CacheLock);
- auto It = EntriesByUID.find(UID);
- return It == EntriesByUID.end() ? nullptr : It->getSecond();
-}
-
-const CachedFileSystemEntry &
-DependencyScanningFilesystemSharedCache::CacheShard::
- getOrEmplaceEntryForFilename(StringRef Filename,
- llvm::ErrorOr<llvm::vfs::Status> Stat) {
- std::lock_guard<std::mutex> LockGuard(CacheLock);
- auto [It, Inserted] = CacheByFilename.insert({Filename, {nullptr, nullptr}});
- auto &[CachedEntry, CachedRealPath] = It->getValue();
- if (!CachedEntry) {
- // The entry is not present in the shared cache. Either the cache doesn't
- // know about the file at all, or it only knows about its real path.
- assert((Inserted || CachedRealPath) && "existing file with empty pair");
- CachedEntry =
- new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
- }
- return *CachedEntry;
-}
+namespace {
+using InProgressEntry =
+ DependencyScanningFilesystemSharedCache::InProgressEntry;
+using SlotResolved = llvm::ErrorOr<const CachedFileSystemEntry *>;
+using SlotProducer = std::shared_ptr<InProgressEntry>;
+using SlotAcquisitionResult = std::variant<SlotResolved, SlotProducer>;
+
+/// Returns a resolved entry if one is already present or in-flight under
+/// \p K; otherwise installs a fresh \c InProgressEntry and returns it as a
+/// producer slot.
+template <typename Map, typename Key>
+SlotAcquisitionResult acquireSlot(std::mutex &CacheLock, Map &M, const Key &K) {
+ std::shared_ptr<InProgressEntry> Pending;
+ {
+ std::lock_guard<std::mutex> ShardLock(CacheLock);
+ auto &State = M[K];
+
+ // Cache hit.
+ if (State.Entry)
+ return SlotResolved{State.Entry};
+
+ if (!State.InProgress) {
+ State.InProgress = std::make_shared<InProgressEntry>();
+ return SlotProducer{State.InProgress};
+ }
-const CachedFileSystemEntry &
-DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
- llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
- std::unique_ptr<llvm::MemoryBuffer> Contents) {
- std::lock_guard<std::mutex> LockGuard(CacheLock);
- auto [It, Inserted] = EntriesByUID.try_emplace(UID);
- auto &CachedEntry = It->getSecond();
- if (Inserted) {
- CachedFileContents *StoredContents = nullptr;
- if (Contents)
- StoredContents = new (ContentsStorage.Allocate())
- CachedFileContents(std::move(Contents));
- CachedEntry = new (EntryStorage.Allocate())
- CachedFileSystemEntry(std::move(Stat), StoredContents);
+ // Copy the shared_ptr so the slot survives our wait once the shard lock
+ // is released and the producer resets State.InProgress on publish.
+ Pending = State.InProgress;
}
- return *CachedEntry;
-}
-const CachedFileSystemEntry &
-DependencyScanningFilesystemSharedCache::CacheShard::
- getOrInsertEntryForFilename(StringRef Filename,
- const CachedFileSystemEntry &Entry) {
- std::lock_guard<std::mutex> LockGuard(CacheLock);
- auto [It, Inserted] = CacheByFilename.insert({Filename, {&Entry, nullptr}});
- auto &[CachedEntry, CachedRealPath] = It->getValue();
- if (!Inserted || !CachedEntry)
- CachedEntry = &Entry;
- return *CachedEntry;
+ // Wait off the shard lock so unrelated keys in this shard aren't blocked.
+ std::unique_lock<std::mutex> EntryLock(Pending->Mutex);
+ Pending->CondVar.wait(EntryLock, [&] { return Pending->Done; });
+ return SlotResolved{Pending->Result};
}
+} // namespace
const CachedRealPath *
DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename(
@@ -221,7 +200,7 @@ DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename(
assert(llvm::sys::path::is_absolute_gnu(Filename));
std::lock_guard<std::mutex> LockGuard(CacheLock);
auto It = CacheByFilename.find(Filename);
- return It == CacheByFilename.end() ? nullptr : It->getValue().second;
+ return It == CacheByFilename.end() ? nullptr : It->getValue().RealPath;
}
const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard::
@@ -229,7 +208,7 @@ const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard::
llvm::ErrorOr<llvm::StringRef> RealPath) {
std::lock_guard<std::mutex> LockGuard(CacheLock);
- const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].second;
+ const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].RealPath;
if (!StoredRealPath) {
auto OwnedRealPath = [&]() -> CachedRealPath {
if (!RealPath)
@@ -253,82 +232,98 @@ DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem(
updateWorkingDirForCacheLookup();
}
-const CachedFileSystemEntry &
-DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
- TentativeEntry TEntry) {
- auto &Shard =
- Service.getSharedCache().getShardForUID(TEntry.Status.getUniqueID());
- return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
- std::move(TEntry.Status),
- std::move(TEntry.Contents));
-}
-
-const CachedFileSystemEntry *
-DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
- StringRef Filename) {
- if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
- return Entry;
- auto &Shard = Service.getSharedCache().getShardForFilename(Filename);
- if (const auto *Entry = Shard.findEntryByFilename(Filename))
- return &LocalCache.insertEntryForFilename(Filename, *Entry);
- return nullptr;
-}
-
const CachedFileSystemEntry *
-DependencyScanningWorkerFilesystem::findSharedEntryByUID(
- llvm::vfs::Status Stat) const {
- return Service.getSharedCache()
- .getShardForUID(Stat.getUniqueID())
- .findEntryByUID(Stat.getUniqueID());
-}
-
-const CachedFileSystemEntry &
-DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForFilename(
- StringRef Filename, std::error_code EC) {
- return Service.getSharedCache()
- ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/202881
More information about the cfe-commits
mailing list