[clang] [llvm] [clang][deps] Add in-flight query caching to `DependencyScanningFilesystemSharedCache` (PR #199680)
Artem Chikin via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 4 08:19:23 PDT 2026
https://github.com/artemcm updated https://github.com/llvm/llvm-project/pull/199680
>From 73e486c8e67a6272cbbe7100608c55baafe6f9a4 Mon Sep 17 00:00:00 2001
From: Artem Chikin <achikin at apple.com>
Date: Thu, 28 May 2026 11:28:31 +0100
Subject: [PATCH 1/2] [Support][VFS] Make `TracingFileSystem` a template on its
counter type
Templating `TracingFileSystem` on the counter type lets the same proxy
serve single-threaded use (with `std::size_t` counters) and concurrent
use (with `std::atomic<std::size_t>`) without duplicating the class.
Introduce `TracingFileSystemImpl<CounterT>` as the underlying class
template and provide two aliases:
using TracingFileSystem = TracingFileSystemImpl<std::size_t>;
using AtomicTracingFileSystem = TracingFileSystemImpl<std::atomic<std::size_t>>;
Existing callers continue to use `TracingFileSystem` unchanged. A new
`TracingFileSystemTest.AtomicCountersUnderConcurrency` test exercises
the atomic instantiation with eight threads.
---
llvm/include/llvm/Support/VirtualFileSystem.h | 69 +++++++++++++++----
llvm/lib/Support/VirtualFileSystem.cpp | 26 -------
.../Support/VirtualFileSystemTest.cpp | 30 ++++++++
3 files changed, 86 insertions(+), 39 deletions(-)
diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h
index 5b8871b8f3db5..d22c534228331 100644
--- a/llvm/include/llvm/Support/VirtualFileSystem.h
+++ b/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -27,6 +27,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
+#include <atomic>
#include <cassert>
#include <cstdint>
#include <ctime>
@@ -1156,20 +1157,28 @@ class YAMLVFSWriter {
/// File system that tracks the number of calls to the underlying file system.
/// This is particularly useful when wrapped around \c RealFileSystem to add
/// lightweight tracking of expensive syscalls.
-class LLVM_ABI TracingFileSystem
- : public llvm::RTTIExtends<TracingFileSystem, ProxyFileSystem> {
+///
+/// Templated on the counter type so callers can choose between non-atomic
+/// counters (suitable for single-threaded tracing) and atomic counters
+/// (suitable for tracing under concurrent access). Use the
+/// \c TracingFileSystem and \c AtomicTracingFileSystem aliases below.
+template <typename CounterT>
+class TracingFileSystemImpl
+ : public llvm::RTTIExtends<TracingFileSystemImpl<CounterT>,
+ ProxyFileSystem> {
public:
- static const char ID;
+ inline static const char ID = 0;
- std::size_t NumStatusCalls = 0;
- std::size_t NumOpenFileForReadCalls = 0;
- std::size_t NumDirBeginCalls = 0;
- std::size_t NumGetRealPathCalls = 0;
- std::size_t NumExistsCalls = 0;
- std::size_t NumIsLocalCalls = 0;
+ CounterT NumStatusCalls = 0;
+ CounterT NumOpenFileForReadCalls = 0;
+ CounterT NumDirBeginCalls = 0;
+ CounterT NumGetRealPathCalls = 0;
+ CounterT NumExistsCalls = 0;
+ CounterT NumIsLocalCalls = 0;
- TracingFileSystem(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
- : RTTIExtends(std::move(FS)) {}
+ TracingFileSystemImpl(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
+ : llvm::RTTIExtends<TracingFileSystemImpl<CounterT>, ProxyFileSystem>(
+ std::move(FS)) {}
ErrorOr<Status> status(const Twine &Path) override {
++NumStatusCalls;
@@ -1203,10 +1212,44 @@ class LLVM_ABI TracingFileSystem
}
protected:
- void printImpl(raw_ostream &OS, PrintType Type,
- unsigned IndentLevel) const override;
+ void printImpl(raw_ostream &OS, FileSystem::PrintType Type,
+ unsigned IndentLevel) const override {
+ FileSystem::printIndent(OS, IndentLevel);
+ OS << "TracingFileSystem\n";
+ if (Type == FileSystem::PrintType::Summary)
+ return;
+
+ FileSystem::printIndent(OS, IndentLevel);
+ OS << "NumStatusCalls=" << static_cast<std::size_t>(NumStatusCalls) << "\n";
+ FileSystem::printIndent(OS, IndentLevel);
+ OS << "NumOpenFileForReadCalls="
+ << static_cast<std::size_t>(NumOpenFileForReadCalls) << "\n";
+ FileSystem::printIndent(OS, IndentLevel);
+ OS << "NumDirBeginCalls=" << static_cast<std::size_t>(NumDirBeginCalls)
+ << "\n";
+ FileSystem::printIndent(OS, IndentLevel);
+ OS << "NumGetRealPathCalls="
+ << static_cast<std::size_t>(NumGetRealPathCalls) << "\n";
+ FileSystem::printIndent(OS, IndentLevel);
+ OS << "NumExistsCalls=" << static_cast<std::size_t>(NumExistsCalls) << "\n";
+ FileSystem::printIndent(OS, IndentLevel);
+ OS << "NumIsLocalCalls=" << static_cast<std::size_t>(NumIsLocalCalls)
+ << "\n";
+
+ if (Type == FileSystem::PrintType::Contents)
+ Type = FileSystem::PrintType::Summary;
+ this->getUnderlyingFS().print(OS, Type, IndentLevel + 1);
+ }
};
+/// Single-threaded tracing filesystem. Counters are plain \c std::size_t and
+/// must not be incremented concurrently.
+using TracingFileSystem = TracingFileSystemImpl<std::size_t>;
+
+/// Concurrent-safe tracing filesystem. Counters are \c std::atomic<std::size_t>
+/// so the proxy can be shared across threads.
+using AtomicTracingFileSystem = TracingFileSystemImpl<std::atomic<std::size_t>>;
+
} // namespace vfs
} // namespace llvm
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index 69f3bb8582b87..42e8bb4f9958e 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -2991,34 +2991,8 @@ recursive_directory_iterator::increment(std::error_code &EC) {
return *this;
}
-void TracingFileSystem::printImpl(raw_ostream &OS, PrintType Type,
- unsigned IndentLevel) const {
- printIndent(OS, IndentLevel);
- OS << "TracingFileSystem\n";
- if (Type == PrintType::Summary)
- return;
-
- printIndent(OS, IndentLevel);
- OS << "NumStatusCalls=" << NumStatusCalls << "\n";
- printIndent(OS, IndentLevel);
- OS << "NumOpenFileForReadCalls=" << NumOpenFileForReadCalls << "\n";
- printIndent(OS, IndentLevel);
- OS << "NumDirBeginCalls=" << NumDirBeginCalls << "\n";
- printIndent(OS, IndentLevel);
- OS << "NumGetRealPathCalls=" << NumGetRealPathCalls << "\n";
- printIndent(OS, IndentLevel);
- OS << "NumExistsCalls=" << NumExistsCalls << "\n";
- printIndent(OS, IndentLevel);
- OS << "NumIsLocalCalls=" << NumIsLocalCalls << "\n";
-
- if (Type == PrintType::Contents)
- Type = PrintType::Summary;
- getUnderlyingFS().print(OS, Type, IndentLevel + 1);
-}
-
const char FileSystem::ID = 0;
const char OverlayFileSystem::ID = 0;
const char ProxyFileSystem::ID = 0;
const char InMemoryFileSystem::ID = 0;
const char RedirectingFileSystem::ID = 0;
-const char TracingFileSystem::ID = 0;
diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp
index bb0172b28e373..507a0e4108a79 100644
--- a/llvm/unittests/Support/VirtualFileSystemTest.cpp
+++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp
@@ -22,6 +22,8 @@
#include "gtest/gtest.h"
#include <map>
#include <string>
+#include <thread>
+#include <vector>
using namespace llvm;
using llvm::sys::fs::UniqueID;
@@ -3732,3 +3734,31 @@ TEST(TracingFileSystemTest, PrintOutput) {
" InMemoryFileSystem\n",
Output);
}
+
+TEST(TracingFileSystemTest, AtomicCountersUnderConcurrency) {
+ auto InMemoryFS = makeIntrusiveRefCnt<vfs::InMemoryFileSystem>();
+ InMemoryFS->setCurrentWorkingDirectory("/");
+ InMemoryFS->addFile("/foo", 0, MemoryBuffer::getMemBuffer("hello"));
+
+ auto TracingFS =
+ makeIntrusiveRefCnt<vfs::AtomicTracingFileSystem>(std::move(InMemoryFS));
+
+ constexpr unsigned NumThreads = 8;
+ constexpr unsigned CallsPerThread = 100;
+ std::vector<std::thread> Threads;
+ Threads.reserve(NumThreads);
+ for (unsigned I = 0; I < NumThreads; ++I) {
+ Threads.emplace_back([&] {
+ for (unsigned J = 0; J < CallsPerThread; ++J) {
+ (void)TracingFS->status("/foo");
+ (void)TracingFS->openFileForRead("/foo");
+ }
+ });
+ }
+ for (auto &T : Threads)
+ T.join();
+
+ EXPECT_EQ(TracingFS->NumStatusCalls.load(), NumThreads * CallsPerThread);
+ EXPECT_EQ(TracingFS->NumOpenFileForReadCalls.load(),
+ NumThreads * CallsPerThread);
+}
>From 9f45a272aebc0e99061ae8f0dd40daa1e8905090 Mon Sep 17 00:00:00 2001
From: Artem Chikin <achikin at apple.com>
Date: Thu, 28 May 2026 11:29:31 +0100
Subject: [PATCH 2/2] [clang][deps] Add in-flight query caching to
`DependencyScanningFilesystemSharedCache`
The shared cache deduplicates results only after the underlying
filesystem trip completes, so concurrent workers querying the same
filename or UID each pay their own `stat` and `open`. Track in-flight
queries per key: the first arrival produces the result, late arrivals
wait on a condition variable and adopt it.
`CacheByFilename`'s value gains a `std::shared_ptr<InProgressEntry>`
field alongside the resolved entry and real path; `EntriesByUID` does
the same. The in-progress entry is populated for the duration of a
producer's filesystem trip and reset on publish.
Four new shard methods manage the slot lifecycle:
- `acquireFilenameSlot` / `acquireUIDSlot` collapse three outcomes
(resolved hit, in-progress wait, fresh producer) into one critical
section. Waiters block on the slot's condition variable, which
atomically releases the shard lock for the duration of the wait.
- `fulfilFilenameSlot` / `fulfilUIDSlot` publish the produced entry,
set `Done`, clear the slot, and `notify_all` waiters outside the
lock.
---
.../DependencyScanningFilesystem.h | 232 +++++------
.../DependencyScanningFilesystem.cpp | 359 ++++++++++++------
.../DependencyScanning/CMakeLists.txt | 2 +
.../DependencyScanningFilesystemTest.cpp | 239 ++++++++++++
4 files changed, 579 insertions(+), 253 deletions(-)
diff --git a/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h
index c9ffc202ad694..fd997c36ded44 100644
--- a/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h
@@ -16,6 +16,8 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/VirtualFileSystem.h"
+#include <condition_variable>
+#include <memory>
#include <mutex>
#include <optional>
#include <variant>
@@ -154,19 +156,63 @@ using CachedRealPath = llvm::ErrorOr<std::string>;
/// the worker threads.
class DependencyScanningFilesystemSharedCache {
public:
+ /// Tracks a cache entry whose value is currently being computed by one
+ /// worker so that other workers arriving at the same key can wait for the
+ /// result rather than producing it in parallel. The producer publishes the
+ /// produced outcome into \c Result, sets \c Done, and notifies waiters via
+ /// \c CondVar. \c Done and \c Result are guarded by this entry's own
+ /// \c Mutex, which also backs the wait, decoupled from the owning
+ /// shard's \c CacheLock so waking waiters do not contend on the shard lock.
+ ///
+ /// \c Result holds either a resolved entry or a bare error. The error case
+ /// represents an uncached negative stat: it is shared with the concurrent
+ /// waiters that overlapped this query in time, but is deliberately not
+ /// persisted in the shard, so a later, separate query re-runs it.
+ struct InProgressEntry {
+ std::mutex Mutex;
+ std::condition_variable CondVar;
+ bool Done = false;
+ llvm::ErrorOr<const CachedFileSystemEntry *> Result = std::error_code{};
+ };
+
+ /// Outcome of attempting to claim a slot for a given key:
+ /// - \c Produce non-null: the caller has the right to produce the entry
+ /// and must call the matching \c fulfil*Slot once finished. \c Resolved
+ /// is unused.
+ /// - \c Produce null: the cache (or a concurrent producer) has already
+ /// resolved the key; the caller adopts \c Resolved, which is either the
+ /// resolved entry or an uncached-negative error.
+ struct SlotAcquisitionResult {
+ llvm::ErrorOr<const CachedFileSystemEntry *> Resolved;
+ std::shared_ptr<InProgressEntry> Produce;
+ };
+
struct CacheShard {
/// The mutex that needs to be locked before mutation of any member.
mutable std::mutex CacheLock;
- /// Map from filenames to cached entries and real paths.
- llvm::StringMap<
- std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
- llvm::BumpPtrAllocator>
- CacheByFilename;
+ /// Cache state stored per filename: the resolved entry, the
+ /// resolved real path, and the in-flight slot for an outstanding
+ /// query (if any). \c InProgress is reset to null once the producing
+ /// worker publishes its result.
+ struct FilenameCacheState {
+ const CachedFileSystemEntry *Entry = nullptr;
+ const CachedRealPath *RealPath = nullptr;
+ std::shared_ptr<InProgressEntry> InProgress;
+ };
+
+ /// Cache state stored per unique ID; similar to
+ /// \c FilenameCacheState.
+ struct UIDCacheState {
+ const CachedFileSystemEntry *Entry = nullptr;
+ std::shared_ptr<InProgressEntry> InProgress;
+ };
+
+ /// Map from filenames to their cached state.
+ llvm::StringMap<FilenameCacheState, llvm::BumpPtrAllocator> CacheByFilename;
- /// Map from unique IDs to cached entries.
- llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
- EntriesByUID;
+ /// Map from unique IDs to their cached state.
+ llvm::DenseMap<llvm::sys::fs::UniqueID, UIDCacheState> EntriesByUID;
/// The backing storage for cached entries.
llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;
@@ -177,13 +223,6 @@ class DependencyScanningFilesystemSharedCache {
/// The backing storage for cached real paths.
llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
- /// Returns entry associated with the filename or nullptr if none is found.
- const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
-
- /// Returns entry associated with the unique ID or nullptr if none is found.
- const CachedFileSystemEntry *
- findEntryByUID(llvm::sys::fs::UniqueID UID) const;
-
/// Returns entry associated with the filename if there is some. Otherwise,
/// constructs new one with the given status, associates it with the
/// filename and returns the result.
@@ -198,11 +237,37 @@ class DependencyScanningFilesystemSharedCache {
getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
std::unique_ptr<llvm::MemoryBuffer> Contents);
- /// Returns entry associated with the filename if there is some. Otherwise,
- /// associates the given entry with the filename and returns it.
- const CachedFileSystemEntry &
- getOrInsertEntryForFilename(StringRef Filename,
- const CachedFileSystemEntry &Entry);
+ /// Claims a slot for \p Filename. If a resolved entry already exists,
+ /// returns it. If another worker is currently producing a result for this
+ /// filename, blocks on its \c InProgressEntry until done and adopts the
+ /// produced outcome (a resolved entry, or an uncached-negative error).
+ /// Otherwise installs a fresh \c InProgressEntry and returns it as a
+ /// producer slot which the caller must complete via \c fulfillFilenameSlot.
+ SlotAcquisitionResult acquireFilenameSlot(StringRef);
+
+ /// Claims a slot for \p UID. Same semantics as \c acquireFilenameSlot but
+ /// keyed by unique ID; used after \c stat() has identified a file so that
+ /// concurrent \c readFile() calls for the same on-disk file (reached via
+ /// different filenames) collapse onto a single open.
+ SlotAcquisitionResult acquireUIDSlot(llvm::sys::fs::UniqueID);
+
+ /// Completes a producer slot acquired via \c acquireFilenameSlot.
+ /// Publishes \p Result to waiters and, when it is a resolved entry,
+ /// records it under \p Filename in \c CacheByFilename. An error result
+ /// (an uncached negative stat) is shared with current waiters but not
+ /// recorded. Marks the slot done, removes the in-progress entry, and
+ /// notifies waiters.
+ void fulfillFilenameSlot(StringRef,
+ const std::shared_ptr<InProgressEntry> &,
+ llvm::ErrorOr<const CachedFileSystemEntry *>);
+
+ /// Completes a producer slot acquired via \c acquireUIDSlot.
+ /// Publishes \p Result under \p UID in \c EntriesByUID, records
+ /// it in the slot, marks the slot done, removes the in-progress
+ /// entry, and notifies waiters.
+ void fulfillUIDSlot(llvm::sys::fs::UniqueID,
+ const std::shared_ptr<InProgressEntry> &,
+ const CachedFileSystemEntry *);
/// Returns the real path associated with the filename or nullptr if none is
/// found.
@@ -256,65 +321,6 @@ class DependencyScanningFilesystemSharedCache {
unsigned NumShards;
};
-/// This class is a local cache, that caches the 'stat' and 'open' calls to the
-/// underlying real file system.
-class DependencyScanningFilesystemLocalCache {
- llvm::StringMap<
- std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
- llvm::BumpPtrAllocator>
- Cache;
-
-public:
- /// Returns entry associated with the filename or nullptr if none is found.
- const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
- assert(llvm::sys::path::is_absolute_gnu(Filename));
- auto It = Cache.find(Filename);
- return It == Cache.end() ? nullptr : It->getValue().first;
- }
-
- /// Associates the given entry with the filename and returns the given entry
- /// pointer (for convenience).
- const CachedFileSystemEntry &
- insertEntryForFilename(StringRef Filename,
- const CachedFileSystemEntry &Entry) {
- assert(llvm::sys::path::is_absolute_gnu(Filename));
- auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}});
- auto &[CachedEntry, CachedRealPath] = It->getValue();
- if (!Inserted) {
- // The file is already present in the local cache. If we got here, it only
- // contains the real path. Let's make sure the entry is populated too.
- assert((!CachedEntry && CachedRealPath) && "entry already present");
- CachedEntry = &Entry;
- }
- return *CachedEntry;
- }
-
- /// Returns real path associated with the filename or nullptr if none is
- /// found.
- const CachedRealPath *findRealPathByFilename(StringRef Filename) const {
- assert(llvm::sys::path::is_absolute_gnu(Filename));
- auto It = Cache.find(Filename);
- return It == Cache.end() ? nullptr : It->getValue().second;
- }
-
- /// Associates the given real path with the filename and returns the given
- /// entry pointer (for convenience).
- const CachedRealPath &
- insertRealPathForFilename(StringRef Filename,
- const CachedRealPath &RealPath) {
- assert(llvm::sys::path::is_absolute_gnu(Filename));
- auto [It, Inserted] = Cache.insert({Filename, {nullptr, &RealPath}});
- auto &[CachedEntry, CachedRealPath] = It->getValue();
- if (!Inserted) {
- // The file is already present in the local cache. If we got here, it only
- // contains the entry. Let's make sure the real path is populated too.
- assert((!CachedRealPath && CachedEntry) && "real path already present");
- CachedRealPath = &RealPath;
- }
- return *CachedRealPath;
- }
-};
-
/// Reference to a CachedFileSystemEntry.
/// If the underlying entry is an opened file, this wrapper returns the file
/// contents and the scanned preprocessor directives.
@@ -411,14 +417,16 @@ class DependencyScanningWorkerFilesystem
bool exists(const Twine &Path) override;
private:
- /// For a filename that's not yet associated with any entry in the caches,
- /// uses the underlying filesystem to either look up the entry based in the
- /// shared cache indexed by unique ID, or creates new entry from scratch.
- /// \p FilenameForLookup will always be an absolute path, and different than
- /// \p OriginalFilename if \p OriginalFilename is relative.
+ /// Resolves the cache entry for \p FilenameForLookup through the shared
+ /// cache: returns an entry already produced by another worker (a cache hit
+ /// or the result of an in-flight wait), or claims the producer slot and
+ /// computes the entry via the underlying
+ /// filesystem. The result is written through to the local cache.
+ /// \p FilenameForLookup is always absolute, and may differ from
+ /// \p OriginalFilename if the latter is relative.
llvm::ErrorOr<const CachedFileSystemEntry &>
- computeAndStoreResult(StringRef OriginalFilename,
- StringRef FilenameForLookup);
+ resolveFilenameThroughSharedCache(StringRef OriginalFilename,
+ StringRef FilenameForLookup);
/// Represents a filesystem entry that has been stat-ed (and potentially read)
/// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
@@ -435,46 +443,6 @@ class DependencyScanningWorkerFilesystem
/// in status and size of read contents.
llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);
- /// Returns entry associated with the unique ID of the given tentative entry
- /// if there is some in the shared cache. Otherwise, constructs new one,
- /// associates it with the unique ID and returns the result.
- const CachedFileSystemEntry &
- getOrEmplaceSharedEntryForUID(TentativeEntry TEntry);
-
- /// Returns entry associated with the filename or nullptr if none is found.
- ///
- /// Returns entry from local cache if there is some. Otherwise, if the entry
- /// is found in the shared cache, writes it through the local cache and
- /// returns it. Otherwise returns nullptr.
- const CachedFileSystemEntry *
- findEntryByFilenameWithWriteThrough(StringRef Filename);
-
- /// Returns entry associated with the unique ID in the shared cache or nullptr
- /// if none is found.
- const CachedFileSystemEntry *
- findSharedEntryByUID(llvm::vfs::Status Stat) const;
-
- /// Associates the given entry with the filename in the local cache and
- /// returns it.
- const CachedFileSystemEntry &
- insertLocalEntryForFilename(StringRef Filename,
- const CachedFileSystemEntry &Entry) {
- return LocalCache.insertEntryForFilename(Filename, Entry);
- }
-
- /// Returns entry associated with the filename in the shared cache if there is
- /// some. Otherwise, constructs new one with the given error code, associates
- /// it with the filename and returns the result.
- const CachedFileSystemEntry &
- getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC);
-
- /// Returns entry associated with the filename in the shared cache if there is
- /// some. Otherwise, associates the given entry with the filename and returns
- /// it.
- const CachedFileSystemEntry &
- getOrInsertSharedEntryForFilename(StringRef Filename,
- const CachedFileSystemEntry &Entry);
-
void printImpl(raw_ostream &OS, PrintType Type,
unsigned IndentLevel) const override {
printIndent(OS, IndentLevel);
@@ -484,9 +452,19 @@ class DependencyScanningWorkerFilesystem
/// The service associated with this VFS.
DependencyScanningService &Service;
+
+ /// Per-filename state cached locally by this worker thread, so repeated
+ /// queries can be served without touching the shared cache. The entry and
+ /// real path are arena-owned by the shared cache and outlive this worker, so
+ /// borrowing raw pointers here is safe.
+ struct LocalEntry {
+ const CachedFileSystemEntry *File = nullptr;
+ const CachedRealPath *RealPath = nullptr;
+ };
+
/// The local cache is used by the worker thread to cache file system queries
/// locally instead of querying the global cache every time.
- DependencyScanningFilesystemLocalCache LocalCache;
+ llvm::StringMap<LocalEntry, llvm::BumpPtrAllocator> LocalCache;
/// The working directory to use for making relative paths absolute before
/// using them for cache lookups.
diff --git a/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp
index 6ff6cd9040665..133260c3ef026 100644
--- a/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -115,8 +115,13 @@ DependencyScanningFilesystemSharedCache::getOutOfDateEntries(
for (unsigned i = 0; i < NumShards; i++) {
const CacheShard &Shard = CacheShards[i];
std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
- for (const auto &[Path, CachedPair] : Shard.CacheByFilename) {
- const CachedFileSystemEntry *Entry = CachedPair.first;
+ for (const auto &[Path, State] : Shard.CacheByFilename) {
+ const CachedFileSystemEntry *Entry = State.Entry;
+ // Skip filenames with no resolved entry: either a real-path-only entry
+ // (from getRealPath), or an uncached negative stat that left the slot
+ // behind without recording one. Runs post-scan, so no in-progress slots.
+ if (!Entry)
+ continue;
llvm::ErrorOr<llvm::vfs::Status> Status = UnderlyingFS.status(Path);
if (Status) {
if (Entry->getError()) {
@@ -151,38 +156,145 @@ DependencyScanningFilesystemSharedCache::getOutOfDateEntries(
return InvalidDiagInfo;
}
-const CachedFileSystemEntry *
-DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
- StringRef Filename) const {
- assert(llvm::sys::path::is_absolute_gnu(Filename));
- std::lock_guard<std::mutex> LockGuard(CacheLock);
- auto It = CacheByFilename.find(Filename);
- return It == CacheByFilename.end() ? nullptr : It->getValue().first;
-}
-
-const CachedFileSystemEntry *
-DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
- llvm::sys::fs::UniqueID UID) const {
- std::lock_guard<std::mutex> LockGuard(CacheLock);
- auto It = EntriesByUID.find(UID);
- return It == EntriesByUID.end() ? nullptr : It->getSecond();
-}
-
const CachedFileSystemEntry &
DependencyScanningFilesystemSharedCache::CacheShard::
getOrEmplaceEntryForFilename(StringRef Filename,
llvm::ErrorOr<llvm::vfs::Status> Stat) {
std::lock_guard<std::mutex> LockGuard(CacheLock);
- auto [It, Inserted] = CacheByFilename.insert({Filename, {nullptr, nullptr}});
- auto &[CachedEntry, CachedRealPath] = It->getValue();
- if (!CachedEntry) {
- // The entry is not present in the shared cache. Either the cache doesn't
- // know about the file at all, or it only knows about its real path.
- assert((Inserted || CachedRealPath) && "existing file with empty pair");
- CachedEntry =
+ auto [It, Inserted] = CacheByFilename.try_emplace(Filename);
+ auto &State = It->getValue();
+ if (!State.Entry) {
+ // The entry is not present in the shared cache. This method runs only
+ // from inside a producer slot held by the caller, so either the cache
+ // state was just freshly inserted or it already carries an in-flight
+ // slot.
+ assert((Inserted || State.InProgress) &&
+ "cache state should be fresh or carry an in-flight slot held by "
+ "the caller");
+ State.Entry =
new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
}
- return *CachedEntry;
+ return *State.Entry;
+}
+
+DependencyScanningFilesystemSharedCache::SlotAcquisitionResult
+DependencyScanningFilesystemSharedCache::CacheShard::acquireFilenameSlot(
+ StringRef Filename) {
+ assert(llvm::sys::path::is_absolute_gnu(Filename));
+ std::shared_ptr<InProgressEntry> Pending;
+ {
+ std::lock_guard<std::mutex> ShardLock(CacheLock);
+ auto &State = CacheByFilename[Filename];
+
+ // Cache hit.
+ if (State.Entry)
+ return SlotAcquisitionResult{State.Entry, nullptr};
+
+ // No outstanding query: install an in-progress entry and become the
+ // producer.
+ if (!State.InProgress) {
+ State.InProgress = std::make_shared<InProgressEntry>();
+ return SlotAcquisitionResult{std::error_code{}, State.InProgress};
+ }
+
+ // Another worker is producing for this filename. Capture the shared_ptr
+ // by copy so the slot stays alive for us to wait on after we release the
+ // shard lock (the producer resets State.InProgress on publish).
+ Pending = State.InProgress;
+ }
+
+ // Wait for the producer on the slot's own mutex/CV, with the shard lock
+ // released so unrelated keys in this shard are not blocked by our wait, then
+ // adopt its outcome. This holds even for a negative result the producer
+ // chose not to cache: our query overlapped the producer's in time, so we can
+ // share its answer without that constituting caching across separate queries.
+ std::unique_lock<std::mutex> EntryLock(Pending->Mutex);
+ Pending->CondVar.wait(EntryLock, [&] { return Pending->Done; });
+ return SlotAcquisitionResult{Pending->Result, nullptr};
+}
+
+DependencyScanningFilesystemSharedCache::SlotAcquisitionResult
+DependencyScanningFilesystemSharedCache::CacheShard::acquireUIDSlot(
+ llvm::sys::fs::UniqueID UID) {
+ std::shared_ptr<InProgressEntry> Pending;
+ {
+ std::lock_guard<std::mutex> ShardLock(CacheLock);
+ auto &State = EntriesByUID[UID];
+
+ // Cache hit.
+ if (State.Entry)
+ return SlotAcquisitionResult{State.Entry, nullptr};
+
+ // No outstanding query: install an in-progress entry and become the
+ // producer.
+ if (!State.InProgress) {
+ State.InProgress = std::make_shared<InProgressEntry>();
+ return SlotAcquisitionResult{std::error_code{}, State.InProgress};
+ }
+
+ // Another worker is producing for this UID. Capture the shared_ptr by copy
+ // so the slot stays alive for us to wait on after we release the shard
+ // lock.
+ Pending = State.InProgress;
+ }
+
+ // Wait for the producer on the slot's own mutex/CV, with the shard lock
+ // released. Unlike the filename slot, a UID producer always publishes a
+ // non-null entry, so the adopted result is never an error.
+ std::unique_lock<std::mutex> EntryLock(Pending->Mutex);
+ Pending->CondVar.wait(EntryLock, [&] { return Pending->Done; });
+ assert(Pending->Result && *Pending->Result &&
+ "in-progress UID slot fulfilled without an entry");
+ return SlotAcquisitionResult{Pending->Result, nullptr};
+}
+
+void DependencyScanningFilesystemSharedCache::CacheShard::fulfillFilenameSlot(
+ StringRef Filename,
+ const std::shared_ptr<
+ DependencyScanningFilesystemSharedCache::InProgressEntry> &IPE,
+ llvm::ErrorOr<const CachedFileSystemEntry *> Result) {
+ {
+ std::lock_guard<std::mutex> ShardLock(CacheLock);
+ auto &State = CacheByFilename[Filename];
+ // Only a resolved entry is recorded for later queries; an uncached
+ // negative stat (an error) is shared with current waiters but not
+ // persisted in the shard.
+ if (Result && !State.Entry)
+ State.Entry = *Result;
+ State.InProgress.reset();
+ }
+ // Publish the result under the slot's own mutex, then notify waiters. Waiters
+ // read Done/Result under this same mutex, so the predicate wait cannot miss
+ // the wakeup. The producer's shared_ptr keeps the slot (and CondVar) alive
+ // across notify_all.
+ {
+ std::lock_guard<std::mutex> EntryLock(IPE->Mutex);
+ IPE->Result = Result;
+ IPE->Done = true;
+ }
+ IPE->CondVar.notify_all();
+}
+
+void DependencyScanningFilesystemSharedCache::CacheShard::fulfillUIDSlot(
+ llvm::sys::fs::UniqueID UID,
+ const std::shared_ptr<
+ DependencyScanningFilesystemSharedCache::InProgressEntry> &IPE,
+ const CachedFileSystemEntry *Result) {
+ {
+ std::lock_guard<std::mutex> ShardLock(CacheLock);
+ auto &State = EntriesByUID[UID];
+ if (Result && !State.Entry)
+ State.Entry = Result;
+ State.InProgress.reset();
+ }
+ // Publish the result under the slot's own mutex, then notify waiters. See
+ // fulfillFilenameSlot for the synchronization rationale.
+ {
+ std::lock_guard<std::mutex> EntryLock(IPE->Mutex);
+ IPE->Result = Result;
+ IPE->Done = true;
+ }
+ IPE->CondVar.notify_all();
}
const CachedFileSystemEntry &
@@ -190,29 +302,16 @@ DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
std::unique_ptr<llvm::MemoryBuffer> Contents) {
std::lock_guard<std::mutex> LockGuard(CacheLock);
- auto [It, Inserted] = EntriesByUID.try_emplace(UID);
- auto &CachedEntry = It->getSecond();
- if (Inserted) {
+ auto &State = EntriesByUID[UID];
+ if (!State.Entry) {
CachedFileContents *StoredContents = nullptr;
if (Contents)
StoredContents = new (ContentsStorage.Allocate())
CachedFileContents(std::move(Contents));
- CachedEntry = new (EntryStorage.Allocate())
+ State.Entry = new (EntryStorage.Allocate())
CachedFileSystemEntry(std::move(Stat), StoredContents);
}
- return *CachedEntry;
-}
-
-const CachedFileSystemEntry &
-DependencyScanningFilesystemSharedCache::CacheShard::
- getOrInsertEntryForFilename(StringRef Filename,
- const CachedFileSystemEntry &Entry) {
- std::lock_guard<std::mutex> LockGuard(CacheLock);
- auto [It, Inserted] = CacheByFilename.insert({Filename, {&Entry, nullptr}});
- auto &[CachedEntry, CachedRealPath] = It->getValue();
- if (!Inserted || !CachedEntry)
- CachedEntry = &Entry;
- return *CachedEntry;
+ return *State.Entry;
}
const CachedRealPath *
@@ -221,7 +320,7 @@ DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename(
assert(llvm::sys::path::is_absolute_gnu(Filename));
std::lock_guard<std::mutex> LockGuard(CacheLock);
auto It = CacheByFilename.find(Filename);
- return It == CacheByFilename.end() ? nullptr : It->getValue().second;
+ return It == CacheByFilename.end() ? nullptr : It->getValue().RealPath;
}
const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard::
@@ -229,7 +328,7 @@ const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard::
llvm::ErrorOr<llvm::StringRef> RealPath) {
std::lock_guard<std::mutex> LockGuard(CacheLock);
- const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].second;
+ const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].RealPath;
if (!StoredRealPath) {
auto OwnedRealPath = [&]() -> CachedRealPath {
if (!RealPath)
@@ -253,82 +352,88 @@ DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem(
updateWorkingDirForCacheLookup();
}
-const CachedFileSystemEntry &
-DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
- TentativeEntry TEntry) {
- auto &Shard =
- Service.getSharedCache().getShardForUID(TEntry.Status.getUniqueID());
- return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
- std::move(TEntry.Status),
- std::move(TEntry.Contents));
-}
-
-const CachedFileSystemEntry *
-DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
- StringRef Filename) {
- if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
- return Entry;
- auto &Shard = Service.getSharedCache().getShardForFilename(Filename);
- if (const auto *Entry = Shard.findEntryByFilename(Filename))
- return &LocalCache.insertEntryForFilename(Filename, *Entry);
- return nullptr;
-}
-
-const CachedFileSystemEntry *
-DependencyScanningWorkerFilesystem::findSharedEntryByUID(
- llvm::vfs::Status Stat) const {
- return Service.getSharedCache()
- .getShardForUID(Stat.getUniqueID())
- .findEntryByUID(Stat.getUniqueID());
-}
-
-const CachedFileSystemEntry &
-DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForFilename(
- StringRef Filename, std::error_code EC) {
- return Service.getSharedCache()
- .getShardForFilename(Filename)
- .getOrEmplaceEntryForFilename(Filename, EC);
-}
-
-const CachedFileSystemEntry &
-DependencyScanningWorkerFilesystem::getOrInsertSharedEntryForFilename(
- StringRef Filename, const CachedFileSystemEntry &Entry) {
- return Service.getSharedCache()
- .getShardForFilename(Filename)
- .getOrInsertEntryForFilename(Filename, Entry);
-}
-
llvm::ErrorOr<const CachedFileSystemEntry &>
-DependencyScanningWorkerFilesystem::computeAndStoreResult(
+DependencyScanningWorkerFilesystem::resolveFilenameThroughSharedCache(
StringRef OriginalFilename, StringRef FilenameForLookup) {
- llvm::ErrorOr<llvm::vfs::Status> Stat =
- getUnderlyingFS().status(OriginalFilename);
- if (!Stat) {
- if (!Service.getOpts().CacheNegativeStats ||
- !shouldCacheNegativeStatsForPath(OriginalFilename))
- return Stat.getError();
-
- const auto &Entry =
- getOrEmplaceSharedEntryForFilename(FilenameForLookup, Stat.getError());
- return insertLocalEntryForFilename(FilenameForLookup, Entry);
+ auto &FilenameShard =
+ Service.getSharedCache().getShardForFilename(FilenameForLookup);
+
+ // Acquire a per-filename in-progress entry. If another worker has already
+ // produced an entry under this filename, or is currently producing one,
+ // adopt its result instead of duplicating the underlying filesystem.
+ auto FilenameSlot = FilenameShard.acquireFilenameSlot(FilenameForLookup);
+ if (!FilenameSlot.Produce) {
+ // Resolved by a cache hit, or adopted from a concurrent producer. An error
+ // here is an uncached negative stat shared with us by a query that
+ // overlapped ours in time.
+ if (!FilenameSlot.Resolved)
+ return FilenameSlot.Resolved.getError();
+ return **FilenameSlot.Resolved;
}
- if (const auto *Entry = findSharedEntryByUID(*Stat))
- return insertLocalEntryForFilename(FilenameForLookup, *Entry);
-
- auto TEntry =
- Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(OriginalFilename);
+ // Compute the result, then publish it through the producer slot so any
+ // workers waiting on the same filename adopt it.
+ std::shared_ptr<DependencyScanningFilesystemSharedCache::InProgressEntry>
+ ProducerSlot = std::move(FilenameSlot.Produce);
+ auto Result = [&]() -> llvm::ErrorOr<const CachedFileSystemEntry &> {
+ llvm::ErrorOr<llvm::vfs::Status> Stat =
+ getUnderlyingFS().status(OriginalFilename);
+ if (!Stat) {
+ // Honor the negative-stat caching policy. When caching is disabled for
+ // this path, return the error without recording it in the shard. The
+ // producer slot publishes the error to concurrent waiters so they adopt
+ // it (a valid shared answer for overlapping queries), but because it is
+ // not persisted, a later separate query re-runs it; this lets a file
+ // created mid-scan become visible.
+ if (!Service.getOpts().CacheNegativeStats ||
+ !shouldCacheNegativeStatsForPath(OriginalFilename))
+ return Stat.getError();
+ return FilenameShard.getOrEmplaceEntryForFilename(FilenameForLookup,
+ Stat.getError());
+ }
- const CachedFileSystemEntry *SharedEntry = [&]() {
- if (TEntry) {
- const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
- return &getOrInsertSharedEntryForFilename(FilenameForLookup, UIDEntry);
+ // Acquire a per-UID producer slot to dedup concurrent readFile()
+ // calls across workers that arrived under different filenames
+ // pointing at the same on-disk file.
+ auto &UIDShard =
+ Service.getSharedCache().getShardForUID(Stat->getUniqueID());
+ auto UIDSlot = UIDShard.acquireUIDSlot(Stat->getUniqueID());
+ const CachedFileSystemEntry *SharedEntry = nullptr;
+ if (!UIDSlot.Produce) {
+ // Adopt the entry produced by a concurrent worker for the same UID.
+ SharedEntry = *UIDSlot.Resolved;
+ } else {
+ auto TEntry = Stat->isDirectory() ? TentativeEntry(*Stat)
+ : readFile(OriginalFilename);
+ if (TEntry) {
+ SharedEntry = &UIDShard.getOrEmplaceEntryForUID(
+ Stat->getUniqueID(), std::move(TEntry->Status),
+ std::move(TEntry->Contents));
+ } else {
+ // `readFile` failed despite `stat` succeeding. Cache the failure under
+ // the filename, and publish that same entry under the UID so awaiting
+ // workers surface the error rather than racing to retry the open.
+ SharedEntry = &FilenameShard.getOrEmplaceEntryForFilename(
+ FilenameForLookup, TEntry.getError());
+ }
+ UIDShard.fulfillUIDSlot(Stat->getUniqueID(), UIDSlot.Produce,
+ SharedEntry);
}
- return &getOrEmplaceSharedEntryForFilename(FilenameForLookup,
- TEntry.getError());
+
+ // The shared-cache binding under this filename is published by
+ // fulfillFilenameSlot below; the worker-local cache is filled by the
+ // caller once this resolve returns.
+ return *SharedEntry;
}();
- return insertLocalEntryForFilename(FilenameForLookup, *SharedEntry);
+ // Publish the produced outcome: a resolved entry, or the error of an
+ // uncached negative stat, which concurrent waiters adopt without it being
+ // recorded in the shard.
+ llvm::ErrorOr<const CachedFileSystemEntry *> Produced =
+ Result ? llvm::ErrorOr<const CachedFileSystemEntry *>(&*Result)
+ : llvm::ErrorOr<const CachedFileSystemEntry *>(Result.getError());
+ FilenameShard.fulfillFilenameSlot(FilenameForLookup, ProducerSlot, Produced);
+ return Result;
}
llvm::ErrorOr<EntryRef>
@@ -339,12 +444,15 @@ DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
if (!FilenameForLookup)
return FilenameForLookup.getError();
- if (const auto *Entry =
- findEntryByFilenameWithWriteThrough(*FilenameForLookup))
- return EntryRef(OriginalFilename, *Entry).unwrapError();
- auto MaybeEntry = computeAndStoreResult(OriginalFilename, *FilenameForLookup);
+ auto &Local = LocalCache[*FilenameForLookup];
+ if (Local.File)
+ return EntryRef(OriginalFilename, *Local.File).unwrapError();
+
+ auto MaybeEntry =
+ resolveFilenameThroughSharedCache(OriginalFilename, *FilenameForLookup);
if (!MaybeEntry)
return MaybeEntry.getError();
+ Local.File = &*MaybeEntry;
return EntryRef(OriginalFilename, *MaybeEntry).unwrapError();
}
@@ -448,18 +556,17 @@ DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path,
};
// If we already have the result in local cache, no work required.
- if (const auto *RealPath =
- LocalCache.findRealPathByFilename(*FilenameForLookup))
- return HandleCachedRealPath(*RealPath);
+ auto &Local = LocalCache[*FilenameForLookup];
+ if (Local.RealPath)
+ return HandleCachedRealPath(*Local.RealPath);
// If we have the result in the shared cache, cache it locally.
auto &Shard =
Service.getSharedCache().getShardForFilename(*FilenameForLookup);
if (const auto *ShardRealPath =
Shard.findRealPathByFilename(*FilenameForLookup)) {
- const auto &RealPath = LocalCache.insertRealPathForFilename(
- *FilenameForLookup, *ShardRealPath);
- return HandleCachedRealPath(RealPath);
+ Local.RealPath = ShardRealPath;
+ return HandleCachedRealPath(*Local.RealPath);
}
// If we don't know the real path, compute it...
@@ -473,8 +580,8 @@ DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path,
// whatever is in the shared cache into the local one.
const auto &RealPath = Shard.getOrEmplaceRealPathForFilename(
*FilenameForLookup, ComputedRealPath);
- return HandleCachedRealPath(
- LocalCache.insertRealPathForFilename(*FilenameForLookup, RealPath));
+ Local.RealPath = &RealPath;
+ return HandleCachedRealPath(*Local.RealPath);
}
std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory(
diff --git a/clang/unittests/DependencyScanning/CMakeLists.txt b/clang/unittests/DependencyScanning/CMakeLists.txt
index 4f2a36067209c..53d257a76784d 100644
--- a/clang/unittests/DependencyScanning/CMakeLists.txt
+++ b/clang/unittests/DependencyScanning/CMakeLists.txt
@@ -4,6 +4,8 @@ add_clang_unittest(ClangDependencyScanningTests
CLANG_LIBS
clangDependencyScanning
clangFrontend # For TextDiagnosticPrinter.
+ LINK_LIBS
+ LLVMTestingSupport
LLVM_COMPONENTS
${LLVM_TARGETS_TO_BUILD}
Option
diff --git a/clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp b/clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp
index 85525ba5324ca..8108200d0b21c 100644
--- a/clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp
+++ b/clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp
@@ -10,10 +10,39 @@
#include "clang/DependencyScanning/DependencyScanningService.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
#include "gtest/gtest.h"
+#include <condition_variable>
+#include <mutex>
+#include <thread>
using namespace clang::dependencies;
+namespace {
+
+/// Releases all waiting threads simultaneously so that the worker logic can
+/// be observed under maximal concurrency rather than a thread-spawn cascade.
+struct StartBarrier {
+ std::mutex M;
+ std::condition_variable CV;
+ bool Released = false;
+
+ void wait() {
+ std::unique_lock<std::mutex> Lock(M);
+ CV.wait(Lock, [&] { return Released; });
+ }
+
+ void release() {
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ Released = true;
+ }
+ CV.notify_all();
+ }
+};
+
+} // namespace
+
TEST(DependencyScanningFilesystem, OpenFileAndGetBufferRepeatedly) {
auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
InMemoryFS->setCurrentWorkingDirectory("/");
@@ -334,3 +363,213 @@ TEST(DependencyScanningFilesystem, DoNotDiagnoseDirSizeChange) {
auto InvalidEntries = Service.getSharedCache().getOutOfDateEntries(*FS);
EXPECT_EQ(InvalidEntries.size(), 0u);
}
+
+TEST(DependencyScanningWorkerFilesystem, ConcurrentSameFilenameDeduplicates) {
+ llvm::unittest::TempDir TmpDir("dswf-same-filename", /*Unique=*/true);
+ llvm::unittest::TempFile TmpFile(TmpDir.path("foo.c"), /*Suffix=*/"",
+ /*Contents=*/"hello");
+
+ auto TracingFS =
+ llvm::makeIntrusiveRefCnt<llvm::vfs::AtomicTracingFileSystem>(
+ llvm::vfs::getRealFileSystem());
+ DependencyScanningService Service({});
+
+ constexpr unsigned NumWorkers = 16;
+ std::vector<std::unique_ptr<DependencyScanningWorkerFilesystem>> Workers;
+ Workers.reserve(NumWorkers);
+ for (unsigned I = 0; I < NumWorkers; ++I)
+ Workers.push_back(std::make_unique<DependencyScanningWorkerFilesystem>(
+ Service, TracingFS));
+
+ StartBarrier Barrier;
+ std::vector<std::thread> Threads;
+ std::vector<llvm::ErrorOr<EntryRef>> Results(
+ NumWorkers, llvm::ErrorOr<EntryRef>(std::error_code{}));
+ Threads.reserve(NumWorkers);
+ for (unsigned I = 0; I < NumWorkers; ++I) {
+ Threads.emplace_back([&, I] {
+ Barrier.wait();
+ Results[I] = Workers[I]->getOrCreateFileSystemEntry(TmpFile.path());
+ });
+ }
+ Barrier.release();
+ for (auto &T : Threads)
+ T.join();
+
+ EXPECT_EQ(TracingFS->NumStatusCalls.load(), 1u);
+ EXPECT_EQ(TracingFS->NumOpenFileForReadCalls.load(), 1u);
+
+ // All workers must have observed the same underlying entry.
+ ASSERT_TRUE(Results[0]);
+ llvm::sys::fs::UniqueID FirstUID = Results[0]->getStatus().getUniqueID();
+ const char *FirstContents = Results[0]->getContents().data();
+ for (unsigned I = 0; I < NumWorkers; ++I) {
+ ASSERT_TRUE(Results[I]) << "worker " << I << " failed";
+ EXPECT_EQ(Results[I]->getStatus().getUniqueID(), FirstUID);
+ EXPECT_EQ(Results[I]->getContents().data(), FirstContents);
+ }
+}
+
+TEST(DependencyScanningWorkerFilesystem,
+ ConcurrentSameUIDDifferentFilenamesDeduplicatesOpen) {
+ // Use a real on-disk file plus a hard link so the two filenames share a
+ // UniqueID, exercising the per-UID slot.
+ llvm::unittest::TempDir TmpDir("dswf-same-uid", /*Unique=*/true);
+ llvm::SmallString<128> RealPath = TmpDir.path("real.c");
+ llvm::SmallString<128> AliasPath = TmpDir.path("alias.c");
+ llvm::unittest::TempFile TmpFile(RealPath, /*Suffix=*/"", /*Contents=*/"hi");
+ llvm::unittest::TempLink TmpLink(RealPath, AliasPath);
+
+ auto TracingFS =
+ llvm::makeIntrusiveRefCnt<llvm::vfs::AtomicTracingFileSystem>(
+ llvm::vfs::getRealFileSystem());
+ DependencyScanningService Service({});
+
+ constexpr unsigned NumWorkers = 16;
+ std::vector<std::unique_ptr<DependencyScanningWorkerFilesystem>> Workers;
+ Workers.reserve(NumWorkers);
+ for (unsigned I = 0; I < NumWorkers; ++I)
+ Workers.push_back(std::make_unique<DependencyScanningWorkerFilesystem>(
+ Service, TracingFS));
+
+ StartBarrier Barrier;
+ std::vector<std::thread> Threads;
+ std::vector<llvm::ErrorOr<EntryRef>> Results(
+ NumWorkers, llvm::ErrorOr<EntryRef>(std::error_code{}));
+ Threads.reserve(NumWorkers);
+ for (unsigned I = 0; I < NumWorkers; ++I) {
+ llvm::StringRef Path =
+ (I % 2 == 0) ? llvm::StringRef(RealPath) : llvm::StringRef(AliasPath);
+ Threads.emplace_back([&, I, Path] {
+ Barrier.wait();
+ Results[I] = Workers[I]->getOrCreateFileSystemEntry(Path);
+ });
+ }
+ Barrier.release();
+ for (auto &T : Threads)
+ T.join();
+
+ // Each filename's slot dedupes its own stats; with two filenames we expect
+ // at most two stats. The UID slot then collapses the actual reads.
+ EXPECT_LE(TracingFS->NumStatusCalls.load(), 2u);
+ EXPECT_EQ(TracingFS->NumOpenFileForReadCalls.load(), 1u);
+
+ ASSERT_TRUE(Results[0]);
+ llvm::sys::fs::UniqueID FirstUID = Results[0]->getStatus().getUniqueID();
+ for (unsigned I = 0; I < NumWorkers; ++I) {
+ ASSERT_TRUE(Results[I]) << "worker " << I << " failed";
+ EXPECT_EQ(Results[I]->getStatus().getUniqueID(), FirstUID);
+ }
+}
+
+TEST(DependencyScanningWorkerFilesystem, ConcurrentNegativeStatDeduplicates) {
+ // Construct a path inside a temporary directory but never create the
+ // file, so concurrent stat() calls land on the negative-stat path through
+ // the real filesystem.
+ llvm::unittest::TempDir TmpDir("dswf-negative", /*Unique=*/true);
+ llvm::SmallString<128> MissingPath = TmpDir.path("missing.h");
+
+ auto TracingFS =
+ llvm::makeIntrusiveRefCnt<llvm::vfs::AtomicTracingFileSystem>(
+ llvm::vfs::getRealFileSystem());
+ // The dedup under test only applies when negative stats are cached; enable
+ // caching and use a path whose extension is eligible for it.
+ DependencyScanningServiceOptions Opts;
+ Opts.CacheNegativeStats = true;
+ DependencyScanningService Service(std::move(Opts));
+
+ constexpr unsigned NumWorkers = 16;
+ std::vector<std::unique_ptr<DependencyScanningWorkerFilesystem>> Workers;
+ Workers.reserve(NumWorkers);
+ for (unsigned I = 0; I < NumWorkers; ++I)
+ Workers.push_back(std::make_unique<DependencyScanningWorkerFilesystem>(
+ Service, TracingFS));
+
+ StartBarrier Barrier;
+ std::vector<std::thread> Threads;
+ std::vector<llvm::ErrorOr<llvm::vfs::Status>> Results(
+ NumWorkers, llvm::ErrorOr<llvm::vfs::Status>(std::error_code{}));
+ Threads.reserve(NumWorkers);
+ for (unsigned I = 0; I < NumWorkers; ++I) {
+ Threads.emplace_back([&, I] {
+ Barrier.wait();
+ Results[I] = Workers[I]->status(MissingPath);
+ });
+ }
+ Barrier.release();
+ for (auto &T : Threads)
+ T.join();
+
+ EXPECT_EQ(TracingFS->NumStatusCalls.load(), 1u);
+ EXPECT_EQ(TracingFS->NumOpenFileForReadCalls.load(), 0u);
+
+ // Every worker observed the same negative result.
+ ASSERT_FALSE(Results[0]);
+ std::error_code FirstError = Results[0].getError();
+ for (unsigned I = 0; I < NumWorkers; ++I) {
+ ASSERT_FALSE(Results[I]) << "worker " << I << " unexpectedly succeeded";
+ EXPECT_EQ(Results[I].getError(), FirstError);
+ }
+}
+
+TEST(DependencyScanningWorkerFilesystem,
+ ConcurrentUncachedNegativeStatIsSharedButNotPersisted) {
+ // Same as above, but with negative-stat caching disabled. Concurrent queries
+ // that overlap the producer's in-flight stat still adopt its negative
+ // result (sharing an answer for overlapping queries is not "caching"), so no
+ // worker opens the missing file. Because the result is not persisted, a
+ // later, separate query must re-run the stat.
+ llvm::unittest::TempDir TmpDir("dswf-uncached-negative", /*Unique=*/true);
+ llvm::SmallString<128> MissingPath = TmpDir.path("missing.h");
+
+ auto TracingFS =
+ llvm::makeIntrusiveRefCnt<llvm::vfs::AtomicTracingFileSystem>(
+ llvm::vfs::getRealFileSystem());
+ DependencyScanningServiceOptions Opts;
+ Opts.CacheNegativeStats = false;
+ DependencyScanningService Service(std::move(Opts));
+
+ constexpr unsigned NumWorkers = 16;
+ std::vector<std::unique_ptr<DependencyScanningWorkerFilesystem>> Workers;
+ Workers.reserve(NumWorkers);
+ for (unsigned I = 0; I < NumWorkers; ++I)
+ Workers.push_back(std::make_unique<DependencyScanningWorkerFilesystem>(
+ Service, TracingFS));
+
+ StartBarrier Barrier;
+ std::vector<std::thread> Threads;
+ std::vector<llvm::ErrorOr<llvm::vfs::Status>> Results(
+ NumWorkers, llvm::ErrorOr<llvm::vfs::Status>(std::error_code{}));
+ Threads.reserve(NumWorkers);
+ for (unsigned I = 0; I < NumWorkers; ++I) {
+ Threads.emplace_back([&, I] {
+ Barrier.wait();
+ Results[I] = Workers[I]->status(MissingPath);
+ });
+ }
+ Barrier.release();
+ for (auto &T : Threads)
+ T.join();
+
+ // A negative stat never opens the file, regardless of how many workers
+ // produced versus adopted. Producers are bounded by the worker count;
+ // adopters add none. With caching off the result is not persisted, so the
+ // count is not pinned to one, but it can never exceed one stat per worker.
+ EXPECT_EQ(TracingFS->NumOpenFileForReadCalls.load(), 0u);
+ unsigned StatsAfterBurst = TracingFS->NumStatusCalls.load();
+ EXPECT_GE(StatsAfterBurst, 1u);
+ EXPECT_LE(StatsAfterBurst, NumWorkers);
+
+ // Every worker observed the same negative result.
+ ASSERT_FALSE(Results[0]);
+ std::error_code FirstError = Results[0].getError();
+ for (unsigned I = 0; I < NumWorkers; ++I) {
+ ASSERT_FALSE(Results[I]) << "worker " << I << " unexpectedly succeeded";
+ EXPECT_EQ(Results[I].getError(), FirstError);
+ }
+
+ // The uncached negative was shared, not cached: a later, separate query
+ // re-runs the stat rather than adopting a persisted miss.
+ EXPECT_FALSE(Workers[0]->status(MissingPath));
+ EXPECT_EQ(TracingFS->NumStatusCalls.load(), StatsAfterBurst + 1);
+}
More information about the cfe-commits
mailing list