[clang] 5daeada - [clang][deps] Ensure filesystem cache consistency

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Fri Jan 21 04:04:32 PST 2022


Author: Jan Svoboda
Date: 2022-01-21T13:04:25+01:00
New Revision: 5daeada33051aa85777593d3f69eb29f26e7fb2f

URL: https://github.com/llvm/llvm-project/commit/5daeada33051aa85777593d3f69eb29f26e7fb2f
DIFF: https://github.com/llvm/llvm-project/commit/5daeada33051aa85777593d3f69eb29f26e7fb2f.diff

LOG: [clang][deps] Ensure filesystem cache consistency

The minimizing filesystem used by the dependency scanner isn't great when it comes to the consistency of its caches. There are two problems that can be exposed by a filesystem that changes during dependency scan:
1. In-memory cache entries for original and minimized files are distinct, populated at different times using separate stat/open syscalls. This means that when a file is read with minimization disabled, its contents might be inconsistent when the same file is read with minimization enabled at later point (and vice versa).
2. In-memory cache entries are indexed by filename. This is problematic for symlinks, where the contents of the symlink might be inconsistent with contents of the original file (for the same reason as in problem 1).

This patch ensures consistency by always stating/reading a file exactly once. The original contents are always cached and minimized contents are derived from that on demand. The cache entries are now indexed by their `UniqueID` ensuring consistency for symlinks too. Moreover, the stat/read syscalls are now issued outside of critical section.

Depends on D115935.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D114966

Added: 
    

Modified: 
    clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
    clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
    clang/unittests/Tooling/DependencyScannerTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index 08a60fe780f50..70e9c4a3ffea2 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -22,6 +22,26 @@ namespace clang {
 namespace tooling {
 namespace dependencies {
 
+/// Original and minimized contents of a cached file entry. Single instance can
+/// be shared between multiple entries.
+struct CachedFileContents {
+  CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Original)
+      : Original(std::move(Original)), MinimizedAccess(nullptr) {}
+
+  /// Owning storage for the minimized contents.
+  std::unique_ptr<llvm::MemoryBuffer> Original;
+
+  /// The mutex that must be locked before mutating minimized contents.
+  std::mutex ValueLock;
+  /// Owning storage for the minimized contents.
+  std::unique_ptr<llvm::MemoryBuffer> MinimizedStorage;
+  /// Accessor to the minimized contents that's atomic to avoid data races.
+  std::atomic<llvm::MemoryBuffer *> MinimizedAccess;
+  /// Skipped range mapping of the minimized contents.
+  /// This is initialized iff `MinimizedAccess != nullptr`.
+  PreprocessorSkippedRangeMapping PPSkippedRangeMapping;
+};
+
 /// An in-memory representation of a file system entity that is of interest to
 /// the dependency scanning filesystem.
 ///
@@ -29,111 +49,99 @@ namespace dependencies {
 /// - opened file with original contents and a stat value,
 /// - opened file with original contents, minimized contents and a stat value,
 /// - directory entry with its stat value,
-/// - filesystem error,
-/// - uninitialized entry with unknown status.
+/// - filesystem error.
+///
+/// Single instance of this class can be shared across 
diff erent filenames (e.g.
+/// a regular file and a symlink). For this reason the status filename is empty
+/// and is only materialized by \c EntryRef that knows the requested filename.
 class CachedFileSystemEntry {
 public:
-  /// Creates an uninitialized entry.
-  CachedFileSystemEntry()
-      : MaybeStat(llvm::vfs::Status()), MinimizedContentsAccess(nullptr) {}
-
-  /// Initialize the cached file system entry.
-  void init(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus, StringRef Filename,
-            llvm::vfs::FileSystem &FS);
+  /// Creates an entry without contents: either a filesystem error or
+  /// a directory with stat value.
+  CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)
+      : MaybeStat(std::move(Stat)), Contents(nullptr) {
+    clearStatName();
+  }
 
-  /// Initialize the entry as file with minimized or original contents.
-  ///
-  /// The filesystem opens the file even for `stat` calls open to avoid the
-  /// issues with stat + open of minimized files that might lead to a
-  /// mismatching size of the file.
-  llvm::ErrorOr<llvm::vfs::Status> initFile(StringRef Filename,
-                                            llvm::vfs::FileSystem &FS);
-
-  /// Minimize contents of the file.
-  void minimizeFile();
-
-  /// \returns True if the entry is initialized.
-  bool isInitialized() const {
-    return !MaybeStat || MaybeStat->isStatusKnown();
+  /// Creates an entry representing a file with contents.
+  CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,
+                        CachedFileContents *Contents)
+      : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) {
+    clearStatName();
   }
 
   /// \returns True if the entry is a filesystem error.
   bool isError() const { return !MaybeStat; }
 
-  /// \returns True if the current entry points to a directory.
+  /// \returns True if the current entry represents a directory.
   bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); }
 
   /// \returns Original contents of the file.
   StringRef getOriginalContents() const {
-    assert(isInitialized() && "not initialized");
     assert(!isError() && "error");
     assert(!MaybeStat->isDirectory() && "not a file");
-    assert(OriginalContents && "not read");
-    return OriginalContents->getBuffer();
+    assert(Contents && "contents not initialized");
+    return Contents->Original->getBuffer();
   }
 
   /// \returns Minimized contents of the file.
   StringRef getMinimizedContents() const {
-    assert(isInitialized() && "not initialized");
     assert(!isError() && "error");
-    assert(!isDirectory() && "not a file");
-    llvm::MemoryBuffer *Buffer = MinimizedContentsAccess.load();
+    assert(!MaybeStat->isDirectory() && "not a file");
+    assert(Contents && "contents not initialized");
+    llvm::MemoryBuffer *Buffer = Contents->MinimizedAccess.load();
     assert(Buffer && "not minimized");
     return Buffer->getBuffer();
   }
 
-  /// \returns True if this entry represents a file that can be read.
-  bool isReadable() const { return MaybeStat && !MaybeStat->isDirectory(); }
-
-  /// \returns True if this cached entry needs to be updated.
-  bool needsUpdate(bool ShouldBeMinimized) const {
-    return isReadable() && needsMinimization(ShouldBeMinimized);
-  }
-
-  /// \returns True if the contents of this entry need to be minimized.
-  bool needsMinimization(bool ShouldBeMinimized) const {
-    return ShouldBeMinimized && !MinimizedContentsAccess.load();
-  }
-
   /// \returns The error.
-  std::error_code getError() const {
-    assert(isInitialized() && "not initialized");
-    return MaybeStat.getError();
-  }
+  std::error_code getError() const { return MaybeStat.getError(); }
 
-  /// \returns The entry status.
+  /// \returns The entry status with empty filename.
   llvm::vfs::Status getStatus() const {
-    assert(isInitialized() && "not initialized");
     assert(!isError() && "error");
+    assert(MaybeStat->getName().empty() && "stat name must be empty");
     return *MaybeStat;
   }
 
-  /// \returns the name of the file.
-  StringRef getName() const {
-    assert(isInitialized() && "not initialized");
+  /// \returns The unique ID of the entry.
+  llvm::sys::fs::UniqueID getUniqueID() const {
     assert(!isError() && "error");
-    return MaybeStat->getName();
+    return MaybeStat->getUniqueID();
   }
 
-  /// Return the mapping between location -> distance that is used to speed up
+  /// \returns The mapping between location -> distance that is used to speed up
   /// the block skipping in the preprocessor.
   const PreprocessorSkippedRangeMapping &getPPSkippedRangeMapping() const {
     assert(!isError() && "error");
     assert(!isDirectory() && "not a file");
-    return PPSkippedRangeMapping;
+    assert(Contents && "contents not initialized");
+    return Contents->PPSkippedRangeMapping;
+  }
+
+  /// \returns The data structure holding both original and minimized contents.
+  CachedFileContents *getContents() const {
+    assert(!isError() && "error");
+    assert(!isDirectory() && "not a file");
+    return Contents;
   }
 
 private:
-  llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
-  std::unique_ptr<llvm::MemoryBuffer> OriginalContents;
+  void clearStatName() {
+    if (MaybeStat)
+      MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, "");
+  }
 
-  /// Owning storage for the minimized file contents.
-  std::unique_ptr<llvm::MemoryBuffer> MinimizedContentsStorage;
-  /// Atomic view of the minimized file contents.
-  /// This prevents data races when multiple threads call `needsMinimization`.
-  std::atomic<llvm::MemoryBuffer *> MinimizedContentsAccess;
+  /// Either the filesystem error or status of the entry.
+  /// The filename is empty and only materialized by \c EntryRef.
+  llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
 
-  PreprocessorSkippedRangeMapping PPSkippedRangeMapping;
+  /// Non-owning pointer to the file contents.
+  ///
+  /// We're using pointer here to keep the size of this class small. Instances
+  /// representing directories and filesystem errors don't hold any contents
+  /// anyway.
+  CachedFileContents *Contents;
 };
 
 /// This class is a shared cache, that caches the 'stat' and 'open' calls to the
@@ -144,24 +152,59 @@ class CachedFileSystemEntry {
 /// the worker threads.
 class DependencyScanningFilesystemSharedCache {
 public:
-  struct SharedFileSystemEntry {
-    std::mutex ValueLock;
-    CachedFileSystemEntry Value;
+  struct CacheShard {
+    /// The mutex that needs to be locked before mutation of any member.
+    mutable std::mutex CacheLock;
+
+    /// Map from filenames to cached entries.
+    llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>
+        EntriesByFilename;
+
+    /// Map from unique IDs to cached entries.
+    llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
+        EntriesByUID;
+
+    /// The backing storage for cached entries.
+    llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;
+
+    /// The backing storage for cached contents.
+    llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
+
+    /// Returns entry associated with the filename or nullptr if none is found.
+    const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
+
+    /// Returns entry associated with the unique ID or nullptr if none is found.
+    const CachedFileSystemEntry *
+    findEntryByUID(llvm::sys::fs::UniqueID UID) const;
+
+    /// Returns entry associated with the filename if there is some. Otherwise,
+    /// constructs new one with the given status, associates it with the
+    /// filename and returns the result.
+    const CachedFileSystemEntry &
+    getOrEmplaceEntryForFilename(StringRef Filename,
+                                 llvm::ErrorOr<llvm::vfs::Status> Stat);
+
+    /// Returns entry associated with the unique ID if there is some. Otherwise,
+    /// constructs new one with the given status and contents, associates it
+    /// with the unique ID and returns the result.
+    const CachedFileSystemEntry &
+    getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
+                            std::unique_ptr<llvm::MemoryBuffer> Contents);
+
+    /// Returns entry associated with the filename if there is some. Otherwise,
+    /// associates the given entry with the filename and returns it.
+    const CachedFileSystemEntry &
+    getOrInsertEntryForFilename(StringRef Filename,
+                                const CachedFileSystemEntry &Entry);
   };
 
   DependencyScanningFilesystemSharedCache();
 
-  /// Returns a cache entry for the corresponding key.
-  ///
-  /// A new cache entry is created if the key is not in the cache. This is a
-  /// thread safe call.
-  SharedFileSystemEntry &get(StringRef Key);
+  /// Returns shard for the given key.
+  CacheShard &getShardForFilename(StringRef Filename) const;
+  CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
 
 private:
-  struct CacheShard {
-    std::mutex CacheLock;
-    llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
-  };
   std::unique_ptr<CacheShard[]> CacheShards;
   unsigned NumShards;
 };
@@ -173,8 +216,20 @@ class DependencyScanningFilesystemLocalCache {
   llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
 
 public:
-  const CachedFileSystemEntry *getCachedEntry(StringRef Filename) {
-    return Cache[Filename];
+  /// Returns entry associated with the filename or nullptr if none is found.
+  const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
+    auto It = Cache.find(Filename);
+    return It == Cache.end() ? nullptr : It->getValue();
+  }
+
+  /// Associates the given entry with the filename and returns the given entry
+  /// pointer (for convenience).
+  const CachedFileSystemEntry &
+  insertEntryForFilename(StringRef Filename,
+                         const CachedFileSystemEntry &Entry) {
+    const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second;
+    assert(InsertedEntry == &Entry && "entry already present");
+    return *InsertedEntry;
   }
 };
 
@@ -187,23 +242,25 @@ class EntryRef {
   /// are minimized.
   bool Minimized;
 
+  /// The filename used to access this entry.
+  std::string Filename;
+
   /// The underlying cached entry.
   const CachedFileSystemEntry &Entry;
 
 public:
-  EntryRef(bool Minimized, const CachedFileSystemEntry &Entry)
-      : Minimized(Minimized), Entry(Entry) {}
+  EntryRef(bool Minimized, StringRef Name, const CachedFileSystemEntry &Entry)
+      : Minimized(Minimized), Filename(Name), Entry(Entry) {}
 
   llvm::vfs::Status getStatus() const {
     llvm::vfs::Status Stat = Entry.getStatus();
-    if (Stat.isDirectory())
-      return Stat;
-    return llvm::vfs::Status::copyWithNewSize(Stat, getContents().size());
+    if (!Stat.isDirectory())
+      Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size());
+    return llvm::vfs::Status::copyWithNewName(Stat, Filename);
   }
 
   bool isError() const { return Entry.isError(); }
   bool isDirectory() const { return Entry.isDirectory(); }
-  StringRef getName() const { return Entry.getName(); }
 
   /// If the cached entry represents an error, promotes it into `ErrorOr`.
   llvm::ErrorOr<EntryRef> unwrapError() const {
@@ -253,8 +310,87 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
   /// Check whether the file should be minimized.
   bool shouldMinimize(StringRef Filename);
 
+  /// Returns entry for the given filename.
+  ///
+  /// Attempts to use the local and shared caches first, then falls back to
+  /// using the underlying filesystem.
   llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename);
 
+  /// For a filename that's not yet associated with any entry in the caches,
+  /// uses the underlying filesystem to either look up the entry based in the
+  /// shared cache indexed by unique ID, or creates new entry from scratch.
+  llvm::ErrorOr<const CachedFileSystemEntry &>
+  computeAndStoreResult(StringRef Filename);
+
+  /// Minimizes the given entry if necessary and returns a wrapper object with
+  /// reference semantics.
+  EntryRef minimizeIfNecessary(const CachedFileSystemEntry &Entry,
+                               StringRef Filename);
+
+  /// Represents a filesystem entry that has been stat-ed (and potentially read)
+  /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
+  struct TentativeEntry {
+    llvm::vfs::Status Status;
+    std::unique_ptr<llvm::MemoryBuffer> Contents;
+
+    TentativeEntry(llvm::vfs::Status Status,
+                   std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr)
+        : Status(std::move(Status)), Contents(std::move(Contents)) {}
+  };
+
+  /// Reads file at the given path. Enforces consistency between the file size
+  /// in status and size of read contents.
+  llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);
+
+  /// Returns entry associated with the unique ID of the given tentative entry
+  /// if there is some in the shared cache. Otherwise, constructs new one,
+  /// associates it with the unique ID and returns the result.
+  const CachedFileSystemEntry &
+  getOrEmplaceSharedEntryForUID(TentativeEntry TEntry);
+
+  /// Returns entry associated with the filename or nullptr if none is found.
+  ///
+  /// Returns entry from local cache if there is some. Otherwise, if the entry
+  /// is found in the shared cache, writes it through the local cache and
+  /// returns it. Otherwise returns nullptr.
+  const CachedFileSystemEntry *
+  findEntryByFilenameWithWriteThrough(StringRef Filename);
+
+  /// Returns entry associated with the unique ID in the shared cache or nullptr
+  /// if none is found.
+  const CachedFileSystemEntry *
+  findSharedEntryByUID(llvm::vfs::Status Stat) const {
+    return SharedCache.getShardForUID(Stat.getUniqueID())
+        .findEntryByUID(Stat.getUniqueID());
+  }
+
+  /// Associates the given entry with the filename in the local cache and
+  /// returns it.
+  const CachedFileSystemEntry &
+  insertLocalEntryForFilename(StringRef Filename,
+                              const CachedFileSystemEntry &Entry) {
+    return LocalCache.insertEntryForFilename(Filename, Entry);
+  }
+
+  /// Returns entry associated with the filename in the shared cache if there is
+  /// some. Otherwise, constructs new one with the given error code, associates
+  /// it with the filename and returns the result.
+  const CachedFileSystemEntry &
+  getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) {
+    return SharedCache.getShardForFilename(Filename)
+        .getOrEmplaceEntryForFilename(Filename, EC);
+  }
+
+  /// Returns entry associated with the filename in the shared cache if there is
+  /// some. Otherwise, associates the given entry with the filename and returns
+  /// it.
+  const CachedFileSystemEntry &
+  getOrInsertSharedEntryForFilename(StringRef Filename,
+                                    const CachedFileSystemEntry &Entry) {
+    return SharedCache.getShardForFilename(Filename)
+        .getOrInsertEntryForFilename(Filename, Entry);
+  }
+
   /// The global cache shared between worker threads.
   DependencyScanningFilesystemSharedCache &SharedCache;
   /// The local cache is used by the worker thread to cache file system queries

diff  --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 6b8c692335bd6..cc8968a7b680f 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -16,10 +16,10 @@ using namespace clang;
 using namespace tooling;
 using namespace dependencies;
 
-llvm::ErrorOr<llvm::vfs::Status>
-CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS) {
+llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
+DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
   // Load the file and its content from the file system.
-  auto MaybeFile = FS.openFileForRead(Filename);
+  auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
   if (!MaybeFile)
     return MaybeFile.getError();
   auto File = std::move(*MaybeFile);
@@ -34,24 +34,42 @@ CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS) {
     return MaybeBuffer.getError();
   auto Buffer = std::move(*MaybeBuffer);
 
-  OriginalContents = std::move(Buffer);
-  return Stat;
+  // If the file size changed between read and stat, pretend it didn't.
+  if (Stat.getSize() != Buffer->getBufferSize())
+    Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
+
+  return TentativeEntry(Stat, std::move(Buffer));
 }
 
-void CachedFileSystemEntry::minimizeFile() {
-  assert(OriginalContents && "minimizing missing contents");
+EntryRef DependencyScanningWorkerFilesystem::minimizeIfNecessary(
+    const CachedFileSystemEntry &Entry, StringRef Filename) {
+  if (Entry.isError() || Entry.isDirectory() || !shouldMinimize(Filename))
+    return EntryRef(/*Minimized=*/false, Filename, Entry);
+
+  CachedFileContents *Contents = Entry.getContents();
+  assert(Contents && "contents not initialized");
+
+  // Double-checked locking.
+  if (Contents->MinimizedAccess.load())
+    return EntryRef(/*Minimized=*/true, Filename, Entry);
+
+  std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);
+
+  // Double-checked locking.
+  if (Contents->MinimizedAccess.load())
+    return EntryRef(/*Minimized=*/true, Filename, Entry);
 
   llvm::SmallString<1024> MinimizedFileContents;
   // Minimize the file down to directives that might affect the dependencies.
   SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens;
-  if (minimizeSourceToDependencyDirectives(OriginalContents->getBuffer(),
+  if (minimizeSourceToDependencyDirectives(Contents->Original->getBuffer(),
                                            MinimizedFileContents, Tokens)) {
     // FIXME: Propagate the diagnostic if desired by the client.
     // Use the original file if the minimization failed.
-    MinimizedContentsStorage =
-        llvm::MemoryBuffer::getMemBuffer(*OriginalContents);
-    MinimizedContentsAccess.store(MinimizedContentsStorage.get());
-    return;
+    Contents->MinimizedStorage =
+        llvm::MemoryBuffer::getMemBuffer(*Contents->Original);
+    Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
+    return EntryRef(/*Minimized=*/true, Filename, Entry);
   }
 
   // The contents produced by the minimizer must be null terminated.
@@ -74,16 +92,17 @@ void CachedFileSystemEntry::minimizeFile() {
     }
     Mapping[Range.Offset] = Range.Length;
   }
-  PPSkippedRangeMapping = std::move(Mapping);
+  Contents->PPSkippedRangeMapping = std::move(Mapping);
 
-  MinimizedContentsStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>(
+  Contents->MinimizedStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>(
       std::move(MinimizedFileContents));
-  // The algorithm in `getOrCreateFileSystemEntry` uses the presence of
-  // minimized contents to decide whether an entry is up-to-date or not.
-  // If it is up-to-date, the skipped range mappings must be already computed.
-  // This is why we need to store the minimized contents **after** storing the
-  // skipped range mappings. Failing to do so would lead to a data race.
-  MinimizedContentsAccess.store(MinimizedContentsStorage.get());
+  // This function performed double-checked locking using `MinimizedAccess`.
+  // Assigning it must be the last thing this function does. If we were to
+  // assign it before `PPSkippedRangeMapping`, other threads may skip the
+  // critical section (`MinimizedAccess != nullptr`) and access the mappings
+  // that are about to be initialized, leading to a data race.
+  Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
+  return EntryRef(/*Minimized=*/true, Filename, Entry);
 }
 
 DependencyScanningFilesystemSharedCache::
@@ -98,12 +117,70 @@ DependencyScanningFilesystemSharedCache::
   CacheShards = std::make_unique<CacheShard[]>(NumShards);
 }
 
-DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
-DependencyScanningFilesystemSharedCache::get(StringRef Key) {
-  CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards];
-  std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
-  auto It = Shard.Cache.try_emplace(Key);
-  return It.first->getValue();
+DependencyScanningFilesystemSharedCache::CacheShard &
+DependencyScanningFilesystemSharedCache::getShardForFilename(
+    StringRef Filename) const {
+  return CacheShards[llvm::hash_value(Filename) % NumShards];
+}
+
+DependencyScanningFilesystemSharedCache::CacheShard &
+DependencyScanningFilesystemSharedCache::getShardForUID(
+    llvm::sys::fs::UniqueID UID) const {
+  auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
+  return CacheShards[Hash % NumShards];
+}
+
+const CachedFileSystemEntry *
+DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
+    StringRef Filename) const {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto It = EntriesByFilename.find(Filename);
+  return It == EntriesByFilename.end() ? nullptr : It->getValue();
+}
+
+const CachedFileSystemEntry *
+DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
+    llvm::sys::fs::UniqueID UID) const {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto It = EntriesByUID.find(UID);
+  return It == EntriesByUID.end() ? nullptr : It->getSecond();
+}
+
+const CachedFileSystemEntry &
+DependencyScanningFilesystemSharedCache::CacheShard::
+    getOrEmplaceEntryForFilename(StringRef Filename,
+                                 llvm::ErrorOr<llvm::vfs::Status> Stat) {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto Insertion = EntriesByFilename.insert({Filename, nullptr});
+  if (Insertion.second)
+    Insertion.first->second =
+        new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
+  return *Insertion.first->second;
+}
+
+const CachedFileSystemEntry &
+DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
+    llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
+    std::unique_ptr<llvm::MemoryBuffer> Contents) {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto Insertion = EntriesByUID.insert({UID, nullptr});
+  if (Insertion.second) {
+    CachedFileContents *StoredContents = nullptr;
+    if (Contents)
+      StoredContents = new (ContentsStorage.Allocate())
+          CachedFileContents(std::move(Contents));
+    Insertion.first->second = new (EntryStorage.Allocate())
+        CachedFileSystemEntry(std::move(Stat), StoredContents);
+  }
+  return *Insertion.first->second;
+}
+
+const CachedFileSystemEntry &
+DependencyScanningFilesystemSharedCache::CacheShard::
+    getOrInsertEntryForFilename(StringRef Filename,
+                                const CachedFileSystemEntry &Entry) {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
 }
 
 /// Whitelist file extensions that should be minimized, treating no extension as
@@ -148,53 +225,63 @@ bool DependencyScanningWorkerFilesystem::shouldMinimize(StringRef RawFilename) {
   return !NotToBeMinimized.contains(Filename);
 }
 
-void CachedFileSystemEntry::init(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus,
-                                 StringRef Filename,
-                                 llvm::vfs::FileSystem &FS) {
-  if (!MaybeStatus || MaybeStatus->isDirectory())
-    MaybeStat = std::move(MaybeStatus);
-  else
-    MaybeStat = initFile(Filename, FS);
+const CachedFileSystemEntry &
+DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
+    TentativeEntry TEntry) {
+  auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID());
+  return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
+                                       std::move(TEntry.Status),
+                                       std::move(TEntry.Contents));
 }
 
-llvm::ErrorOr<EntryRef>
-DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
+const CachedFileSystemEntry *
+DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
     StringRef Filename) {
-  bool ShouldBeMinimized = shouldMinimize(Filename);
-
-  const auto *Entry = LocalCache.getCachedEntry(Filename);
-  if (Entry && !Entry->needsUpdate(ShouldBeMinimized))
-    return EntryRef(ShouldBeMinimized, *Entry).unwrapError();
-
-  // FIXME: Handle PCM/PCH files.
-  // FIXME: Handle module map files.
-
-  auto &SharedCacheEntry = SharedCache.get(Filename);
-  {
-    std::lock_guard<std::mutex> LockGuard(SharedCacheEntry.ValueLock);
-    CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value;
-
-    if (!CacheEntry.isInitialized()) {
-      auto MaybeStatus = getUnderlyingFS().status(Filename);
-      if (!MaybeStatus && !shouldCacheStatFailures(Filename))
-        // HACK: We need to always restat non source files if the stat fails.
-        //   This is because Clang first looks up the module cache and module
-        //   files before building them, and then looks for them again. If we
-        //   cache the stat failure, it won't see them the second time.
-        return MaybeStatus.getError();
-      CacheEntry.init(std::move(MaybeStatus), Filename, getUnderlyingFS());
-    }
+  if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
+    return Entry;
+  auto &Shard = SharedCache.getShardForFilename(Filename);
+  if (const auto *Entry = Shard.findEntryByFilename(Filename))
+    return &LocalCache.insertEntryForFilename(Filename, *Entry);
+  return nullptr;
+}
 
-    // Checking `needsUpdate` verifies the entry represents an opened file.
-    // Only checking `needsMinimization` could lead to minimization of files
-    // that we failed to load (such files don't have `OriginalContents`).
-    if (CacheEntry.needsUpdate(ShouldBeMinimized))
-      CacheEntry.minimizeFile();
+llvm::ErrorOr<const CachedFileSystemEntry &>
+DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
+  llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);
+  if (!Stat) {
+    if (!shouldCacheStatFailures(Filename))
+      return Stat.getError();
+    const auto &Entry =
+        getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
+    return insertLocalEntryForFilename(Filename, Entry);
   }
 
-  // Store the result in the local cache.
-  Entry = &SharedCacheEntry.Value;
-  return EntryRef(ShouldBeMinimized, *Entry).unwrapError();
+  if (const auto *Entry = findSharedEntryByUID(*Stat))
+    return insertLocalEntryForFilename(Filename, *Entry);
+
+  auto TEntry =
+      Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename);
+
+  const CachedFileSystemEntry *SharedEntry = [&]() {
+    if (TEntry) {
+      const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
+      return &getOrInsertSharedEntryForFilename(Filename, UIDEntry);
+    }
+    return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError());
+  }();
+
+  return insertLocalEntryForFilename(Filename, *SharedEntry);
+}
+
+llvm::ErrorOr<EntryRef>
+DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
+    StringRef Filename) {
+  if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
+    return minimizeIfNecessary(*Entry, Filename).unwrapError();
+  auto MaybeEntry = computeAndStoreResult(Filename);
+  if (!MaybeEntry)
+    return MaybeEntry.getError();
+  return minimizeIfNecessary(*MaybeEntry, Filename).unwrapError();
 }
 
 llvm::ErrorOr<llvm::vfs::Status>
@@ -247,7 +334,8 @@ llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MinimizedVFSFile::create(
     return std::make_error_code(std::errc::is_a_directory);
 
   auto Result = std::make_unique<MinimizedVFSFile>(
-      llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), Entry.getName(),
+      llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
+                                       Entry.getStatus().getName(),
                                        /*RequiresNullTerminator=*/false),
       Entry.getStatus());
 

diff  --git a/clang/unittests/Tooling/DependencyScannerTest.cpp b/clang/unittests/Tooling/DependencyScannerTest.cpp
index 90ab1df267530..784d759986375 100644
--- a/clang/unittests/Tooling/DependencyScannerTest.cpp
+++ b/clang/unittests/Tooling/DependencyScannerTest.cpp
@@ -224,6 +224,8 @@ TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately1) {
   EXPECT_TRUE(StatusFull1);
   EXPECT_EQ(StatusMinimized0->getSize(), 17u);
   EXPECT_EQ(StatusFull1->getSize(), 30u);
+  EXPECT_EQ(StatusMinimized0->getName(), StringRef("/mod.h"));
+  EXPECT_EQ(StatusFull1->getName(), StringRef("/mod.h"));
 }
 
 TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately2) {
@@ -245,6 +247,8 @@ TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately2) {
   EXPECT_TRUE(StatusMinimized1);
   EXPECT_EQ(StatusFull0->getSize(), 30u);
   EXPECT_EQ(StatusMinimized1->getSize(), 17u);
+  EXPECT_EQ(StatusFull0->getName(), StringRef("/mod.h"));
+  EXPECT_EQ(StatusMinimized1->getName(), StringRef("/mod.h"));
 }
 
 } // end namespace dependencies


        


More information about the cfe-commits mailing list