[clang] [clang][deps] Cache `VFS::getRealPath()` (PR #68645)

via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 9 16:53:42 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Jan Svoboda (jansvoboda11)

<details>
<summary>Changes</summary>

This PR starts caching calls to `DependencyScanningWorkerFilesystem::getRealPath()` that we use whenever we canonicalize module map path. In the case of the real VFS, this functions performs an expensive syscall that we'd like to do as rarely as possible.

This PR keeps the real path out of `CachedFileSystemEntry`, since that's **immutable**; populating the real path on creation of this data structure (every stat/open) would be expensive.

---
Full diff: https://github.com/llvm/llvm-project/pull/68645.diff


2 Files Affected:

- (modified) clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h (+49-1) 
- (modified) clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp (+102-15) 


``````````diff
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index dbe219b6dd8d723..b7ffd377ea017fa 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -142,6 +142,8 @@ class CachedFileSystemEntry {
   CachedFileContents *Contents;
 };
 
+using CachedRealPath = llvm::ErrorOr<std::string>;
+
 /// This class is a shared cache, that caches the 'stat' and 'open' calls to the
 /// underlying real file system, and the scanned preprocessor directives of
 /// files.
@@ -168,6 +170,12 @@ class DependencyScanningFilesystemSharedCache {
     /// The backing storage for cached contents.
     llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
 
+    /// Map from filenames to cached real paths.
+    llvm::StringMap<const CachedRealPath *> RealPathsByFilename;
+
+    /// The backing storage for cached real paths.
+    llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
+
     /// Returns entry associated with the filename or nullptr if none is found.
     const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
 
@@ -194,6 +202,17 @@ class DependencyScanningFilesystemSharedCache {
     const CachedFileSystemEntry &
     getOrInsertEntryForFilename(StringRef Filename,
                                 const CachedFileSystemEntry &Entry);
+
+    /// Returns real path associated with the filename or nullptr if none is
+    /// found.
+    const CachedRealPath *findRealPathByFilename(StringRef Filename) const;
+
+    /// Returns real path associated with the filename if there is some.
+    /// Otherwise, constructs new one with the given one, associates it with the
+    /// filename and returns the result.
+    const CachedRealPath &
+    getOrEmplaceRealPathForFilename(StringRef Filename,
+                                    llvm::ErrorOr<StringRef> RealPath);
   };
 
   DependencyScanningFilesystemSharedCache();
@@ -212,6 +231,8 @@ class DependencyScanningFilesystemSharedCache {
 class DependencyScanningFilesystemLocalCache {
   llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
 
+  llvm::StringMap<const CachedRealPath *, llvm::BumpPtrAllocator> RealPathCache;
+
 public:
   /// Returns entry associated with the filename or nullptr if none is found.
   const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
@@ -230,6 +251,26 @@ class DependencyScanningFilesystemLocalCache {
     assert(InsertedEntry == &Entry && "entry already present");
     return *InsertedEntry;
   }
+
+  /// Returns real path associated with the filename or nullptr if none is
+  /// found.
+  const CachedRealPath *findRealPathByFilename(StringRef Filename) const {
+    assert(llvm::sys::path::is_absolute_gnu(Filename));
+    auto It = RealPathCache.find(Filename);
+    return It == RealPathCache.end() ? nullptr : It->getValue();
+  }
+
+  /// Associates the given real path with the filename and returns the given
+  /// entry pointer (for convenience).
+  const CachedRealPath &
+  insertRealPathForFilename(StringRef Filename,
+                            const CachedRealPath &RealPath) {
+    assert(llvm::sys::path::is_absolute_gnu(Filename));
+    const auto *InsertedRealPath =
+        RealPathCache.insert({Filename, &RealPath}).first->second;
+    assert(InsertedRealPath == &RealPath && "entry already present");
+    return *InsertedRealPath;
+  }
 };
 
 /// Reference to a CachedFileSystemEntry.
@@ -290,6 +331,9 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
   llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
   openFileForRead(const Twine &Path) override;
 
+  std::error_code getRealPath(const Twine &Path,
+                              SmallVectorImpl<char> &Output) const override;
+
   std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
 
   /// Returns entry for the given filename.
@@ -393,13 +437,17 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
   DependencyScanningFilesystemSharedCache &SharedCache;
   /// The local cache is used by the worker thread to cache file system queries
   /// locally instead of querying the global cache every time.
-  DependencyScanningFilesystemLocalCache LocalCache;
+  mutable DependencyScanningFilesystemLocalCache LocalCache;
 
   /// The working directory to use for making relative paths absolute before
   /// using them for cache lookups.
   llvm::ErrorOr<std::string> WorkingDirForCacheLookup;
 
   void updateWorkingDirForCacheLookup();
+
+  llvm::ErrorOr<StringRef>
+  tryGetFilenameForLookup(StringRef OriginalFilename,
+                          llvm::SmallVectorImpl<char> &PathBuf) const;
 };
 
 } // end namespace dependencies
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 3e53c8fc5740875..01b94efce6a8674 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -161,6 +161,35 @@ DependencyScanningFilesystemSharedCache::CacheShard::
   return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
 }
 
+const CachedRealPath *
+DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename(
+    StringRef Filename) const {
+  assert(llvm::sys::path::is_absolute_gnu(Filename));
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto It = RealPathsByFilename.find(Filename);
+  return It == RealPathsByFilename.end() ? nullptr : It->getValue();
+}
+
+const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard::
+    getOrEmplaceRealPathForFilename(StringRef Filename,
+                                    llvm::ErrorOr<llvm::StringRef> RealPath) {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+
+  auto Insertion = RealPathsByFilename.insert({Filename, nullptr});
+  if (Insertion.second) {
+    auto OwnedRealPath = [&]() -> CachedRealPath {
+      if (!RealPath)
+        return RealPath.getError();
+      return RealPath->str();
+    }();
+
+    Insertion.first->second = new (RealPathStorage.Allocate())
+        CachedRealPath(std::move(OwnedRealPath));
+  }
+
+  return *Insertion.first->second;
+}
+
 /// Whitelist file extensions that should be minimized, treating no extension as
 /// a source file that should be minimized.
 ///
@@ -258,26 +287,17 @@ DependencyScanningWorkerFilesystem::computeAndStoreResult(
 llvm::ErrorOr<EntryRef>
 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
     StringRef OriginalFilename, bool DisableDirectivesScanning) {
-  StringRef FilenameForLookup;
   SmallString<256> PathBuf;
-  if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) {
-    FilenameForLookup = OriginalFilename;
-  } else if (!WorkingDirForCacheLookup) {
-    return WorkingDirForCacheLookup.getError();
-  } else {
-    StringRef RelFilename = OriginalFilename;
-    RelFilename.consume_front("./");
-    PathBuf = *WorkingDirForCacheLookup;
-    llvm::sys::path::append(PathBuf, RelFilename);
-    FilenameForLookup = PathBuf.str();
-  }
-  assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup));
+  auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
+  if (!FilenameForLookup)
+    return FilenameForLookup.getError();
+
   if (const auto *Entry =
-          findEntryByFilenameWithWriteThrough(FilenameForLookup))
+          findEntryByFilenameWithWriteThrough(*FilenameForLookup))
     return scanForDirectivesIfNecessary(*Entry, OriginalFilename,
                                         DisableDirectivesScanning)
         .unwrapError();
-  auto MaybeEntry = computeAndStoreResult(OriginalFilename, FilenameForLookup);
+  auto MaybeEntry = computeAndStoreResult(OriginalFilename, *FilenameForLookup);
   if (!MaybeEntry)
     return MaybeEntry.getError();
   return scanForDirectivesIfNecessary(*MaybeEntry, OriginalFilename,
@@ -359,6 +379,53 @@ DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
   return DepScanFile::create(Result.get());
 }
 
+std::error_code DependencyScanningWorkerFilesystem::getRealPath(
+    const Twine &Path, SmallVectorImpl<char> &Output) const {
+  SmallString<256> OwnedFilename;
+  StringRef OriginalFilename = Path.toStringRef(OwnedFilename);
+
+  SmallString<256> PathBuf;
+  auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
+  if (!FilenameForLookup)
+    return FilenameForLookup.getError();
+
+  auto HandleCachedRealPath =
+      [&Output](const CachedRealPath &RealPath) -> std::error_code {
+    if (!RealPath)
+      return RealPath.getError();
+    Output.assign(RealPath->begin(), RealPath->end());
+    return {};
+  };
+
+  // If we already have the result in local cache, no work required.
+  if (const auto *RealPath =
+          LocalCache.findRealPathByFilename(*FilenameForLookup))
+    return HandleCachedRealPath(*RealPath);
+
+  // If we have the result in the shared cache, cache it locally.
+  auto &Shard = SharedCache.getShardForFilename(*FilenameForLookup);
+  if (const auto *ShardRealPath =
+          Shard.findRealPathByFilename(*FilenameForLookup)) {
+    const auto &RealPath =
+        LocalCache.insertRealPathForFilename(*FilenameForLookup, *ShardRealPath);
+    return HandleCachedRealPath(RealPath);
+  }
+
+  // If we don't know the real path, compute it...
+  std::error_code EC = getUnderlyingFS().getRealPath(OriginalFilename, Output);
+  llvm::ErrorOr<llvm::StringRef> ComputedRealPath = EC;
+  if (!EC)
+    ComputedRealPath = StringRef{Output.data(), Output.size()};
+
+  // ...and try to write it into the shared cache. In case some other thread won
+  // this race and already wrote its own result there, just adopt it. Write
+  // whatever is in the shared cache into the local one.
+  const auto &RealPath = Shard.getOrEmplaceRealPathForFilename(
+      *FilenameForLookup, ComputedRealPath);
+  return HandleCachedRealPath(
+      LocalCache.insertRealPathForFilename(*FilenameForLookup, RealPath));
+}
+
 std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory(
     const Twine &Path) {
   std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path);
@@ -379,3 +446,23 @@ void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() {
   assert(!WorkingDirForCacheLookup ||
          llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup));
 }
+
+llvm::ErrorOr<StringRef>
+DependencyScanningWorkerFilesystem::tryGetFilenameForLookup(
+    StringRef OriginalFilename, llvm::SmallVectorImpl<char> &PathBuf) const {
+  StringRef FilenameForLookup;
+  if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) {
+    FilenameForLookup = OriginalFilename;
+  } else if (!WorkingDirForCacheLookup) {
+    return WorkingDirForCacheLookup.getError();
+  } else {
+    StringRef RelFilename = OriginalFilename;
+    RelFilename.consume_front("./");
+    PathBuf.assign(WorkingDirForCacheLookup->begin(),
+                   WorkingDirForCacheLookup->end());
+    llvm::sys::path::append(PathBuf, RelFilename);
+    FilenameForLookup = StringRef{PathBuf.begin(), PathBuf.size()};
+  }
+  assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup));
+  return FilenameForLookup;
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/68645


More information about the cfe-commits mailing list