[clang] [clang][deps] Cache `VFS::getRealPath()` (PR #68645)

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Tue Jan 23 07:56:16 PST 2024


https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/68645

>From 3970f76778923189a9b1e7ec5fef457ac8dba357 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Mon, 9 Oct 2023 10:14:17 -0700
Subject: [PATCH 1/3] [clang] Move lookup filename into function

---
 .../DependencyScanningFilesystem.h            |  4 ++
 .../DependencyScanningFilesystem.cpp          | 41 ++++++++++++-------
 2 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index dbe219b6dd8d723..c3cd69ab720effd 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -400,6 +400,10 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
   llvm::ErrorOr<std::string> WorkingDirForCacheLookup;
 
   void updateWorkingDirForCacheLookup();
+
+  llvm::ErrorOr<StringRef>
+  tryGetFilenameForLookup(StringRef OriginalFilename,
+                          llvm::SmallVectorImpl<char> &PathBuf) const;
 };
 
 } // end namespace dependencies
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 3e53c8fc5740875..44b39c5195c62b6 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -258,26 +258,17 @@ DependencyScanningWorkerFilesystem::computeAndStoreResult(
 llvm::ErrorOr<EntryRef>
 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
     StringRef OriginalFilename, bool DisableDirectivesScanning) {
-  StringRef FilenameForLookup;
   SmallString<256> PathBuf;
-  if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) {
-    FilenameForLookup = OriginalFilename;
-  } else if (!WorkingDirForCacheLookup) {
-    return WorkingDirForCacheLookup.getError();
-  } else {
-    StringRef RelFilename = OriginalFilename;
-    RelFilename.consume_front("./");
-    PathBuf = *WorkingDirForCacheLookup;
-    llvm::sys::path::append(PathBuf, RelFilename);
-    FilenameForLookup = PathBuf.str();
-  }
-  assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup));
+  auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
+  if (!FilenameForLookup)
+    return FilenameForLookup.getError();
+
   if (const auto *Entry =
-          findEntryByFilenameWithWriteThrough(FilenameForLookup))
+          findEntryByFilenameWithWriteThrough(*FilenameForLookup))
     return scanForDirectivesIfNecessary(*Entry, OriginalFilename,
                                         DisableDirectivesScanning)
         .unwrapError();
-  auto MaybeEntry = computeAndStoreResult(OriginalFilename, FilenameForLookup);
+  auto MaybeEntry = computeAndStoreResult(OriginalFilename, *FilenameForLookup);
   if (!MaybeEntry)
     return MaybeEntry.getError();
   return scanForDirectivesIfNecessary(*MaybeEntry, OriginalFilename,
@@ -379,3 +370,23 @@ void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() {
   assert(!WorkingDirForCacheLookup ||
          llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup));
 }
+
+llvm::ErrorOr<StringRef>
+DependencyScanningWorkerFilesystem::tryGetFilenameForLookup(
+    StringRef OriginalFilename, llvm::SmallVectorImpl<char> &PathBuf) const {
+  StringRef FilenameForLookup;
+  if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) {
+    FilenameForLookup = OriginalFilename;
+  } else if (!WorkingDirForCacheLookup) {
+    return WorkingDirForCacheLookup.getError();
+  } else {
+    StringRef RelFilename = OriginalFilename;
+    RelFilename.consume_front("./");
+    PathBuf.assign(WorkingDirForCacheLookup->begin(),
+                   WorkingDirForCacheLookup->end());
+    llvm::sys::path::append(PathBuf, RelFilename);
+    FilenameForLookup = StringRef{PathBuf.begin(), PathBuf.size()};
+  }
+  assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup));
+  return FilenameForLookup;
+}

>From bdccf1e7858826b5f41791cd0826f9e230de9197 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Mon, 9 Oct 2023 10:14:22 -0700
Subject: [PATCH 2/3] [clang][deps] Cache `VFS::getRealPath()`

---
 .../DependencyScanningFilesystem.h            | 46 ++++++++++-
 .../DependencyScanningFilesystem.cpp          | 76 +++++++++++++++++++
 2 files changed, 121 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index c3cd69ab720effd..b7ffd377ea017fa 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -142,6 +142,8 @@ class CachedFileSystemEntry {
   CachedFileContents *Contents;
 };
 
+using CachedRealPath = llvm::ErrorOr<std::string>;
+
 /// This class is a shared cache, that caches the 'stat' and 'open' calls to the
 /// underlying real file system, and the scanned preprocessor directives of
 /// files.
@@ -168,6 +170,12 @@ class DependencyScanningFilesystemSharedCache {
     /// The backing storage for cached contents.
     llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
 
+    /// Map from filenames to cached real paths.
+    llvm::StringMap<const CachedRealPath *> RealPathsByFilename;
+
+    /// The backing storage for cached real paths.
+    llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
+
     /// Returns entry associated with the filename or nullptr if none is found.
     const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
 
@@ -194,6 +202,17 @@ class DependencyScanningFilesystemSharedCache {
     const CachedFileSystemEntry &
     getOrInsertEntryForFilename(StringRef Filename,
                                 const CachedFileSystemEntry &Entry);
+
+    /// Returns real path associated with the filename or nullptr if none is
+    /// found.
+    const CachedRealPath *findRealPathByFilename(StringRef Filename) const;
+
+    /// Returns real path associated with the filename if there is some.
+    /// Otherwise, constructs new one with the given one, associates it with the
+    /// filename and returns the result.
+    const CachedRealPath &
+    getOrEmplaceRealPathForFilename(StringRef Filename,
+                                    llvm::ErrorOr<StringRef> RealPath);
   };
 
   DependencyScanningFilesystemSharedCache();
@@ -212,6 +231,8 @@ class DependencyScanningFilesystemSharedCache {
 class DependencyScanningFilesystemLocalCache {
   llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
 
+  llvm::StringMap<const CachedRealPath *, llvm::BumpPtrAllocator> RealPathCache;
+
 public:
   /// Returns entry associated with the filename or nullptr if none is found.
   const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
@@ -230,6 +251,26 @@ class DependencyScanningFilesystemLocalCache {
     assert(InsertedEntry == &Entry && "entry already present");
     return *InsertedEntry;
   }
+
+  /// Returns real path associated with the filename or nullptr if none is
+  /// found.
+  const CachedRealPath *findRealPathByFilename(StringRef Filename) const {
+    assert(llvm::sys::path::is_absolute_gnu(Filename));
+    auto It = RealPathCache.find(Filename);
+    return It == RealPathCache.end() ? nullptr : It->getValue();
+  }
+
+  /// Associates the given real path with the filename and returns the given
+  /// entry pointer (for convenience).
+  const CachedRealPath &
+  insertRealPathForFilename(StringRef Filename,
+                            const CachedRealPath &RealPath) {
+    assert(llvm::sys::path::is_absolute_gnu(Filename));
+    const auto *InsertedRealPath =
+        RealPathCache.insert({Filename, &RealPath}).first->second;
+    assert(InsertedRealPath == &RealPath && "entry already present");
+    return *InsertedRealPath;
+  }
 };
 
 /// Reference to a CachedFileSystemEntry.
@@ -290,6 +331,9 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
   llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
   openFileForRead(const Twine &Path) override;
 
+  std::error_code getRealPath(const Twine &Path,
+                              SmallVectorImpl<char> &Output) const override;
+
   std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
 
   /// Returns entry for the given filename.
@@ -393,7 +437,7 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
   DependencyScanningFilesystemSharedCache &SharedCache;
   /// The local cache is used by the worker thread to cache file system queries
   /// locally instead of querying the global cache every time.
-  DependencyScanningFilesystemLocalCache LocalCache;
+  mutable DependencyScanningFilesystemLocalCache LocalCache;
 
   /// The working directory to use for making relative paths absolute before
   /// using them for cache lookups.
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 44b39c5195c62b6..01b94efce6a8674 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -161,6 +161,35 @@ DependencyScanningFilesystemSharedCache::CacheShard::
   return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
 }
 
+const CachedRealPath *
+DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename(
+    StringRef Filename) const {
+  assert(llvm::sys::path::is_absolute_gnu(Filename));
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto It = RealPathsByFilename.find(Filename);
+  return It == RealPathsByFilename.end() ? nullptr : It->getValue();
+}
+
+const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard::
+    getOrEmplaceRealPathForFilename(StringRef Filename,
+                                    llvm::ErrorOr<llvm::StringRef> RealPath) {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+
+  auto Insertion = RealPathsByFilename.insert({Filename, nullptr});
+  if (Insertion.second) {
+    auto OwnedRealPath = [&]() -> CachedRealPath {
+      if (!RealPath)
+        return RealPath.getError();
+      return RealPath->str();
+    }();
+
+    Insertion.first->second = new (RealPathStorage.Allocate())
+        CachedRealPath(std::move(OwnedRealPath));
+  }
+
+  return *Insertion.first->second;
+}
+
 /// Whitelist file extensions that should be minimized, treating no extension as
 /// a source file that should be minimized.
 ///
@@ -350,6 +379,53 @@ DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
   return DepScanFile::create(Result.get());
 }
 
+std::error_code DependencyScanningWorkerFilesystem::getRealPath(
+    const Twine &Path, SmallVectorImpl<char> &Output) const {
+  SmallString<256> OwnedFilename;
+  StringRef OriginalFilename = Path.toStringRef(OwnedFilename);
+
+  SmallString<256> PathBuf;
+  auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
+  if (!FilenameForLookup)
+    return FilenameForLookup.getError();
+
+  auto HandleCachedRealPath =
+      [&Output](const CachedRealPath &RealPath) -> std::error_code {
+    if (!RealPath)
+      return RealPath.getError();
+    Output.assign(RealPath->begin(), RealPath->end());
+    return {};
+  };
+
+  // If we already have the result in local cache, no work required.
+  if (const auto *RealPath =
+          LocalCache.findRealPathByFilename(*FilenameForLookup))
+    return HandleCachedRealPath(*RealPath);
+
+  // If we have the result in the shared cache, cache it locally.
+  auto &Shard = SharedCache.getShardForFilename(*FilenameForLookup);
+  if (const auto *ShardRealPath =
+          Shard.findRealPathByFilename(*FilenameForLookup)) {
+    const auto &RealPath =
+        LocalCache.insertRealPathForFilename(*FilenameForLookup, *ShardRealPath);
+    return HandleCachedRealPath(RealPath);
+  }
+
+  // If we don't know the real path, compute it...
+  std::error_code EC = getUnderlyingFS().getRealPath(OriginalFilename, Output);
+  llvm::ErrorOr<llvm::StringRef> ComputedRealPath = EC;
+  if (!EC)
+    ComputedRealPath = StringRef{Output.data(), Output.size()};
+
+  // ...and try to write it into the shared cache. In case some other thread won
+  // this race and already wrote its own result there, just adopt it. Write
+  // whatever is in the shared cache into the local one.
+  const auto &RealPath = Shard.getOrEmplaceRealPathForFilename(
+      *FilenameForLookup, ComputedRealPath);
+  return HandleCachedRealPath(
+      LocalCache.insertRealPathForFilename(*FilenameForLookup, RealPath));
+}
+
 std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory(
     const Twine &Path) {
   std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path);

>From ae53ff7f0a2e026cfe625e9ae003bc9828aec48d Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Fri, 13 Oct 2023 13:26:28 -0700
Subject: [PATCH 3/3] WIP

---
 .../DependencyScanningFilesystem.h            | 37 +++++++++----------
 .../DependencyScanningFilesystem.cpp          | 27 +++++++-------
 2 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index b7ffd377ea017fa..bf596c0fca56c03 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -156,9 +156,11 @@ class DependencyScanningFilesystemSharedCache {
     /// The mutex that needs to be locked before mutation of any member.
     mutable std::mutex CacheLock;
 
-    /// Map from filenames to cached entries.
-    llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>
-        EntriesByFilename;
+    /// Map from filenames to cached entries and real paths.
+    llvm::StringMap<
+        std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
+        llvm::BumpPtrAllocator>
+        CacheByFilename;
 
     /// Map from unique IDs to cached entries.
     llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
@@ -170,9 +172,6 @@ class DependencyScanningFilesystemSharedCache {
     /// The backing storage for cached contents.
     llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
 
-    /// Map from filenames to cached real paths.
-    llvm::StringMap<const CachedRealPath *> RealPathsByFilename;
-
     /// The backing storage for cached real paths.
     llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
 
@@ -229,16 +228,17 @@ class DependencyScanningFilesystemSharedCache {
 /// This class is a local cache, that caches the 'stat' and 'open' calls to the
 /// underlying real file system.
 class DependencyScanningFilesystemLocalCache {
-  llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
-
-  llvm::StringMap<const CachedRealPath *, llvm::BumpPtrAllocator> RealPathCache;
+  llvm::StringMap<
+      std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
+      llvm::BumpPtrAllocator>
+      Cache;
 
 public:
   /// Returns entry associated with the filename or nullptr if none is found.
   const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
     assert(llvm::sys::path::is_absolute_gnu(Filename));
     auto It = Cache.find(Filename);
-    return It == Cache.end() ? nullptr : It->getValue();
+    return It == Cache.end() ? nullptr : It->getValue().first;
   }
 
   /// Associates the given entry with the filename and returns the given entry
@@ -247,17 +247,17 @@ class DependencyScanningFilesystemLocalCache {
   insertEntryForFilename(StringRef Filename,
                          const CachedFileSystemEntry &Entry) {
     assert(llvm::sys::path::is_absolute_gnu(Filename));
-    const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second;
-    assert(InsertedEntry == &Entry && "entry already present");
-    return *InsertedEntry;
+    assert(Cache[Filename].first == nullptr && "entry already present");
+    Cache[Filename].first = &Entry;
+    return Entry;
   }
 
   /// Returns real path associated with the filename or nullptr if none is
   /// found.
   const CachedRealPath *findRealPathByFilename(StringRef Filename) const {
     assert(llvm::sys::path::is_absolute_gnu(Filename));
-    auto It = RealPathCache.find(Filename);
-    return It == RealPathCache.end() ? nullptr : It->getValue();
+    auto It = Cache.find(Filename);
+    return It == Cache.end() ? nullptr : It->getValue().second;
   }
 
   /// Associates the given real path with the filename and returns the given
@@ -266,10 +266,9 @@ class DependencyScanningFilesystemLocalCache {
   insertRealPathForFilename(StringRef Filename,
                             const CachedRealPath &RealPath) {
     assert(llvm::sys::path::is_absolute_gnu(Filename));
-    const auto *InsertedRealPath =
-        RealPathCache.insert({Filename, &RealPath}).first->second;
-    assert(InsertedRealPath == &RealPath && "entry already present");
-    return *InsertedRealPath;
+    assert(Cache[Filename].second == nullptr && "entry already present");
+    Cache[Filename].second = &RealPath;
+    return RealPath;
   }
 };
 
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 01b94efce6a8674..2e19829e1dd1d6c 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -112,8 +112,8 @@ DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
     StringRef Filename) const {
   assert(llvm::sys::path::is_absolute_gnu(Filename));
   std::lock_guard<std::mutex> LockGuard(CacheLock);
-  auto It = EntriesByFilename.find(Filename);
-  return It == EntriesByFilename.end() ? nullptr : It->getValue();
+  auto It = CacheByFilename.find(Filename);
+  return It == CacheByFilename.end() ? nullptr : It->getValue().first;
 }
 
 const CachedFileSystemEntry *
@@ -129,11 +129,11 @@ DependencyScanningFilesystemSharedCache::CacheShard::
     getOrEmplaceEntryForFilename(StringRef Filename,
                                  llvm::ErrorOr<llvm::vfs::Status> Stat) {
   std::lock_guard<std::mutex> LockGuard(CacheLock);
-  auto Insertion = EntriesByFilename.insert({Filename, nullptr});
-  if (Insertion.second)
-    Insertion.first->second =
+  const CachedFileSystemEntry *StoredEntry = CacheByFilename[Filename].first;
+  if (!StoredEntry)
+    StoredEntry =
         new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
-  return *Insertion.first->second;
+  return *StoredEntry;
 }
 
 const CachedFileSystemEntry &
@@ -158,7 +158,8 @@ DependencyScanningFilesystemSharedCache::CacheShard::
     getOrInsertEntryForFilename(StringRef Filename,
                                 const CachedFileSystemEntry &Entry) {
   std::lock_guard<std::mutex> LockGuard(CacheLock);
-  return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
+  CacheByFilename[Filename].first = &Entry;
+  return Entry;
 }
 
 const CachedRealPath *
@@ -166,8 +167,8 @@ DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename(
     StringRef Filename) const {
   assert(llvm::sys::path::is_absolute_gnu(Filename));
   std::lock_guard<std::mutex> LockGuard(CacheLock);
-  auto It = RealPathsByFilename.find(Filename);
-  return It == RealPathsByFilename.end() ? nullptr : It->getValue();
+  auto It = CacheByFilename.find(Filename);
+  return It == CacheByFilename.end() ? nullptr : It->getValue().second;
 }
 
 const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard::
@@ -175,19 +176,19 @@ const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard::
                                     llvm::ErrorOr<llvm::StringRef> RealPath) {
   std::lock_guard<std::mutex> LockGuard(CacheLock);
 
-  auto Insertion = RealPathsByFilename.insert({Filename, nullptr});
-  if (Insertion.second) {
+  const CachedRealPath *StoredRealPath = CacheByFilename[Filename].second;
+  if (!StoredRealPath) {
     auto OwnedRealPath = [&]() -> CachedRealPath {
       if (!RealPath)
         return RealPath.getError();
       return RealPath->str();
     }();
 
-    Insertion.first->second = new (RealPathStorage.Allocate())
+    StoredRealPath = new (RealPathStorage.Allocate())
         CachedRealPath(std::move(OwnedRealPath));
   }
 
-  return *Insertion.first->second;
+  return *StoredRealPath;
 }
 
 /// Whitelist file extensions that should be minimized, treating no extension as



More information about the cfe-commits mailing list