[clang] a11a432 - [clang][deps] Cache `VFS::getRealPath()` (#68645)

via cfe-commits cfe-commits at lists.llvm.org
Fri Apr 12 10:34:45 PDT 2024


Author: Jan Svoboda
Date: 2024-04-12T10:34:42-07:00
New Revision: a11a4324bb27c01e7a005e1a7f49fb8284098e8c

URL: https://github.com/llvm/llvm-project/commit/a11a4324bb27c01e7a005e1a7f49fb8284098e8c
DIFF: https://github.com/llvm/llvm-project/commit/a11a4324bb27c01e7a005e1a7f49fb8284098e8c.diff

LOG: [clang][deps] Cache `VFS::getRealPath()` (#68645)

This PR starts caching calls to
`DependencyScanningWorkerFilesystem::getRealPath()` that we use whenever
we canonicalize module map path. In the case of the real VFS, this
functions performs an expensive syscall that we'd like to do as rarely
as possible.

This PR keeps the real path out of `CachedFileSystemEntry`, since that's
**immutable**; populating the real path on creation of this data
structure (every stat/open) would be expensive.

Added: 
    clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp

Modified: 
    clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
    clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
    clang/unittests/Tooling/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index 4cd0f958fcff82..8b6f149c7cb266 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -142,6 +142,8 @@ class CachedFileSystemEntry {
   CachedFileContents *Contents;
 };
 
+using CachedRealPath = llvm::ErrorOr<std::string>;
+
 /// This class is a shared cache, that caches the 'stat' and 'open' calls to the
 /// underlying real file system, and the scanned preprocessor directives of
 /// files.
@@ -154,9 +156,11 @@ class DependencyScanningFilesystemSharedCache {
     /// The mutex that needs to be locked before mutation of any member.
     mutable std::mutex CacheLock;
 
-    /// Map from filenames to cached entries.
-    llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>
-        EntriesByFilename;
+    /// Map from filenames to cached entries and real paths.
+    llvm::StringMap<
+        std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
+        llvm::BumpPtrAllocator>
+        CacheByFilename;
 
     /// Map from unique IDs to cached entries.
     llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
@@ -168,6 +172,9 @@ class DependencyScanningFilesystemSharedCache {
     /// The backing storage for cached contents.
     llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
 
+    /// The backing storage for cached real paths.
+    llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
+
     /// Returns entry associated with the filename or nullptr if none is found.
     const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
 
@@ -194,6 +201,17 @@ class DependencyScanningFilesystemSharedCache {
     const CachedFileSystemEntry &
     getOrInsertEntryForFilename(StringRef Filename,
                                 const CachedFileSystemEntry &Entry);
+
+    /// Returns the real path associated with the filename or nullptr if none is
+    /// found.
+    const CachedRealPath *findRealPathByFilename(StringRef Filename) const;
+
+    /// Returns the real path associated with the filename if there is some.
+    /// Otherwise, constructs new one with the given one, associates it with the
+    /// filename and returns the result.
+    const CachedRealPath &
+    getOrEmplaceRealPathForFilename(StringRef Filename,
+                                    llvm::ErrorOr<StringRef> RealPath);
   };
 
   DependencyScanningFilesystemSharedCache();
@@ -210,14 +228,17 @@ class DependencyScanningFilesystemSharedCache {
 /// This class is a local cache, that caches the 'stat' and 'open' calls to the
 /// underlying real file system.
 class DependencyScanningFilesystemLocalCache {
-  llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
+  llvm::StringMap<
+      std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
+      llvm::BumpPtrAllocator>
+      Cache;
 
 public:
   /// Returns entry associated with the filename or nullptr if none is found.
   const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
     assert(llvm::sys::path::is_absolute_gnu(Filename));
     auto It = Cache.find(Filename);
-    return It == Cache.end() ? nullptr : It->getValue();
+    return It == Cache.end() ? nullptr : It->getValue().first;
   }
 
   /// Associates the given entry with the filename and returns the given entry
@@ -226,9 +247,40 @@ class DependencyScanningFilesystemLocalCache {
   insertEntryForFilename(StringRef Filename,
                          const CachedFileSystemEntry &Entry) {
     assert(llvm::sys::path::is_absolute_gnu(Filename));
-    const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second;
-    assert(InsertedEntry == &Entry && "entry already present");
-    return *InsertedEntry;
+    auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}});
+    auto &[CachedEntry, CachedRealPath] = It->getValue();
+    if (!Inserted) {
+      // The file is already present in the local cache. If we got here, it only
+      // contains the real path. Let's make sure the entry is populated too.
+      assert((!CachedEntry && CachedRealPath) && "entry already present");
+      CachedEntry = &Entry;
+    }
+    return *CachedEntry;
+  }
+
+  /// Returns real path associated with the filename or nullptr if none is
+  /// found.
+  const CachedRealPath *findRealPathByFilename(StringRef Filename) const {
+    assert(llvm::sys::path::is_absolute_gnu(Filename));
+    auto It = Cache.find(Filename);
+    return It == Cache.end() ? nullptr : It->getValue().second;
+  }
+
+  /// Associates the given real path with the filename and returns the given
+  /// entry pointer (for convenience).
+  const CachedRealPath &
+  insertRealPathForFilename(StringRef Filename,
+                            const CachedRealPath &RealPath) {
+    assert(llvm::sys::path::is_absolute_gnu(Filename));
+    auto [It, Inserted] = Cache.insert({Filename, {nullptr, &RealPath}});
+    auto &[CachedEntry, CachedRealPath] = It->getValue();
+    if (!Inserted) {
+      // The file is already present in the local cache. If we got here, it only
+      // contains the entry. Let's make sure the real path is populated too.
+      assert((!CachedRealPath && CachedEntry) && "real path already present");
+      CachedRealPath = &RealPath;
+    }
+    return *CachedRealPath;
   }
 };
 
@@ -296,6 +348,9 @@ class DependencyScanningWorkerFilesystem
   llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
   openFileForRead(const Twine &Path) override;
 
+  std::error_code getRealPath(const Twine &Path,
+                              SmallVectorImpl<char> &Output) override;
+
   std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
 
   /// Returns entry for the given filename.

diff  --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index c66780d50fa184..84185c931b552c 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -113,8 +113,8 @@ DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
     StringRef Filename) const {
   assert(llvm::sys::path::is_absolute_gnu(Filename));
   std::lock_guard<std::mutex> LockGuard(CacheLock);
-  auto It = EntriesByFilename.find(Filename);
-  return It == EntriesByFilename.end() ? nullptr : It->getValue();
+  auto It = CacheByFilename.find(Filename);
+  return It == CacheByFilename.end() ? nullptr : It->getValue().first;
 }
 
 const CachedFileSystemEntry *
@@ -130,11 +130,16 @@ DependencyScanningFilesystemSharedCache::CacheShard::
     getOrEmplaceEntryForFilename(StringRef Filename,
                                  llvm::ErrorOr<llvm::vfs::Status> Stat) {
   std::lock_guard<std::mutex> LockGuard(CacheLock);
-  auto Insertion = EntriesByFilename.insert({Filename, nullptr});
-  if (Insertion.second)
-    Insertion.first->second =
+  auto [It, Inserted] = CacheByFilename.insert({Filename, {nullptr, nullptr}});
+  auto &[CachedEntry, CachedRealPath] = It->getValue();
+  if (!CachedEntry) {
+    // The entry is not present in the shared cache. Either the cache doesn't
+    // know about the file at all, or it only knows about its real path.
+    assert((Inserted || CachedRealPath) && "existing file with empty pair");
+    CachedEntry =
         new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
-  return *Insertion.first->second;
+  }
+  return *CachedEntry;
 }
 
 const CachedFileSystemEntry &
@@ -142,16 +147,17 @@ DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
     llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
     std::unique_ptr<llvm::MemoryBuffer> Contents) {
   std::lock_guard<std::mutex> LockGuard(CacheLock);
-  auto Insertion = EntriesByUID.insert({UID, nullptr});
-  if (Insertion.second) {
+  auto [It, Inserted] = EntriesByUID.insert({UID, nullptr});
+  auto &CachedEntry = It->getSecond();
+  if (Inserted) {
     CachedFileContents *StoredContents = nullptr;
     if (Contents)
       StoredContents = new (ContentsStorage.Allocate())
           CachedFileContents(std::move(Contents));
-    Insertion.first->second = new (EntryStorage.Allocate())
+    CachedEntry = new (EntryStorage.Allocate())
         CachedFileSystemEntry(std::move(Stat), StoredContents);
   }
-  return *Insertion.first->second;
+  return *CachedEntry;
 }
 
 const CachedFileSystemEntry &
@@ -159,7 +165,40 @@ DependencyScanningFilesystemSharedCache::CacheShard::
     getOrInsertEntryForFilename(StringRef Filename,
                                 const CachedFileSystemEntry &Entry) {
   std::lock_guard<std::mutex> LockGuard(CacheLock);
-  return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
+  auto [It, Inserted] = CacheByFilename.insert({Filename, {&Entry, nullptr}});
+  auto &[CachedEntry, CachedRealPath] = It->getValue();
+  if (!Inserted || !CachedEntry)
+    CachedEntry = &Entry;
+  return *CachedEntry;
+}
+
+const CachedRealPath *
+DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename(
+    StringRef Filename) const {
+  assert(llvm::sys::path::is_absolute_gnu(Filename));
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto It = CacheByFilename.find(Filename);
+  return It == CacheByFilename.end() ? nullptr : It->getValue().second;
+}
+
+const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard::
+    getOrEmplaceRealPathForFilename(StringRef Filename,
+                                    llvm::ErrorOr<llvm::StringRef> RealPath) {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+
+  const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].second;
+  if (!StoredRealPath) {
+    auto OwnedRealPath = [&]() -> CachedRealPath {
+      if (!RealPath)
+        return RealPath.getError();
+      return RealPath->str();
+    }();
+
+    StoredRealPath = new (RealPathStorage.Allocate())
+        CachedRealPath(std::move(OwnedRealPath));
+  }
+
+  return *StoredRealPath;
 }
 
 static bool shouldCacheStatFailures(StringRef Filename) {
@@ -321,6 +360,54 @@ DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
   return DepScanFile::create(Result.get());
 }
 
+std::error_code
+DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path,
+                                                SmallVectorImpl<char> &Output) {
+  SmallString<256> OwnedFilename;
+  StringRef OriginalFilename = Path.toStringRef(OwnedFilename);
+
+  SmallString<256> PathBuf;
+  auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
+  if (!FilenameForLookup)
+    return FilenameForLookup.getError();
+
+  auto HandleCachedRealPath =
+      [&Output](const CachedRealPath &RealPath) -> std::error_code {
+    if (!RealPath)
+      return RealPath.getError();
+    Output.assign(RealPath->begin(), RealPath->end());
+    return {};
+  };
+
+  // If we already have the result in local cache, no work required.
+  if (const auto *RealPath =
+          LocalCache.findRealPathByFilename(*FilenameForLookup))
+    return HandleCachedRealPath(*RealPath);
+
+  // If we have the result in the shared cache, cache it locally.
+  auto &Shard = SharedCache.getShardForFilename(*FilenameForLookup);
+  if (const auto *ShardRealPath =
+          Shard.findRealPathByFilename(*FilenameForLookup)) {
+    const auto &RealPath = LocalCache.insertRealPathForFilename(
+        *FilenameForLookup, *ShardRealPath);
+    return HandleCachedRealPath(RealPath);
+  }
+
+  // If we don't know the real path, compute it...
+  std::error_code EC = getUnderlyingFS().getRealPath(OriginalFilename, Output);
+  llvm::ErrorOr<llvm::StringRef> ComputedRealPath = EC;
+  if (!EC)
+    ComputedRealPath = StringRef{Output.data(), Output.size()};
+
+  // ...and try to write it into the shared cache. In case some other thread won
+  // this race and already wrote its own result there, just adopt it. Write
+  // whatever is in the shared cache into the local one.
+  const auto &RealPath = Shard.getOrEmplaceRealPathForFilename(
+      *FilenameForLookup, ComputedRealPath);
+  return HandleCachedRealPath(
+      LocalCache.insertRealPathForFilename(*FilenameForLookup, RealPath));
+}
+
 std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory(
     const Twine &Path) {
   std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path);

diff  --git a/clang/unittests/Tooling/CMakeLists.txt b/clang/unittests/Tooling/CMakeLists.txt
index 2ff493ef5fc324..0eb612f8d94987 100644
--- a/clang/unittests/Tooling/CMakeLists.txt
+++ b/clang/unittests/Tooling/CMakeLists.txt
@@ -24,6 +24,7 @@ add_clang_unittest(ToolingTests
   QualTypeNamesTest.cpp
   RangeSelectorTest.cpp
   DependencyScanning/DependencyScannerTest.cpp
+  DependencyScanning/DependencyScanningFilesystemTest.cpp
   RecursiveASTVisitorTests/Attr.cpp
   RecursiveASTVisitorTests/BitfieldInitializer.cpp
   RecursiveASTVisitorTests/CallbacksLeaf.cpp

diff  --git a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
new file mode 100644
index 00000000000000..50cad72d223e35
--- /dev/null
+++ b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp
@@ -0,0 +1,152 @@
+//===- DependencyScanningFilesystemTest.cpp -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "gtest/gtest.h"
+
+using namespace clang::tooling::dependencies;
+
+namespace {
+struct InstrumentingFilesystem
+    : llvm::RTTIExtends<InstrumentingFilesystem, llvm::vfs::ProxyFileSystem> {
+  unsigned NumStatusCalls = 0;
+  unsigned NumGetRealPathCalls = 0;
+
+  using llvm::RTTIExtends<InstrumentingFilesystem,
+                          llvm::vfs::ProxyFileSystem>::RTTIExtends;
+
+  llvm::ErrorOr<llvm::vfs::Status> status(const llvm::Twine &Path) override {
+    ++NumStatusCalls;
+    return ProxyFileSystem::status(Path);
+  }
+
+  std::error_code getRealPath(const llvm::Twine &Path,
+                              llvm::SmallVectorImpl<char> &Output) override {
+    ++NumGetRealPathCalls;
+    return ProxyFileSystem::getRealPath(Path, Output);
+  }
+};
+} // namespace
+
+TEST(DependencyScanningWorkerFilesystem, CacheStatusFailures) {
+  auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+
+  auto InstrumentingFS =
+      llvm::makeIntrusiveRefCnt<InstrumentingFilesystem>(InMemoryFS);
+
+  DependencyScanningFilesystemSharedCache SharedCache;
+  DependencyScanningWorkerFilesystem DepFS(SharedCache, InstrumentingFS);
+  DependencyScanningWorkerFilesystem DepFS2(SharedCache, InstrumentingFS);
+
+  DepFS.status("/foo.c");
+  EXPECT_EQ(InstrumentingFS->NumStatusCalls, 1u);
+
+  DepFS.status("/foo.c");
+  EXPECT_EQ(InstrumentingFS->NumStatusCalls, 1u); // Cached, no increase.
+
+  DepFS.status("/bar.c");
+  EXPECT_EQ(InstrumentingFS->NumStatusCalls, 2u);
+
+  DepFS2.status("/foo.c");
+  EXPECT_EQ(InstrumentingFS->NumStatusCalls, 2u); // Shared cache.
+}
+
+TEST(DependencyScanningFilesystem, CacheGetRealPath) {
+  auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  InMemoryFS->setCurrentWorkingDirectory("/");
+  InMemoryFS->addFile("/foo", 0, llvm::MemoryBuffer::getMemBuffer(""));
+  InMemoryFS->addFile("/bar", 0, llvm::MemoryBuffer::getMemBuffer(""));
+
+  auto InstrumentingFS =
+      llvm::makeIntrusiveRefCnt<InstrumentingFilesystem>(InMemoryFS);
+
+  DependencyScanningFilesystemSharedCache SharedCache;
+  DependencyScanningWorkerFilesystem DepFS(SharedCache, InstrumentingFS);
+  DependencyScanningWorkerFilesystem DepFS2(SharedCache, InstrumentingFS);
+
+  {
+    llvm::SmallString<128> Result;
+    DepFS.getRealPath("/foo", Result);
+    EXPECT_EQ(InstrumentingFS->NumGetRealPathCalls, 1u);
+  }
+
+  {
+    llvm::SmallString<128> Result;
+    DepFS.getRealPath("/foo", Result);
+    EXPECT_EQ(InstrumentingFS->NumGetRealPathCalls, 1u); // Cached, no increase.
+  }
+
+  {
+    llvm::SmallString<128> Result;
+    DepFS.getRealPath("/bar", Result);
+    EXPECT_EQ(InstrumentingFS->NumGetRealPathCalls, 2u);
+  }
+
+  {
+    llvm::SmallString<128> Result;
+    DepFS2.getRealPath("/foo", Result);
+    EXPECT_EQ(InstrumentingFS->NumGetRealPathCalls, 2u); // Shared cache.
+  }
+}
+
+TEST(DependencyScanningFilesystem, RealPathAndStatusInvariants) {
+  auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  InMemoryFS->setCurrentWorkingDirectory("/");
+  InMemoryFS->addFile("/foo.c", 0, llvm::MemoryBuffer::getMemBuffer(""));
+  InMemoryFS->addFile("/bar.c", 0, llvm::MemoryBuffer::getMemBuffer(""));
+
+  auto InstrumentingFS =
+      llvm::makeIntrusiveRefCnt<InstrumentingFilesystem>(InMemoryFS);
+
+  DependencyScanningFilesystemSharedCache SharedCache;
+  DependencyScanningWorkerFilesystem DepFS(SharedCache, InstrumentingFS);
+
+  // Success.
+  {
+    DepFS.status("/foo.c");
+
+    llvm::SmallString<128> Result;
+    DepFS.getRealPath("/foo.c", Result);
+  }
+  {
+    llvm::SmallString<128> Result;
+    DepFS.getRealPath("/bar.c", Result);
+
+    DepFS.status("/bar.c");
+  }
+
+  // Failure.
+  {
+    DepFS.status("/foo.m");
+
+    llvm::SmallString<128> Result;
+    DepFS.getRealPath("/foo.m", Result);
+  }
+  {
+    llvm::SmallString<128> Result;
+    DepFS.getRealPath("/bar.m", Result);
+
+    DepFS.status("/bar.m");
+  }
+
+  // Failure without caching.
+  {
+    DepFS.status("/foo");
+
+    llvm::SmallString<128> Result;
+    DepFS.getRealPath("/foo", Result);
+  }
+  {
+    llvm::SmallString<128> Result;
+    DepFS.getRealPath("/bar", Result);
+
+    DepFS.status("/bar");
+  }
+}


        


More information about the cfe-commits mailing list