[llvm-branch-commits] [clang-tools-extra] d95db16 - [clangd] Extract common file-caching logic from ConfigProvider.

Sam McCall via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Nov 25 03:14:07 PST 2020


Author: Sam McCall
Date: 2020-11-25T12:09:13+01:00
New Revision: d95db1693cbf80b9de58a94b50178fddd62c3e15

URL: https://github.com/llvm/llvm-project/commit/d95db1693cbf80b9de58a94b50178fddd62c3e15
DIFF: https://github.com/llvm/llvm-project/commit/d95db1693cbf80b9de58a94b50178fddd62c3e15.diff

LOG: [clangd] Extract common file-caching logic from ConfigProvider.

The plan is to use this to use this for .clang-format, .clang-tidy, and
compile_commands.json. (Currently the former two are reparsed every
time, and the latter is cached forever and changes are never seen).

Differential Revision: https://reviews.llvm.org/D88172

Added: 
    clang-tools-extra/clangd/support/FileCache.cpp
    clang-tools-extra/clangd/support/FileCache.h
    clang-tools-extra/clangd/unittests/support/FileCacheTests.cpp

Modified: 
    clang-tools-extra/clangd/ConfigProvider.cpp
    clang-tools-extra/clangd/ConfigProvider.h
    clang-tools-extra/clangd/support/CMakeLists.txt
    clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/ConfigProvider.cpp b/clang-tools-extra/clangd/ConfigProvider.cpp
index 5a00ec5048c0..e020ec04d1d2 100644
--- a/clang-tools-extra/clangd/ConfigProvider.cpp
+++ b/clang-tools-extra/clangd/ConfigProvider.cpp
@@ -9,6 +9,7 @@
 #include "ConfigProvider.h"
 #include "Config.h"
 #include "ConfigFragment.h"
+#include "support/FileCache.h"
 #include "support/ThreadsafeFS.h"
 #include "support/Trace.h"
 #include "llvm/ADT/ScopeExit.h"
@@ -24,89 +25,28 @@ namespace clangd {
 namespace config {
 
 // Threadsafe cache around reading a YAML config file from disk.
-class FileConfigCache {
-  std::mutex Mu;
-  std::chrono::steady_clock::time_point ValidTime = {};
-  llvm::SmallVector<CompiledFragment, 1> CachedValue;
-  llvm::sys::TimePoint<> MTime = {};
-  unsigned Size = -1;
-
-  // Called once we are sure we want to read the file.
-  // REQUIRES: Cache keys are set. Mutex must be held.
-  void fillCacheFromDisk(llvm::vfs::FileSystem &FS, DiagnosticCallback DC) {
-    CachedValue.clear();
-
-    auto Buf = FS.getBufferForFile(Path);
-    // If we failed to read (but stat succeeded), don't cache failure.
-    if (!Buf) {
-      Size = -1;
-      MTime = {};
-      return;
-    }
-
-    // If file changed between stat and open, we don't know its mtime.
-    // For simplicity, don't cache the value in this case (use a bad key).
-    if (Buf->get()->getBufferSize() != Size) {
-      Size = -1;
-      MTime = {};
-    }
-
-    // Finally parse and compile the actual fragments.
-    for (auto &Fragment :
-         Fragment::parseYAML(Buf->get()->getBuffer(), Path, DC)) {
-      Fragment.Source.Directory = Directory;
-      CachedValue.push_back(std::move(Fragment).compile(DC));
-    }
-  }
-
-public:
-  // Must be set before the cache is used. Not a constructor param to allow
-  // computing ancestor-relative paths to be deferred.
-  std::string Path;
-  // Directory associated with this fragment.
+class FileConfigCache : public FileCache {
+  mutable llvm::SmallVector<CompiledFragment, 1> CachedValue;
   std::string Directory;
 
-  // Retrieves up-to-date config fragments from disk.
-  // A cached result may be reused if the mtime and size are unchanged.
-  // (But several concurrent read()s can miss the cache after a single change).
-  // Future performance ideas:
-  // - allow caches to be reused based on short elapsed walltime
-  // - allow latency-sensitive operations to skip revalidating the cache
-  void read(const ThreadsafeFS &TFS, DiagnosticCallback DC,
-            llvm::Optional<std::chrono::steady_clock::time_point> FreshTime,
-            std::vector<CompiledFragment> &Out) {
-    std::lock_guard<std::mutex> Lock(Mu);
-    // We're going to update the cache and return whatever's in it.
-    auto Return = llvm::make_scope_exit(
-        [&] { llvm::copy(CachedValue, std::back_inserter(Out)); });
-
-    // Return any sufficiently recent result without doing any further work.
-    if (FreshTime && ValidTime >= FreshTime)
-      return;
-
-    // Ensure we bump the ValidTime at the end to allow for reuse.
-    auto MarkTime = llvm::make_scope_exit(
-        [&] { ValidTime = std::chrono::steady_clock::now(); });
-
-    // Stat is cheaper than opening the file, it's usually unchanged.
-    assert(llvm::sys::path::is_absolute(Path));
-    auto FS = TFS.view(/*CWD=*/llvm::None);
-    auto Stat = FS->status(Path);
-    // If there's no file, the result is empty. Ensure we have an invalid key.
-    if (!Stat || !Stat->isRegularFile()) {
-      MTime = {};
-      Size = -1;
-      CachedValue.clear();
-      return;
-    }
-    // If the modified-time and size match, assume the content does too.
-    if (Size == Stat->getSize() && MTime == Stat->getLastModificationTime())
-      return;
-
-    // OK, the file has actually changed. Update cache key, compute new value.
-    Size = Stat->getSize();
-    MTime = Stat->getLastModificationTime();
-    fillCacheFromDisk(*FS, DC);
+public:
+  FileConfigCache(llvm::StringRef Path, llvm::StringRef Directory)
+      : FileCache(Path), Directory(Directory) {}
+
+  void get(const ThreadsafeFS &TFS, DiagnosticCallback DC,
+           std::chrono::steady_clock::time_point FreshTime,
+           std::vector<CompiledFragment> &Out) const {
+    read(
+        TFS, FreshTime,
+        [&](llvm::Optional<llvm::StringRef> Data) {
+          CachedValue.clear();
+          if (Data)
+            for (auto &Fragment : Fragment::parseYAML(*Data, path(), DC)) {
+              Fragment.Source.Directory = Directory;
+              CachedValue.push_back(std::move(Fragment).compile(DC));
+            }
+        },
+        [&]() { llvm::copy(CachedValue, std::back_inserter(Out)); });
   }
 };
 
@@ -120,17 +60,15 @@ std::unique_ptr<Provider> Provider::fromYAMLFile(llvm::StringRef AbsPath,
     std::vector<CompiledFragment>
     getFragments(const Params &P, DiagnosticCallback DC) const override {
       std::vector<CompiledFragment> Result;
-      Cache.read(FS, DC, P.FreshTime, Result);
+      Cache.get(FS, DC, P.FreshTime, Result);
       return Result;
     };
 
   public:
     AbsFileProvider(llvm::StringRef Path, llvm::StringRef Directory,
                     const ThreadsafeFS &FS)
-        : FS(FS) {
+        : Cache(Path, Directory), FS(FS) {
       assert(llvm::sys::path::is_absolute(Path));
-      Cache.Path = Path.str();
-      Cache.Directory = Directory.str();
     }
   };
 
@@ -174,23 +112,21 @@ Provider::fromAncestorRelativeYAMLFiles(llvm::StringRef RelPath,
       {
         std::lock_guard<std::mutex> Lock(Mu);
         for (llvm::StringRef Ancestor : Ancestors) {
-          auto R = Cache.try_emplace(Ancestor);
+          auto It = Cache.find(Ancestor);
           // Assemble the actual config file path only once.
-          if (R.second) {
+          if (It == Cache.end()) {
             llvm::SmallString<256> ConfigPath = Ancestor;
             path::append(ConfigPath, RelPath);
-            R.first->second.Path = ConfigPath.str().str();
-            R.first->second.Directory = Ancestor.str();
+            It = Cache.try_emplace(Ancestor, ConfigPath.str(), Ancestor).first;
           }
-          Caches.push_back(&R.first->second);
+          Caches.push_back(&It->second);
         }
       }
       // Finally query each individual file.
       // This will take a (per-file) lock for each file that actually exists.
       std::vector<CompiledFragment> Result;
-      for (FileConfigCache *Cache : Caches) {
-        Cache->read(FS, DC, P.FreshTime, Result);
-      }
+      for (FileConfigCache *Cache : Caches)
+        Cache->get(FS, DC, P.FreshTime, Result);
       return Result;
     };
 

diff  --git a/clang-tools-extra/clangd/ConfigProvider.h b/clang-tools-extra/clangd/ConfigProvider.h
index 54f4b1467236..67ab833ee551 100644
--- a/clang-tools-extra/clangd/ConfigProvider.h
+++ b/clang-tools-extra/clangd/ConfigProvider.h
@@ -38,8 +38,9 @@ struct Params {
   llvm::StringRef Path;
   /// Hint that stale data is OK to improve performance (e.g. avoid IO).
   /// FreshTime sets a bound for how old the data can be.
-  /// If not set, providers should validate caches against the data source.
-  llvm::Optional<std::chrono::steady_clock::time_point> FreshTime;
+  /// By default, providers should validate caches against the data source.
+  std::chrono::steady_clock::time_point FreshTime =
+      std::chrono::steady_clock::time_point::max();
 };
 
 /// Used to report problems in parsing or interpreting a config.

diff  --git a/clang-tools-extra/clangd/support/CMakeLists.txt b/clang-tools-extra/clangd/support/CMakeLists.txt
index e3412447142c..f0fe073eb136 100644
--- a/clang-tools-extra/clangd/support/CMakeLists.txt
+++ b/clang-tools-extra/clangd/support/CMakeLists.txt
@@ -19,6 +19,7 @@ include_directories(..)
 add_clang_library(clangdSupport
   Cancellation.cpp
   Context.cpp
+  FileCache.cpp
   Logger.cpp
   Markup.cpp
   MemoryTree.cpp

diff  --git a/clang-tools-extra/clangd/support/FileCache.cpp b/clang-tools-extra/clangd/support/FileCache.cpp
new file mode 100644
index 000000000000..4c2795015845
--- /dev/null
+++ b/clang-tools-extra/clangd/support/FileCache.cpp
@@ -0,0 +1,80 @@
+//===--- FileCache.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "support/FileCache.h"
+
+namespace clang {
+namespace clangd {
+
+// Sentinel values for the Size cache key. In both cases, a successful stat of
+// the file will never result in the cached value being reused.
+
+// The cached value does not reflect the current content on disk.
+static constexpr uint64_t CacheDiskMismatch =
+    std::numeric_limits<uint64_t>::max();
+// The cached value reflects that the file doesn't exist.
+static constexpr uint64_t FileNotFound = CacheDiskMismatch - 1;
+
+FileCache::FileCache(llvm::StringRef Path)
+    : Path(Path), ValidTime(std::chrono::steady_clock::time_point::min()),
+      ModifiedTime(), Size(CacheDiskMismatch) {
+  assert(llvm::sys::path::is_absolute(Path));
+}
+
+void FileCache::read(
+    const ThreadsafeFS &TFS, std::chrono::steady_clock::time_point FreshTime,
+    llvm::function_ref<void(llvm::Optional<llvm::StringRef>)> Parse,
+    llvm::function_ref<void()> Read) const {
+
+  std::lock_guard<std::mutex> Lock(Mu);
+  // We're going to update the cache and return whatever's in it.
+  auto Return = llvm::make_scope_exit(Read);
+
+  // Return any sufficiently recent result without doing any further work.
+  if (ValidTime > FreshTime)
+    return;
+
+  // Ensure we always bump ValidTime, so that FreshTime imposes a hard limit on
+  // how often we do IO.
+  auto BumpValidTime = llvm::make_scope_exit(
+      [&] { ValidTime = std::chrono::steady_clock::now(); });
+
+  // stat is cheaper than opening the file. It's usually unchanged.
+  assert(llvm::sys::path::is_absolute(Path));
+  auto FS = TFS.view(/*CWD=*/llvm::None);
+  auto Stat = FS->status(Path);
+  if (!Stat || !Stat->isRegularFile()) {
+    if (Size != FileNotFound) // Allow "not found" value to be cached.
+      Parse(llvm::None);
+    // Ensure the cache key won't match any future stat().
+    Size = FileNotFound;
+    return;
+  }
+  // If the modified-time and size match, assume the content does too.
+  if (Size == Stat->getSize() &&
+      ModifiedTime == Stat->getLastModificationTime())
+    return;
+
+  // OK, the file has actually changed. Update cache key, compute new value.
+  Size = Stat->getSize();
+  ModifiedTime = Stat->getLastModificationTime();
+  // Now read the file from disk.
+  if (auto Buf = FS->getBufferForFile(Path)) {
+    Parse(Buf->get()->getBuffer());
+    // Result is cacheable if the actual read size matches the new cache key.
+    // (We can't update the cache key, because we don't know the new mtime).
+    if (Buf->get()->getBufferSize() != Size)
+      Size = CacheDiskMismatch;
+  } else {
+    // File was unreadable. Keep the old value and try again next time.
+    Size = CacheDiskMismatch;
+  }
+}
+
+} // namespace clangd
+} // namespace clang

diff  --git a/clang-tools-extra/clangd/support/FileCache.h b/clang-tools-extra/clangd/support/FileCache.h
new file mode 100644
index 000000000000..75782e9ae021
--- /dev/null
+++ b/clang-tools-extra/clangd/support/FileCache.h
@@ -0,0 +1,81 @@
+//===--- FileCache.h - Revalidating cache of data from disk ------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FILECACHE_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FILECACHE_H
+
+#include "Path.h"
+#include "ThreadsafeFS.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Support/Chrono.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include <mutex>
+
+namespace clang {
+namespace clangd {
+
+/// Base class for threadsafe cache of data read from a file on disk.
+///
+/// We want configuration files to be "live" as much as possible.
+/// Reading them every time is simplest, but caching solves a few problems:
+///  - reading and parsing is cheap but not free (and happens on hot paths)
+///  - we can ignore invalid data and use the old value (we may see truncated
+///    compile_commands.json from non-atomic writers)
+///  - we avoid reporting the same errors repeatedly
+///
+/// We still read and parse the data synchronously on demand, but skip as much
+/// work as possible:
+///  - if not enough wall-time has elapsed, assume the data is still up-to-date
+///  - if we stat the file and it has the same mtime + size, don't read it
+///  - obviously we only have to parse when we re-read the file
+/// (Tracking OS change events is an alternative, but 
diff icult to do portably.)
+///
+/// Caches for particular data (e.g. compilation databases) should inherit and:
+///  - add mutable storage for the cached parsed data
+///  - add a public interface implemented on top of read()
+class FileCache {
+protected:
+  // Path must be absolute.
+  FileCache(PathRef Path);
+
+  // Updates the cached value if needed, then provides threadsafe access to it.
+  //
+  // Specifically:
+  // - Parse() may be called (if the cache was not up-to-date)
+  //   The lock is held, so cache storage may be safely written.
+  //   Parse(None) means the file doesn't exist.
+  // - Read() will always be called, to provide access to the value.
+  //   The lock is again held, so the value can be copied or used.
+  //
+  // If the last Parse is newer than FreshTime, we don't check metadata.
+  //   - time_point::min() means we only do IO if we never read the file before
+  //   - time_point::max() means we always at least stat the file
+  //   - steady_clock::now() + seconds(1) means we accept 1 second of staleness
+  void read(const ThreadsafeFS &TFS,
+            std::chrono::steady_clock::time_point FreshTime,
+            llvm::function_ref<void(llvm::Optional<llvm::StringRef>)> Parse,
+            llvm::function_ref<void()> Read) const;
+
+  PathRef path() const { return Path; }
+
+private:
+  std::string Path;
+  // Members are mutable so read() can present a const interface.
+  // (It is threadsafe and approximates read-through to TFS).
+  mutable std::mutex Mu;
+  // Time when the cache was known valid (reflected disk state).
+  mutable std::chrono::steady_clock::time_point ValidTime;
+  // Filesystem metadata corresponding to the currently cached data.
+  mutable std::chrono::system_clock::time_point ModifiedTime;
+  mutable uint64_t Size;
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif

diff  --git a/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp b/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp
index 29ab8f3b3606..d6af5856cc49 100644
--- a/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp
@@ -151,6 +151,7 @@ TEST(ProviderTest, FromAncestorRelativeYAMLFiles) {
   EXPECT_THAT(getAddedArgs(Cfg), ElementsAre("bar", "baz"));
 }
 
+// FIXME: delete this test, it's covered by FileCacheTests.
 TEST(ProviderTest, Staleness) {
   MockFS FS;
 

diff  --git a/clang-tools-extra/clangd/unittests/support/FileCacheTests.cpp b/clang-tools-extra/clangd/unittests/support/FileCacheTests.cpp
new file mode 100644
index 000000000000..1f554806def0
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/support/FileCacheTests.cpp
@@ -0,0 +1,83 @@
+//===-- FileCacheTests.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "support/FileCache.h"
+
+#include "TestFS.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <atomic>
+#include <chrono>
+
+namespace clang {
+namespace clangd {
+namespace config {
+namespace {
+
+class TestCache : public FileCache {
+  MockFS FS;
+  mutable std::string Value;
+
+public:
+  TestCache() : FileCache(testPath("foo.cc")) {}
+
+  void setContents(const char *C) {
+    if (C)
+      FS.Files[testPath("foo.cc")] = C;
+    else
+      FS.Files.erase(testPath("foo.cc"));
+  }
+
+  std::string get(std::chrono::steady_clock::time_point FreshTime,
+                  bool ExpectParse) const {
+    bool GotParse = false;
+    bool GotRead;
+    std::string Result;
+    read(
+        FS, FreshTime,
+        [&](llvm::Optional<llvm::StringRef> Data) {
+          GotParse = true;
+          Value = Data.getValueOr("").str();
+        },
+        [&]() {
+          GotRead = true;
+          Result = Value;
+        });
+    EXPECT_EQ(GotParse, ExpectParse);
+    EXPECT_TRUE(GotRead);
+    return Result;
+  }
+};
+
+TEST(FileCacheTest, Invalidation) {
+  TestCache C;
+
+  auto StaleOK = std::chrono::steady_clock::now();
+  auto MustBeFresh = StaleOK + std::chrono::hours(1);
+
+  C.setContents("a");
+  EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/true)) << "Parsed first time";
+  EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
+  EXPECT_EQ("a", C.get(MustBeFresh, /*ExpectParse=*/false)) << "Cached (stat)";
+  C.setContents("bb");
+  EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
+  EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Size changed";
+  EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Cached (stat)";
+  C.setContents(nullptr);
+  EXPECT_EQ("bb", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
+  EXPECT_EQ("", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Stat failed";
+  EXPECT_EQ("", C.get(MustBeFresh, /*ExpectParse=*/false)) << "Cached (404)";
+  C.setContents("bb"); // Match the previous stat values!
+  EXPECT_EQ("", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
+  EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Size changed";
+}
+
+} // namespace
+} // namespace config
+} // namespace clangd
+} // namespace clang


        


More information about the llvm-branch-commits mailing list