[llvm-branch-commits] [clang-tools-extra] d95db16 - [clangd] Extract common file-caching logic from ConfigProvider.
Sam McCall via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Nov 25 03:14:07 PST 2020
Author: Sam McCall
Date: 2020-11-25T12:09:13+01:00
New Revision: d95db1693cbf80b9de58a94b50178fddd62c3e15
URL: https://github.com/llvm/llvm-project/commit/d95db1693cbf80b9de58a94b50178fddd62c3e15
DIFF: https://github.com/llvm/llvm-project/commit/d95db1693cbf80b9de58a94b50178fddd62c3e15.diff
LOG: [clangd] Extract common file-caching logic from ConfigProvider.
The plan is to use this to use this for .clang-format, .clang-tidy, and
compile_commands.json. (Currently the former two are reparsed every
time, and the latter is cached forever and changes are never seen).
Differential Revision: https://reviews.llvm.org/D88172
Added:
clang-tools-extra/clangd/support/FileCache.cpp
clang-tools-extra/clangd/support/FileCache.h
clang-tools-extra/clangd/unittests/support/FileCacheTests.cpp
Modified:
clang-tools-extra/clangd/ConfigProvider.cpp
clang-tools-extra/clangd/ConfigProvider.h
clang-tools-extra/clangd/support/CMakeLists.txt
clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/clangd/ConfigProvider.cpp b/clang-tools-extra/clangd/ConfigProvider.cpp
index 5a00ec5048c0..e020ec04d1d2 100644
--- a/clang-tools-extra/clangd/ConfigProvider.cpp
+++ b/clang-tools-extra/clangd/ConfigProvider.cpp
@@ -9,6 +9,7 @@
#include "ConfigProvider.h"
#include "Config.h"
#include "ConfigFragment.h"
+#include "support/FileCache.h"
#include "support/ThreadsafeFS.h"
#include "support/Trace.h"
#include "llvm/ADT/ScopeExit.h"
@@ -24,89 +25,28 @@ namespace clangd {
namespace config {
// Threadsafe cache around reading a YAML config file from disk.
-class FileConfigCache {
- std::mutex Mu;
- std::chrono::steady_clock::time_point ValidTime = {};
- llvm::SmallVector<CompiledFragment, 1> CachedValue;
- llvm::sys::TimePoint<> MTime = {};
- unsigned Size = -1;
-
- // Called once we are sure we want to read the file.
- // REQUIRES: Cache keys are set. Mutex must be held.
- void fillCacheFromDisk(llvm::vfs::FileSystem &FS, DiagnosticCallback DC) {
- CachedValue.clear();
-
- auto Buf = FS.getBufferForFile(Path);
- // If we failed to read (but stat succeeded), don't cache failure.
- if (!Buf) {
- Size = -1;
- MTime = {};
- return;
- }
-
- // If file changed between stat and open, we don't know its mtime.
- // For simplicity, don't cache the value in this case (use a bad key).
- if (Buf->get()->getBufferSize() != Size) {
- Size = -1;
- MTime = {};
- }
-
- // Finally parse and compile the actual fragments.
- for (auto &Fragment :
- Fragment::parseYAML(Buf->get()->getBuffer(), Path, DC)) {
- Fragment.Source.Directory = Directory;
- CachedValue.push_back(std::move(Fragment).compile(DC));
- }
- }
-
-public:
- // Must be set before the cache is used. Not a constructor param to allow
- // computing ancestor-relative paths to be deferred.
- std::string Path;
- // Directory associated with this fragment.
+class FileConfigCache : public FileCache {
+ mutable llvm::SmallVector<CompiledFragment, 1> CachedValue;
std::string Directory;
- // Retrieves up-to-date config fragments from disk.
- // A cached result may be reused if the mtime and size are unchanged.
- // (But several concurrent read()s can miss the cache after a single change).
- // Future performance ideas:
- // - allow caches to be reused based on short elapsed walltime
- // - allow latency-sensitive operations to skip revalidating the cache
- void read(const ThreadsafeFS &TFS, DiagnosticCallback DC,
- llvm::Optional<std::chrono::steady_clock::time_point> FreshTime,
- std::vector<CompiledFragment> &Out) {
- std::lock_guard<std::mutex> Lock(Mu);
- // We're going to update the cache and return whatever's in it.
- auto Return = llvm::make_scope_exit(
- [&] { llvm::copy(CachedValue, std::back_inserter(Out)); });
-
- // Return any sufficiently recent result without doing any further work.
- if (FreshTime && ValidTime >= FreshTime)
- return;
-
- // Ensure we bump the ValidTime at the end to allow for reuse.
- auto MarkTime = llvm::make_scope_exit(
- [&] { ValidTime = std::chrono::steady_clock::now(); });
-
- // Stat is cheaper than opening the file, it's usually unchanged.
- assert(llvm::sys::path::is_absolute(Path));
- auto FS = TFS.view(/*CWD=*/llvm::None);
- auto Stat = FS->status(Path);
- // If there's no file, the result is empty. Ensure we have an invalid key.
- if (!Stat || !Stat->isRegularFile()) {
- MTime = {};
- Size = -1;
- CachedValue.clear();
- return;
- }
- // If the modified-time and size match, assume the content does too.
- if (Size == Stat->getSize() && MTime == Stat->getLastModificationTime())
- return;
-
- // OK, the file has actually changed. Update cache key, compute new value.
- Size = Stat->getSize();
- MTime = Stat->getLastModificationTime();
- fillCacheFromDisk(*FS, DC);
+public:
+ FileConfigCache(llvm::StringRef Path, llvm::StringRef Directory)
+ : FileCache(Path), Directory(Directory) {}
+
+ void get(const ThreadsafeFS &TFS, DiagnosticCallback DC,
+ std::chrono::steady_clock::time_point FreshTime,
+ std::vector<CompiledFragment> &Out) const {
+ read(
+ TFS, FreshTime,
+ [&](llvm::Optional<llvm::StringRef> Data) {
+ CachedValue.clear();
+ if (Data)
+ for (auto &Fragment : Fragment::parseYAML(*Data, path(), DC)) {
+ Fragment.Source.Directory = Directory;
+ CachedValue.push_back(std::move(Fragment).compile(DC));
+ }
+ },
+ [&]() { llvm::copy(CachedValue, std::back_inserter(Out)); });
}
};
@@ -120,17 +60,15 @@ std::unique_ptr<Provider> Provider::fromYAMLFile(llvm::StringRef AbsPath,
std::vector<CompiledFragment>
getFragments(const Params &P, DiagnosticCallback DC) const override {
std::vector<CompiledFragment> Result;
- Cache.read(FS, DC, P.FreshTime, Result);
+ Cache.get(FS, DC, P.FreshTime, Result);
return Result;
};
public:
AbsFileProvider(llvm::StringRef Path, llvm::StringRef Directory,
const ThreadsafeFS &FS)
- : FS(FS) {
+ : Cache(Path, Directory), FS(FS) {
assert(llvm::sys::path::is_absolute(Path));
- Cache.Path = Path.str();
- Cache.Directory = Directory.str();
}
};
@@ -174,23 +112,21 @@ Provider::fromAncestorRelativeYAMLFiles(llvm::StringRef RelPath,
{
std::lock_guard<std::mutex> Lock(Mu);
for (llvm::StringRef Ancestor : Ancestors) {
- auto R = Cache.try_emplace(Ancestor);
+ auto It = Cache.find(Ancestor);
// Assemble the actual config file path only once.
- if (R.second) {
+ if (It == Cache.end()) {
llvm::SmallString<256> ConfigPath = Ancestor;
path::append(ConfigPath, RelPath);
- R.first->second.Path = ConfigPath.str().str();
- R.first->second.Directory = Ancestor.str();
+ It = Cache.try_emplace(Ancestor, ConfigPath.str(), Ancestor).first;
}
- Caches.push_back(&R.first->second);
+ Caches.push_back(&It->second);
}
}
// Finally query each individual file.
// This will take a (per-file) lock for each file that actually exists.
std::vector<CompiledFragment> Result;
- for (FileConfigCache *Cache : Caches) {
- Cache->read(FS, DC, P.FreshTime, Result);
- }
+ for (FileConfigCache *Cache : Caches)
+ Cache->get(FS, DC, P.FreshTime, Result);
return Result;
};
diff --git a/clang-tools-extra/clangd/ConfigProvider.h b/clang-tools-extra/clangd/ConfigProvider.h
index 54f4b1467236..67ab833ee551 100644
--- a/clang-tools-extra/clangd/ConfigProvider.h
+++ b/clang-tools-extra/clangd/ConfigProvider.h
@@ -38,8 +38,9 @@ struct Params {
llvm::StringRef Path;
/// Hint that stale data is OK to improve performance (e.g. avoid IO).
/// FreshTime sets a bound for how old the data can be.
- /// If not set, providers should validate caches against the data source.
- llvm::Optional<std::chrono::steady_clock::time_point> FreshTime;
+ /// By default, providers should validate caches against the data source.
+ std::chrono::steady_clock::time_point FreshTime =
+ std::chrono::steady_clock::time_point::max();
};
/// Used to report problems in parsing or interpreting a config.
diff --git a/clang-tools-extra/clangd/support/CMakeLists.txt b/clang-tools-extra/clangd/support/CMakeLists.txt
index e3412447142c..f0fe073eb136 100644
--- a/clang-tools-extra/clangd/support/CMakeLists.txt
+++ b/clang-tools-extra/clangd/support/CMakeLists.txt
@@ -19,6 +19,7 @@ include_directories(..)
add_clang_library(clangdSupport
Cancellation.cpp
Context.cpp
+ FileCache.cpp
Logger.cpp
Markup.cpp
MemoryTree.cpp
diff --git a/clang-tools-extra/clangd/support/FileCache.cpp b/clang-tools-extra/clangd/support/FileCache.cpp
new file mode 100644
index 000000000000..4c2795015845
--- /dev/null
+++ b/clang-tools-extra/clangd/support/FileCache.cpp
@@ -0,0 +1,80 @@
+//===--- FileCache.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "support/FileCache.h"
+
+namespace clang {
+namespace clangd {
+
+// Sentinel values for the Size cache key. In both cases, a successful stat of
+// the file will never result in the cached value being reused.
+
+// The cached value does not reflect the current content on disk.
+static constexpr uint64_t CacheDiskMismatch =
+ std::numeric_limits<uint64_t>::max();
+// The cached value reflects that the file doesn't exist.
+static constexpr uint64_t FileNotFound = CacheDiskMismatch - 1;
+
+FileCache::FileCache(llvm::StringRef Path)
+ : Path(Path), ValidTime(std::chrono::steady_clock::time_point::min()),
+ ModifiedTime(), Size(CacheDiskMismatch) {
+ assert(llvm::sys::path::is_absolute(Path));
+}
+
+void FileCache::read(
+ const ThreadsafeFS &TFS, std::chrono::steady_clock::time_point FreshTime,
+ llvm::function_ref<void(llvm::Optional<llvm::StringRef>)> Parse,
+ llvm::function_ref<void()> Read) const {
+
+ std::lock_guard<std::mutex> Lock(Mu);
+ // We're going to update the cache and return whatever's in it.
+ auto Return = llvm::make_scope_exit(Read);
+
+ // Return any sufficiently recent result without doing any further work.
+ if (ValidTime > FreshTime)
+ return;
+
+ // Ensure we always bump ValidTime, so that FreshTime imposes a hard limit on
+ // how often we do IO.
+ auto BumpValidTime = llvm::make_scope_exit(
+ [&] { ValidTime = std::chrono::steady_clock::now(); });
+
+ // stat is cheaper than opening the file. It's usually unchanged.
+ assert(llvm::sys::path::is_absolute(Path));
+ auto FS = TFS.view(/*CWD=*/llvm::None);
+ auto Stat = FS->status(Path);
+ if (!Stat || !Stat->isRegularFile()) {
+ if (Size != FileNotFound) // Allow "not found" value to be cached.
+ Parse(llvm::None);
+ // Ensure the cache key won't match any future stat().
+ Size = FileNotFound;
+ return;
+ }
+ // If the modified-time and size match, assume the content does too.
+ if (Size == Stat->getSize() &&
+ ModifiedTime == Stat->getLastModificationTime())
+ return;
+
+ // OK, the file has actually changed. Update cache key, compute new value.
+ Size = Stat->getSize();
+ ModifiedTime = Stat->getLastModificationTime();
+ // Now read the file from disk.
+ if (auto Buf = FS->getBufferForFile(Path)) {
+ Parse(Buf->get()->getBuffer());
+ // Result is cacheable if the actual read size matches the new cache key.
+ // (We can't update the cache key, because we don't know the new mtime).
+ if (Buf->get()->getBufferSize() != Size)
+ Size = CacheDiskMismatch;
+ } else {
+ // File was unreadable. Keep the old value and try again next time.
+ Size = CacheDiskMismatch;
+ }
+}
+
+} // namespace clangd
+} // namespace clang
diff --git a/clang-tools-extra/clangd/support/FileCache.h b/clang-tools-extra/clangd/support/FileCache.h
new file mode 100644
index 000000000000..75782e9ae021
--- /dev/null
+++ b/clang-tools-extra/clangd/support/FileCache.h
@@ -0,0 +1,81 @@
+//===--- FileCache.h - Revalidating cache of data from disk ------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FILECACHE_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FILECACHE_H
+
+#include "Path.h"
+#include "ThreadsafeFS.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Support/Chrono.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include <mutex>
+
+namespace clang {
+namespace clangd {
+
+/// Base class for threadsafe cache of data read from a file on disk.
+///
+/// We want configuration files to be "live" as much as possible.
+/// Reading them every time is simplest, but caching solves a few problems:
+/// - reading and parsing is cheap but not free (and happens on hot paths)
+/// - we can ignore invalid data and use the old value (we may see truncated
+/// compile_commands.json from non-atomic writers)
+/// - we avoid reporting the same errors repeatedly
+///
+/// We still read and parse the data synchronously on demand, but skip as much
+/// work as possible:
+/// - if not enough wall-time has elapsed, assume the data is still up-to-date
+/// - if we stat the file and it has the same mtime + size, don't read it
+/// - obviously we only have to parse when we re-read the file
+/// (Tracking OS change events is an alternative, but
diff icult to do portably.)
+///
+/// Caches for particular data (e.g. compilation databases) should inherit and:
+/// - add mutable storage for the cached parsed data
+/// - add a public interface implemented on top of read()
+class FileCache {
+protected:
+ // Path must be absolute.
+ FileCache(PathRef Path);
+
+ // Updates the cached value if needed, then provides threadsafe access to it.
+ //
+ // Specifically:
+ // - Parse() may be called (if the cache was not up-to-date)
+ // The lock is held, so cache storage may be safely written.
+ // Parse(None) means the file doesn't exist.
+ // - Read() will always be called, to provide access to the value.
+ // The lock is again held, so the value can be copied or used.
+ //
+ // If the last Parse is newer than FreshTime, we don't check metadata.
+ // - time_point::min() means we only do IO if we never read the file before
+ // - time_point::max() means we always at least stat the file
+ // - steady_clock::now() + seconds(1) means we accept 1 second of staleness
+ void read(const ThreadsafeFS &TFS,
+ std::chrono::steady_clock::time_point FreshTime,
+ llvm::function_ref<void(llvm::Optional<llvm::StringRef>)> Parse,
+ llvm::function_ref<void()> Read) const;
+
+ PathRef path() const { return Path; }
+
+private:
+ std::string Path;
+ // Members are mutable so read() can present a const interface.
+ // (It is threadsafe and approximates read-through to TFS).
+ mutable std::mutex Mu;
+ // Time when the cache was known valid (reflected disk state).
+ mutable std::chrono::steady_clock::time_point ValidTime;
+ // Filesystem metadata corresponding to the currently cached data.
+ mutable std::chrono::system_clock::time_point ModifiedTime;
+ mutable uint64_t Size;
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif
diff --git a/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp b/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp
index 29ab8f3b3606..d6af5856cc49 100644
--- a/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp
@@ -151,6 +151,7 @@ TEST(ProviderTest, FromAncestorRelativeYAMLFiles) {
EXPECT_THAT(getAddedArgs(Cfg), ElementsAre("bar", "baz"));
}
+// FIXME: delete this test, it's covered by FileCacheTests.
TEST(ProviderTest, Staleness) {
MockFS FS;
diff --git a/clang-tools-extra/clangd/unittests/support/FileCacheTests.cpp b/clang-tools-extra/clangd/unittests/support/FileCacheTests.cpp
new file mode 100644
index 000000000000..1f554806def0
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/support/FileCacheTests.cpp
@@ -0,0 +1,83 @@
+//===-- FileCacheTests.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "support/FileCache.h"
+
+#include "TestFS.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <atomic>
+#include <chrono>
+
+namespace clang {
+namespace clangd {
+namespace config {
+namespace {
+
+class TestCache : public FileCache {
+ MockFS FS;
+ mutable std::string Value;
+
+public:
+ TestCache() : FileCache(testPath("foo.cc")) {}
+
+ void setContents(const char *C) {
+ if (C)
+ FS.Files[testPath("foo.cc")] = C;
+ else
+ FS.Files.erase(testPath("foo.cc"));
+ }
+
+ std::string get(std::chrono::steady_clock::time_point FreshTime,
+ bool ExpectParse) const {
+ bool GotParse = false;
+ bool GotRead;
+ std::string Result;
+ read(
+ FS, FreshTime,
+ [&](llvm::Optional<llvm::StringRef> Data) {
+ GotParse = true;
+ Value = Data.getValueOr("").str();
+ },
+ [&]() {
+ GotRead = true;
+ Result = Value;
+ });
+ EXPECT_EQ(GotParse, ExpectParse);
+ EXPECT_TRUE(GotRead);
+ return Result;
+ }
+};
+
+TEST(FileCacheTest, Invalidation) {
+ TestCache C;
+
+ auto StaleOK = std::chrono::steady_clock::now();
+ auto MustBeFresh = StaleOK + std::chrono::hours(1);
+
+ C.setContents("a");
+ EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/true)) << "Parsed first time";
+ EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
+ EXPECT_EQ("a", C.get(MustBeFresh, /*ExpectParse=*/false)) << "Cached (stat)";
+ C.setContents("bb");
+ EXPECT_EQ("a", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
+ EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Size changed";
+ EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Cached (stat)";
+ C.setContents(nullptr);
+ EXPECT_EQ("bb", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
+ EXPECT_EQ("", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Stat failed";
+ EXPECT_EQ("", C.get(MustBeFresh, /*ExpectParse=*/false)) << "Cached (404)";
+ C.setContents("bb"); // Match the previous stat values!
+ EXPECT_EQ("", C.get(StaleOK, /*ExpectParse=*/false)) << "Cached (time)";
+ EXPECT_EQ("bb", C.get(MustBeFresh, /*ExpectParse=*/true)) << "Size changed";
+}
+
+} // namespace
+} // namespace config
+} // namespace clangd
+} // namespace clang
More information about the llvm-branch-commits
mailing list