[clang] a033dbb - [Clang] Give Clang the ability to use a shared stat cache
Roman Lebedev via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 18 14:39:51 PST 2023
Looks like my comments on the review got completely ignored and not
even acknowledged.
On Thu, Jan 19, 2023 at 1:35 AM Fred Riss via cfe-commits
<cfe-commits at lists.llvm.org> wrote:
>
>
> Author: Fred Riss
> Date: 2023-01-18T14:31:27-08:00
> New Revision: a033dbbe5c43247b60869b008e67ed86ed230eaa
>
> URL: https://github.com/llvm/llvm-project/commit/a033dbbe5c43247b60869b008e67ed86ed230eaa
> DIFF: https://github.com/llvm/llvm-project/commit/a033dbbe5c43247b60869b008e67ed86ed230eaa.diff
>
> LOG: [Clang] Give Clang the ability to use a shared stat cache
>
> Every Clang instance uses an internal FileSystemStatCache to avoid
> stating the same content multiple times. However, different instances
> of Clang will contend for filesystem access for their initial stats
> during HeaderSearch or module validation.
>
> On some workloads, the time spent in the kernel in these concurrent
> stat calls has been measured to be over 20% of the overall compilation
> time. This is extremely wasteful when most of the stat calls target
> mostly immutable content like a SDK.
>
> This commit introduces a new tool `clang-stat-cache` able to generate
> an OnDiskHashmap containing the stat data for a given filesystem
> hierarchy.
>
> The driver part of this has been modeled after -ivfsoverlay given
> the similarities with what it influences. It introduces a new
> -ivfsstatcache driver option to instruct Clang to use a stat cache
> generated by `clang-stat-cache`. These stat caches are inserted at
> the bottom of the VFS stack (right above the real filesystem).
>
> Differential Revision: https://reviews.llvm.org/D136651
>
> Added:
> clang/test/Driver/vfsstatcache.c
> clang/test/clang-stat-cache/cache-effects.c
> clang/test/clang-stat-cache/errors.test
> clang/tools/clang-stat-cache/CMakeLists.txt
> clang/tools/clang-stat-cache/clang-stat-cache.cpp
> llvm/include/llvm/Support/StatCacheFileSystem.h
> llvm/lib/Support/StatCacheFileSystem.cpp
>
> Modified:
> clang/include/clang/Basic/DiagnosticFrontendKinds.td
> clang/include/clang/Driver/Options.td
> clang/include/clang/Frontend/CompilerInvocation.h
> clang/include/clang/Lex/HeaderSearchOptions.h
> clang/lib/Frontend/ASTUnit.cpp
> clang/lib/Frontend/CompilerInvocation.cpp
> clang/test/CMakeLists.txt
> clang/tools/CMakeLists.txt
> llvm/lib/Support/CMakeLists.txt
> llvm/unittests/Support/VirtualFileSystemTest.cpp
>
> Removed:
>
>
>
> ################################################################################
> diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
> index d0f672ae5a1bd..e106858688ac7 100644
> --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td
> +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
> @@ -256,6 +256,11 @@ def err_test_module_file_extension_version : Error<
> "test module file extension '%0' has different version (%1.%2) than expected "
> "(%3.%4)">;
>
> +def err_missing_vfs_stat_cache_file : Error<
> + "stat cache file '%0' not found">, DefaultFatal;
> +def err_invalid_vfs_stat_cache : Error<
> + "invalid stat cache file '%0'">, DefaultFatal;
> +
> def err_missing_vfs_overlay_file : Error<
> "virtual filesystem overlay file '%0' not found">, DefaultFatal;
> def err_invalid_vfs_overlay : Error<
>
> diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
> index ba49b335cf287..9334e6319d57b 100644
> --- a/clang/include/clang/Driver/Options.td
> +++ b/clang/include/clang/Driver/Options.td
> @@ -3357,6 +3357,8 @@ def iwithsysroot : JoinedOrSeparate<["-"], "iwithsysroot">, Group<clang_i_Group>
> HelpText<"Add directory to SYSTEM include search path, "
> "absolute paths are relative to -isysroot">, MetaVarName<"<directory>">,
> Flags<[CC1Option]>;
> +def ivfsstatcache : JoinedOrSeparate<["-"], "ivfsstatcache">, Group<clang_i_Group>, Flags<[CC1Option]>,
> + HelpText<"Use the stat data cached in file instead of doing filesystem syscalls. See clang-stat-cache utility.">;
> def ivfsoverlay : JoinedOrSeparate<["-"], "ivfsoverlay">, Group<clang_i_Group>, Flags<[CC1Option]>,
> HelpText<"Overlay the virtual filesystem described by file over the real file system">;
> def imultilib : Separate<["-"], "imultilib">, Group<gfortran_Group>;
>
> diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h
> index 254f048ed3c7e..9cc6aa5c4d8a1 100644
> --- a/clang/include/clang/Frontend/CompilerInvocation.h
> +++ b/clang/include/clang/Frontend/CompilerInvocation.h
> @@ -296,6 +296,7 @@ IntrusiveRefCntPtr<llvm::vfs::FileSystem> createVFSFromCompilerInvocation(
>
> IntrusiveRefCntPtr<llvm::vfs::FileSystem>
> createVFSFromOverlayFiles(ArrayRef<std::string> VFSOverlayFiles,
> + ArrayRef<std::string> VFSStatCacheFiles,
> DiagnosticsEngine &Diags,
> IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS);
>
>
> diff --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h
> index 6436a9b3bde20..548f7d4493de4 100644
> --- a/clang/include/clang/Lex/HeaderSearchOptions.h
> +++ b/clang/include/clang/Lex/HeaderSearchOptions.h
> @@ -181,6 +181,9 @@ class HeaderSearchOptions {
> /// of computing the module hash.
> llvm::SmallSetVector<llvm::CachedHashString, 16> ModulesIgnoreMacros;
>
> + /// The set of user-provided stat cache files.
> + std::vector<std::string> VFSStatCacheFiles;
> +
> /// The set of user-provided virtual filesystem overlay files.
> std::vector<std::string> VFSOverlayFiles;
>
> @@ -250,6 +253,10 @@ class HeaderSearchOptions {
> SystemHeaderPrefixes.emplace_back(Prefix, IsSystemHeader);
> }
>
> + void AddVFSStatCacheFile(StringRef Name) {
> + VFSStatCacheFiles.push_back(std::string(Name));
> + }
> +
> void AddVFSOverlayFile(StringRef Name) {
> VFSOverlayFiles.push_back(std::string(Name));
> }
>
> diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp
> index 3b4f25182ac95..dbf55d95746e9 100644
> --- a/clang/lib/Frontend/ASTUnit.cpp
> +++ b/clang/lib/Frontend/ASTUnit.cpp
> @@ -574,7 +574,7 @@ class ASTInfoCollector : public ASTReaderListener {
> // performs the initialization too late (once both target and language
> // options are read).
> PP.getFileManager().setVirtualFileSystem(createVFSFromOverlayFiles(
> - HSOpts.VFSOverlayFiles, PP.getDiagnostics(),
> + HSOpts.VFSOverlayFiles, HSOpts.VFSStatCacheFiles, PP.getDiagnostics(),
> PP.getFileManager().getVirtualFileSystemPtr()));
>
> InitializedHeaderSearchPaths = true;
>
> diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
> index 0bb9c8c83c63b..b0ef37fad2227 100644
> --- a/clang/lib/Frontend/CompilerInvocation.cpp
> +++ b/clang/lib/Frontend/CompilerInvocation.cpp
> @@ -83,6 +83,7 @@
> #include "llvm/Support/Path.h"
> #include "llvm/Support/Process.h"
> #include "llvm/Support/Regex.h"
> +#include "llvm/Support/StatCacheFileSystem.h"
> #include "llvm/Support/VersionTuple.h"
> #include "llvm/Support/VirtualFileSystem.h"
> #include "llvm/Support/raw_ostream.h"
> @@ -3084,6 +3085,9 @@ static void GenerateHeaderSearchArgs(HeaderSearchOptions &Opts,
> GenerateArg(Args, Opt, P.Prefix, SA);
> }
>
> + for (const std::string &F : Opts.VFSStatCacheFiles)
> + GenerateArg(Args, OPT_ivfsstatcache, F, SA);
> +
> for (const std::string &F : Opts.VFSOverlayFiles)
> GenerateArg(Args, OPT_ivfsoverlay, F, SA);
> }
> @@ -3217,6 +3221,9 @@ static bool ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args,
> Opts.AddSystemHeaderPrefix(
> A->getValue(), A->getOption().matches(OPT_system_header_prefix));
>
> + for (const auto *A : Args.filtered(OPT_ivfsstatcache))
> + Opts.AddVFSStatCacheFile(A->getValue());
> +
> for (const auto *A : Args.filtered(OPT_ivfsoverlay))
> Opts.AddVFSOverlayFile(A->getValue());
>
> @@ -4747,12 +4754,31 @@ clang::createVFSFromCompilerInvocation(
> const CompilerInvocation &CI, DiagnosticsEngine &Diags,
> IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS) {
> return createVFSFromOverlayFiles(CI.getHeaderSearchOpts().VFSOverlayFiles,
> + CI.getHeaderSearchOpts().VFSStatCacheFiles,
> Diags, std::move(BaseFS));
> }
>
> IntrusiveRefCntPtr<llvm::vfs::FileSystem> clang::createVFSFromOverlayFiles(
> - ArrayRef<std::string> VFSOverlayFiles, DiagnosticsEngine &Diags,
> + ArrayRef<std::string> VFSOverlayFiles,
> + ArrayRef<std::string> VFSStatCacheFiles, DiagnosticsEngine &Diags,
> IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS) {
> + for (const auto &File : VFSStatCacheFiles) {
> + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buffer =
> + BaseFS->getBufferForFile(File);
> + if (!Buffer) {
> + Diags.Report(diag::err_missing_vfs_stat_cache_file) << File;
> + continue;
> + }
> +
> + auto StatCache =
> + llvm::vfs::StatCacheFileSystem::create(std::move(*Buffer), BaseFS);
> +
> + if (errorToBool(StatCache.takeError()))
> + Diags.Report(diag::err_invalid_vfs_stat_cache) << File;
> + else
> + BaseFS = std::move(*StatCache);
> + }
> +
> if (VFSOverlayFiles.empty())
> return BaseFS;
>
>
> diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt
> index 1d6377b5f2d80..cd3775b55a559 100644
> --- a/clang/test/CMakeLists.txt
> +++ b/clang/test/CMakeLists.txt
> @@ -71,6 +71,7 @@ list(APPEND CLANG_TEST_DEPS
> clang-refactor
> clang-diff
> clang-scan-deps
> + clang-stat-cache
> diagtool
> hmaptool
> )
>
> diff --git a/clang/test/Driver/vfsstatcache.c b/clang/test/Driver/vfsstatcache.c
> new file mode 100644
> index 0000000000000..ec3c279d53d08
> --- /dev/null
> +++ b/clang/test/Driver/vfsstatcache.c
> @@ -0,0 +1,5 @@
> +// RUN: %clang -ivfsstatcache foo.h -### %s 2>&1 | FileCheck %s
> +// CHECK: "-ivfsstatcache" "foo.h"
> +
> +// RUN: not %clang -ivfsstatcache foo.h %s 2>&1 | FileCheck -check-prefix=CHECK-MISSING %s
> +// CHECK-MISSING: stat cache file 'foo.h' not found
>
> diff --git a/clang/test/clang-stat-cache/cache-effects.c b/clang/test/clang-stat-cache/cache-effects.c
> new file mode 100644
> index 0000000000000..bf2e2db447e2a
> --- /dev/null
> +++ b/clang/test/clang-stat-cache/cache-effects.c
> @@ -0,0 +1,63 @@
> +#include "foo.h"
> +
> +// Testing the effects of a cache is tricky, because it's just supposed to speed
> +// things up, not change the behavior. In this test, we are using an outdated
> +// cache to trick HeaderSearch into finding the wrong module and show that it is
> +// being used.
> +
> +// Clear the module cache.
> +// RUN: rm -rf %t
> +// RUN: mkdir -p %t/Inputs
> +// RUN: mkdir -p %t/Inputs/Foo1
> +// RUN: mkdir -p %t/Inputs/Foo2
> +// RUN: mkdir -p %t/modules-to-compare
> +
> +// ===
> +// Create a Foo module in the Foo1 directory.
> +// RUN: echo 'void meow(void);' > %t/Inputs/Foo1/foo.h
> +// RUN: echo 'module Foo { header "foo.h" }' > %t/Inputs/Foo1/module.map
> +
> +// ===
> +// Compile the module. Note that the compiler has 2 header search paths:
> +// Foo2 and Foo1 in that order. The module has been created in Foo1, and
> +// it is the only version available now.
> +// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -Rmodule-build %s 2>&1
> +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-before.pcm
> +
> +// ===
> +// Create a stat cache for our inputs directory
> +// RUN: clang-stat-cache %t/Inputs -o %t/stat.cache
> +
> +// ===
> +// As a sanity check, re-run the same compilation with the cache and check that
> +// the module does not change.
> +// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache %s -Rmodule-build 2>&1
> +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm
> +
> +// RUN: diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm
> +
> +// ===
> +// Now introduce a different Foo module in the Foo2 directory which is before
> +// Foo1 in the search paths.
> +// RUN: echo 'void meow2(void);' > %t/Inputs/Foo2/foo.h
> +// RUN: echo 'module Foo { header "foo.h" }' > %t/Inputs/Foo2/module.map
> +
> +// ===
> +// Because we're using the (now-outdated) stat cache, this compilation
> +// should still be using the first module. It will not see the new one
> +// which is earlier in the search paths.
> +// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache -Rmodule-build -Rmodule-import %s 2>&1
> +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm
> +
> +// RUN: diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm
> +
> +// ===
> +// Regenerate the stat cache for our Inputs directory
> +// RUN: clang-stat-cache -f %t/Inputs -o %t/stat.cache 2>&1
> +
> +// ===
> +// Use the module and now see that we are recompiling the new one.
> +// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache -Rmodule-build %s 2>&1
> +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm
> +
> +// RUN: not diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm
>
> diff --git a/clang/test/clang-stat-cache/errors.test b/clang/test/clang-stat-cache/errors.test
> new file mode 100644
> index 0000000000000..ab73a1348f10e
> --- /dev/null
> +++ b/clang/test/clang-stat-cache/errors.test
> @@ -0,0 +1,42 @@
> +RUN: rm -rf %t
> +RUN: mkdir -p %t
> +
> +RUN: not clang-stat-cache %t/not-there -o %t/stat.cache 2>&1 | FileCheck --check-prefix=NO-SUCH-DIR %s
> +NO-SUCH-DIR: Failed to stat the target directory: {{[Nn]}}o such file or directory
> +
> +RUN: not clang-stat-cache %t -o %t/not-there/stat.cache 2>&1 | FileCheck --check-prefix=NO-SUCH-FILE %s
> +NO-SUCH-FILE: Failed to open cache file: '{{.*}}': {{[Nn]}}o such file or directory
> +
> +# Use mixed-case directories to exercise the case insensitive implementation.
> +RUN: mkdir -p %t/Dir
> +RUN: mkdir -p %t/Dir2
> +
> +# Try to overwrite a few invalid caches
> +RUN: echo "Not a stat cache" > %t/stat.cache
> +RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE %s
> +RUN: echo "Not a stat cache, but bigger than the stat cache header" > %t/stat.cache
> +RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE %s
> +RUN: echo "STAT. This has the correct MAGIC and is bigger than the header." > %t/stat.cache
> +RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE %s
> +
> +INVALID-CACHE: The output cache file exists and is not a valid stat cache. Aborting.
> +
> +# Test the force flag
> +RUN: echo "STAT. This has the correct MAGIC and is bigger than the header." > %t/stat.cache
> +RUN: clang-stat-cache %t/Dir -f -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE-FORCE %s
> +INVALID-CACHE-FORCE: The output cache file exists and is not a valid stat cache. Forced update.
> +
> +# Generate a valid cache for dir
> +RUN: rm %t/stat.cache
> +RUN: clang-stat-cache %t/Dir -o %t/stat.cache
> +RUN: cp %t/stat.cache %t/stat.cache.save
> +
> +# Try with same base directory but with extraneous separators
> +RUN: clang-stat-cache %t/Dir/// -v -o %t/stat.cache | FileCheck --check-prefix=EXTRA-SEP %s
> +EXTRA-SEP-NOT: Existing cache has different directory. Regenerating...
> +EXTRA-SEP: Cache up-to-date, exiting
> +
> +# Rewrite the cache with a different base directory
> +RUN: clang-stat-cache %t/Dir2 -o %t/stat.cache 2>&1 | FileCheck --check-prefix=OTHER-DIR %s
> +OTHER-DIR: Existing cache has different directory. Regenerating...
> +
>
> diff --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt
> index f60db6ef0ba34..147555d87dfc5 100644
> --- a/clang/tools/CMakeLists.txt
> +++ b/clang/tools/CMakeLists.txt
> @@ -15,6 +15,7 @@ add_clang_subdirectory(clang-scan-deps)
> if(HAVE_CLANG_REPL_SUPPORT)
> add_clang_subdirectory(clang-repl)
> endif()
> +add_clang_subdirectory(clang-stat-cache)
>
> add_clang_subdirectory(c-index-test)
>
>
> diff --git a/clang/tools/clang-stat-cache/CMakeLists.txt b/clang/tools/clang-stat-cache/CMakeLists.txt
> new file mode 100644
> index 0000000000000..ab93d8b3d0f0e
> --- /dev/null
> +++ b/clang/tools/clang-stat-cache/CMakeLists.txt
> @@ -0,0 +1,19 @@
> +set(LLVM_LINK_COMPONENTS
> + Core
> + Support
> + )
> +
> +add_clang_tool(clang-stat-cache
> + clang-stat-cache.cpp
> + )
> +
> +if(APPLE)
> +set(CLANG_STAT_CACHE_LIB_DEPS
> + "-framework CoreServices"
> + )
> +endif()
> +
> +clang_target_link_libraries(clang-stat-cache
> + PRIVATE
> + ${CLANG_STAT_CACHE_LIB_DEPS}
> + )
>
> diff --git a/clang/tools/clang-stat-cache/clang-stat-cache.cpp b/clang/tools/clang-stat-cache/clang-stat-cache.cpp
> new file mode 100644
> index 0000000000000..183d0d79fadd8
> --- /dev/null
> +++ b/clang/tools/clang-stat-cache/clang-stat-cache.cpp
> @@ -0,0 +1,318 @@
> +//===- clang-stat-cache.cpp -----------------------------------------------===//
> +//
> +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
> +// See https://llvm.org/LICENSE.txt for license information.
> +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/Support/CommandLine.h"
> +#include "llvm/Support/FileSystem.h"
> +#include "llvm/Support/MemoryBuffer.h"
> +#include "llvm/Support/Path.h"
> +#include "llvm/Support/StatCacheFileSystem.h"
> +#include "llvm/Support/Timer.h"
> +#include "llvm/Support/raw_ostream.h"
> +
> +#include <assert.h>
> +
> +#ifdef __APPLE__
> +#include <CoreServices/CoreServices.h>
> +
> +#include <sys/mount.h>
> +#include <sys/param.h>
> +#endif // __APPLE__
> +
> +// The clang-stat-cache utility creates an on-disk cache for the stat data
> +// of a file-system tree which is expected to be immutable during a build.
> +
> +using namespace llvm;
> +using llvm::vfs::StatCacheFileSystem;
> +
> +cl::OptionCategory StatCacheCategory("clang-stat-cache options");
> +
> +cl::opt<std::string> OutputFilename("o", cl::Required,
> + cl::desc("Specify output filename"),
> + cl::value_desc("filename"),
> + cl::cat(StatCacheCategory));
> +
> +cl::opt<std::string> TargetDirectory(cl::Positional, cl::Required,
> + cl::value_desc("dirname"),
> + cl::cat(StatCacheCategory));
> +
> +cl::opt<bool> Verbose("v", cl::desc("More verbose output"));
> +cl::opt<bool> Force("f", cl::desc("Force cache generation"));
> +
> +#if __APPLE__
> +// Used by checkContentsValidity. See below.
> +struct CallbackInfo {
> + bool SeenChanges = false;
> +};
> +
> +// Used by checkContentsValidity. See below.
> +static void FSEventsCallback(ConstFSEventStreamRef streamRef, void *CtxInfo,
> + size_t numEvents, void *eventPaths,
> + const FSEventStreamEventFlags *eventFlags,
> + const FSEventStreamEventId *eventIds) {
> + CallbackInfo *Info = static_cast<CallbackInfo *>(CtxInfo);
> + for (size_t i = 0; i < numEvents; ++i) {
> + // The kFSEventStreamEventFlagHistoryDone is set on the last 'historical'
> + // event passed to the callback. This means it is passed after the callback
> + // has seen all the relevant activity between the StartEvent of the stream
> + // and the point the stream was created.
> + // If the callback didn't see any other event, it means there haven't been
> + // any alterations to the target directory hierarchy and the cache contents
> + // is still up-to-date.
> + if (eventFlags[i] & kFSEventStreamEventFlagHistoryDone) {
> + // Let's stop the main queue and go back to our non-queue code.
> + CFRunLoopStop(CFRunLoopGetCurrent());
> + break;
> + }
> +
> + // If we see any event outside of the kFSEventStreamEventFlagHistoryDone
> + // one, there have been changes to the target directory.
> + Info->SeenChanges = true;
> + }
> +}
> +
> +// FSEvents-based check for cache contents validity. We store the latest
> +// FSEventStreamEventId in the cache as a ValidityToken and check if any
> +// file system events affected the base directory since the cache was
> +// generated.
> +static bool checkContentsValidity(uint64_t &ValidityToken) {
> + CFStringRef TargetDir = CFStringCreateWithCStringNoCopy(
> + kCFAllocatorDefault, TargetDirectory.c_str(), kCFStringEncodingASCII,
> + kCFAllocatorNull);
> + CFArrayRef PathsToWatch =
> + CFArrayCreate(nullptr, (const void **)&TargetDir, 1, nullptr);
> + CallbackInfo Info;
> + FSEventStreamContext Ctx = {0, &Info, nullptr, nullptr, nullptr};
> + FSEventStreamRef Stream;
> + CFAbsoluteTime Latency = 0; // Latency in seconds. Do not wait.
> +
> + // Start at the latest event stored in the cache.
> + FSEventStreamEventId StartEvent = ValidityToken;
> + // Update the Validity token with the current latest event.
> + ValidityToken = FSEventsGetCurrentEventId();
> +
> + // Create the stream
> + Stream =
> + FSEventStreamCreate(NULL, &FSEventsCallback, &Ctx, PathsToWatch,
> + StartEvent, Latency, kFSEventStreamCreateFlagNone);
> +
> + // Associate the stream with the main queue.
> + FSEventStreamSetDispatchQueue(Stream, dispatch_get_main_queue());
> + // Start the stream (needs the queue to run to do anything).
> + if (!FSEventStreamStart(Stream)) {
> + errs() << "Failed to create FS event stream. "
> + << "Considering the cache up-to-date.\n";
> + return true;
> + }
> +
> + // Start the main queue. It will be exited by our callback when it got
> + // confirmed it processed all events.
> + CFRunLoopRun();
> +
> + return !Info.SeenChanges;
> +}
> +
> +#else // __APPLE__
> +
> +// There is no cross-platform way to implement a validity check. If this
> +// platform doesn't support it, just consider the cache contents always
> +// valid. When that's the case, the tool running cache generation needs
> +// to have the knowledge to do it only when needed.
> +static bool checkContentsValidity(uint64_t &ValidityToken) { return true; }
> +
> +#endif // __APPLE__
> +
> +// Populate Generator with the stat cache data for the filesystem tree
> +// rooted at BasePath.
> +static std::error_code
> +populateHashTable(StringRef BasePath,
> + StatCacheFileSystem::StatCacheWriter &Generator) {
> + using namespace llvm;
> + using namespace sys::fs;
> +
> + std::error_code ErrorCode;
> +
> + // Just loop over the target directory using a recursive iterator.
> + // This invocation follows symlinks, so we are going to potentially
> + // store the status of the same file multiple times with different
> + // names.
> + for (recursive_directory_iterator I(BasePath, ErrorCode), E;
> + I != E && !ErrorCode; I.increment(ErrorCode)) {
> + StringRef Path = I->path();
> + sys::fs::file_status s;
> + // This can fail (broken symlink) and leave the file_status with
> + // its default values. The reader knows this.
> + status(Path, s);
> +
> + Generator.addEntry(Path, s);
> + }
> +
> + return ErrorCode;
> +}
> +
> +static bool checkCacheValid(int FD, raw_fd_ostream &Out,
> + uint64_t &ValidityToken) {
> + sys::fs::file_status Status;
> + auto EC = sys::fs::status(FD, Status);
> + if (EC) {
> + llvm::errs() << "fstat failed: "
> + << llvm::toString(llvm::errorCodeToError(EC)) << "\n";
> + return false;
> + }
> +
> + auto Size = Status.getSize();
> + if (Size == 0) {
> + // New file.
> +#ifdef __APPLE__
> + // Get the current (global) FSEvent id and use this as ValidityToken.
> + ValidityToken = FSEventsGetCurrentEventId();
> +#endif
> + return false;
> + }
> +
> + auto ErrorOrBuffer = MemoryBuffer::getOpenFile(
> + sys::fs::convertFDToNativeFile(FD), OutputFilename, Status.getSize());
> +
> + // Refuse to write to this cache file if it exists but its contents do
> + // not look like a valid cache file.
> + StringRef BaseDir;
> + bool IsCaseSensitive;
> + bool VersionMatch;
> + if (auto E = StatCacheFileSystem::validateCacheFile(
> + (*ErrorOrBuffer)->getMemBufferRef(), BaseDir, IsCaseSensitive,
> + VersionMatch, ValidityToken)) {
> + llvm::errs() << "The output cache file exists and is not a valid stat "
> + "cache.";
> + if (!Force) {
> + llvm::errs() << " Aborting.\n";
> + exit(1);
> + }
> +
> + consumeError(std::move(E));
> + llvm::errs() << " Forced update.\n";
> + return false;
> + }
> +
> + if (BaseDir != TargetDirectory &&
> + (IsCaseSensitive || !BaseDir.equals_insensitive(TargetDirectory))) {
> + llvm::errs() << "Existing cache has different directory. Regenerating...\n";
> + return false;
> + }
> +
> + if (!VersionMatch) {
> + llvm::errs()
> + << "Exisitng cache has different version number. Regenerating...\n";
> + return false;
> + }
> +
> + // Basic structure checks have passed. Lets see if we can prove that the cache
> + // contents are still valid.
> + bool IsValid = checkContentsValidity(ValidityToken);
> + if (IsValid) {
> + // The cache is valid, but we might have gotten an updated ValidityToken.
> + // Update the cache with it as clang-stat-cache is just going to exit after
> + // returning from this function.
> + StatCacheFileSystem::updateValidityToken(Out, ValidityToken);
> + }
> + return IsValid && !Force;
> +}
> +
> +int main(int argc, char *argv[]) {
> + cl::ParseCommandLineOptions(argc, argv);
> +
> + llvm::SmallString<128> CanonicalDirectory = StringRef(TargetDirectory);
> +
> + // Remove extraneous separators from the end of the basename.
> + while (!CanonicalDirectory.empty() &&
> + sys::path::is_separator(CanonicalDirectory.back()))
> + CanonicalDirectory.pop_back();
> + // Canonicalize separators on Windows
> + llvm::sys::path::make_preferred(CanonicalDirectory);
> + TargetDirectory = std::string(CanonicalDirectory);
> +
> + StringRef Dirname(TargetDirectory);
> +
> + std::error_code EC;
> + int FD;
> + EC = sys::fs::openFileForReadWrite(
> + OutputFilename, FD, llvm::sys::fs::CD_OpenAlways, llvm::sys::fs::OF_None);
> + if (EC) {
> + llvm::errs() << "Failed to open cache file: "
> + << toString(llvm::createFileError(OutputFilename, EC)) << "\n";
> + return 1;
> + }
> +
> + raw_fd_ostream Out(FD, /* ShouldClose=*/true);
> +
> + uint64_t ValidityToken = 0;
> + // Check if the cache is valid and up-to-date.
> + if (checkCacheValid(FD, Out, ValidityToken)) {
> + if (Verbose)
> + outs() << "Cache up-to-date, exiting\n";
> + return 0;
> + }
> +
> + if (Verbose)
> + outs() << "Building a stat cache for '" << TargetDirectory << "' into '"
> + << OutputFilename << "'\n";
> +
> + // Do not generate a cache for NFS. Iterating huge directory hierarchies
> + // over NFS will be very slow. Better to let the compiler search only the
> + // pieces that it needs than use a cache that takes ages to populate.
> + bool IsLocal;
> + EC = sys::fs::is_local(Dirname, IsLocal);
> + if (EC) {
> + errs() << "Failed to stat the target directory: "
> + << llvm::toString(llvm::errorCodeToError(EC)) << "\n";
> + return 1;
> + }
> +
> + if (!IsLocal && !Force) {
> + errs() << "Target directory is not a local filesystem. "
> + << "Not populating the cache.\n";
> + return 0;
> + }
> +
> + sys::fs::file_status BaseDirStatus;
> + if (std::error_code EC = status(Dirname, BaseDirStatus)) {
> + errs() << "Failed to stat the target directory: "
> + << llvm::toString(llvm::errorCodeToError(EC)) << "\n";
> + return 1;
> + }
> +
> + // Check if the filesystem hosting the target directory is case sensitive.
> + bool IsCaseSensitive = true;
> +#ifdef _PC_CASE_SENSITIVE
> + IsCaseSensitive =
> + ::pathconf(TargetDirectory.c_str(), _PC_CASE_SENSITIVE) == 1;
> +#endif
> + StatCacheFileSystem::StatCacheWriter Generator(
> + Dirname, BaseDirStatus, IsCaseSensitive, ValidityToken);
> +
> + // Populate the cache.
> + auto startTime = llvm::TimeRecord::getCurrentTime();
> + populateHashTable(Dirname, Generator);
> + auto duration = llvm::TimeRecord::getCurrentTime();
> + duration -= startTime;
> +
> + if (Verbose)
> + errs() << "populateHashTable took: " << duration.getWallTime() << "s\n";
> +
> + // Write the cache to disk.
> + startTime = llvm::TimeRecord::getCurrentTime();
> + int Size = Generator.writeStatCache(Out);
> + duration = llvm::TimeRecord::getCurrentTime();
> + duration -= startTime;
> +
> + if (Verbose)
> + errs() << "writeStatCache took: " << duration.getWallTime() << "s\n";
> +
> + // We might have opened a pre-existing cache which was bigger.
> + llvm::sys::fs::resize_file(FD, Size);
> +
> + return 0;
> +}
>
> diff --git a/llvm/include/llvm/Support/StatCacheFileSystem.h b/llvm/include/llvm/Support/StatCacheFileSystem.h
> new file mode 100644
> index 0000000000000..cf2e06768acd2
> --- /dev/null
> +++ b/llvm/include/llvm/Support/StatCacheFileSystem.h
> @@ -0,0 +1,110 @@
> +//===- StatCacheFileSystem.h - Status Caching Proxy File System -*- C++ -*-===//
> +//
> +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
> +// See https://llvm.org/LICENSE.txt for license information.
> +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef LLVM_SUPPORT_STATCACHEFILESYSTEM_H
> +#define LLVM_SUPPORT_STATCACHEFILESYSTEM_H
> +
> +#include "llvm/Support/VirtualFileSystem.h"
> +
> +#include <list>
> +
> +namespace llvm {
> +template <typename T> class OnDiskIterableChainedHashTable;
> +template <typename T> class OnDiskChainedHashTableGenerator;
> +
> +namespace vfs {
> +
> +/// A ProxyFileSystem using cached information for status() rather than going to
> +/// the underlying filesystem.
> +///
> +/// When dealing with a huge tree of (mostly) immutable filesystem content
> +/// like an SDK, it can be very costly to ask the underlying filesystem for
> +/// `stat` data. Even when caching the `stat`s internally, having many
> +/// concurrent Clangs accessing the same tree in a similar way causes
> +/// contention. As SDK files are mostly immutable, we can pre-compute the status
> +/// information using clang-stat-cache and use that information directly without
> +/// accessing the real filesystem until Clang needs to open a file. This can
> +/// speed up module verification and HeaderSearch by significant amounts.
> +class StatCacheFileSystem : public llvm::vfs::ProxyFileSystem {
> + class StatCacheLookupInfo;
> + using StatCacheType =
> + llvm::OnDiskIterableChainedHashTable<StatCacheLookupInfo>;
> +
> + class StatCacheGenerationInfo;
> + using StatCacheGeneratorType =
> + llvm::OnDiskChainedHashTableGenerator<StatCacheGenerationInfo>;
> +
> + explicit StatCacheFileSystem(std::unique_ptr<llvm::MemoryBuffer> CacheFile,
> + IntrusiveRefCntPtr<FileSystem> FS,
> + bool IsCaseSensitive);
> +
> +public:
> + /// Create a StatCacheFileSystem from the passed \a CacheBuffer, a
> + /// MemoryBuffer representing the contents of the \a CacheFilename file. The
> + /// returned filesystem will be overlaid on top of \a FS.
> + static Expected<IntrusiveRefCntPtr<StatCacheFileSystem>>
> + create(std::unique_ptr<llvm::MemoryBuffer> CacheBuffer,
> + IntrusiveRefCntPtr<FileSystem> FS);
> +
> + /// The status override which will consult the cache if \a Path is in the
> + /// cached filesystem tree.
> + llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
> +
> +public:
> + /// A helper class to generate stat caches.
> + class StatCacheWriter {
> + llvm::SmallString<128> BaseDir;
> + bool IsCaseSensitive;
> + uint64_t ValidityToken;
> + StatCacheGeneratorType *Generator;
> + std::list<std::string> PathStorage;
> +
> + public:
> + /// Create a StatCacheWriter
> + ///
> + /// \param BaseDir The base directory for the path. Every filename passed to
> + /// addEntry() needs to start with this base directory.
> + /// \param Status The status entry for the base directory.
> + /// \param IsCaseSensitive Whether the cache is case sensitive.
> + /// \param ValidityToken A 64-bit token that gets embedded in the cache and
> + /// can be used by generator tools to check for the
> + /// cache validity in a platform-specific way.
> + StatCacheWriter(StringRef BaseDir, const sys::fs::file_status &Status,
> + bool IsCaseSensitive, uint64_t ValidityToken = 0);
> + ~StatCacheWriter();
> +
> + /// Add a cache entry storing \a Status for the file at \a Path.
> + void addEntry(StringRef Path, const sys::fs::file_status &Status);
> +
> + /// Write the cache file to \a Out.
> + size_t writeStatCache(raw_fd_ostream &Out);
> + };
> +
> +public:
> + /// Validate that the file content in \a Buffer is a valid stat cache file.
> + /// \a BaseDir, \a IsCaseSensitive, \a VersionMatch and \a ValidityToken are
> + /// output parameters that get populated by this call.
> + static Error validateCacheFile(llvm::MemoryBufferRef Buffer,
> + StringRef &BaseDir, bool &IsCaseSensitive,
> + bool &VersionMatch, uint64_t &ValidityToken);
> +
> + /// Update the ValidityToken data in \a CacheFile.
> + static void updateValidityToken(raw_fd_ostream &CacheFile,
> + uint64_t ValidityToken);
> +
> +private:
> + std::unique_ptr<llvm::MemoryBuffer> StatCacheFile;
> + llvm::StringRef StatCachePrefix;
> + std::unique_ptr<StatCacheType> StatCache;
> + bool IsCaseSensitive = true;
> +};
> +
> +} // namespace vfs
> +} // namespace llvm
> +
> +#endif // LLVM_SUPPORT_STATCACHEFILESYSTEM_H
>
> diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
> index 9b5402fa54f0f..16531c4ec78d2 100644
> --- a/llvm/lib/Support/CMakeLists.txt
> +++ b/llvm/lib/Support/CMakeLists.txt
> @@ -212,6 +212,7 @@ add_llvm_component_library(LLVMSupport
> SmallVector.cpp
> SourceMgr.cpp
> SpecialCaseList.cpp
> + StatCacheFileSystem.cpp
> Statistic.cpp
> StringExtras.cpp
> StringMap.cpp
>
> diff --git a/llvm/lib/Support/StatCacheFileSystem.cpp b/llvm/lib/Support/StatCacheFileSystem.cpp
> new file mode 100644
> index 0000000000000..96fd32bf5082c
> --- /dev/null
> +++ b/llvm/lib/Support/StatCacheFileSystem.cpp
> @@ -0,0 +1,306 @@
> +//===- StatCacheFileSystem.cpp - Status Caching Proxy File System ---------===//
> +//
> +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
> +// See https://llvm.org/LICENSE.txt for license information.
> +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/Support/StatCacheFileSystem.h"
> +
> +#include "llvm/ADT/IntrusiveRefCntPtr.h"
> +#include "llvm/Support/ErrorOr.h"
> +#include "llvm/Support/OnDiskHashTable.h"
> +
> +namespace llvm {
> +namespace vfs {
> +
> +class StatCacheFileSystem::StatCacheLookupInfo {
> +public:
> + typedef StringRef external_key_type;
> + typedef StringRef internal_key_type;
> + typedef llvm::sys::fs::file_status data_type;
> + typedef uint32_t hash_value_type;
> + typedef uint32_t offset_type;
> +
> + static bool EqualKey(const internal_key_type &a, const internal_key_type &b) {
> + return a == b;
> + }
> +
> + static hash_value_type ComputeHash(const internal_key_type &a) {
> + return hash_value(a);
> + }
> +
> + static std::pair<unsigned, unsigned>
> + ReadKeyDataLength(const unsigned char *&d) {
> + using namespace llvm::support;
> + unsigned KeyLen = endian::readNext<uint16_t, little, unaligned>(d);
> + unsigned DataLen = endian::readNext<uint16_t, little, unaligned>(d);
> + return std::make_pair(KeyLen, DataLen);
> + }
> +
> + static const internal_key_type &GetInternalKey(const external_key_type &x) {
> + return x;
> + }
> +
> + static const external_key_type &GetExternalKey(const internal_key_type &x) {
> + return x;
> + }
> +
> + static internal_key_type ReadKey(const unsigned char *d, unsigned n) {
> + return StringRef((const char *)d, n);
> + }
> +
> + static data_type ReadData(const internal_key_type &k, const unsigned char *d,
> + unsigned DataLen) {
> + data_type Result;
> + memcpy(&Result, d, sizeof(Result));
> + return Result;
> + }
> +};
> +
> +class StatCacheFileSystem::StatCacheGenerationInfo {
> +public:
> + typedef StringRef key_type;
> + typedef const StringRef &key_type_ref;
> + typedef sys::fs::file_status data_type;
> + typedef const sys::fs::file_status &data_type_ref;
> + typedef uint32_t hash_value_type;
> + typedef uint32_t offset_type;
> +
> + /// Calculate the hash for Key
> + static hash_value_type ComputeHash(key_type_ref Key) {
> + return static_cast<size_t>(hash_value(Key));
> + }
> +
> + /// Return the lengths, in bytes, of the given Key/Data pair.
> + static std::pair<unsigned, unsigned>
> + EmitKeyDataLength(raw_ostream &Out, key_type_ref Key, data_type_ref Data) {
> + using namespace llvm::support;
> + endian::Writer LE(Out, little);
> + unsigned KeyLen = Key.size();
> + unsigned DataLen = sizeof(Data);
> + LE.write<uint16_t>(KeyLen);
> + LE.write<uint16_t>(DataLen);
> + return std::make_pair(KeyLen, DataLen);
> + }
> +
> + static void EmitKey(raw_ostream &Out, key_type_ref Key, unsigned KeyLen) {
> + Out.write(Key.data(), KeyLen);
> + }
> +
> + /// Write Data to Out. DataLen is the length from EmitKeyDataLength.
> + static void EmitData(raw_ostream &Out, key_type_ref Key, data_type_ref Data,
> + unsigned Len) {
> + Out.write((const char *)&Data, Len);
> + }
> +
> + static bool EqualKey(key_type_ref Key1, key_type_ref Key2) {
> + return Key1 == Key2;
> + }
> +};
> +
> +// The format of the stat cache is (pseudo-code):
> +// struct stat_cache {
> +// char Magic[4]; // "STAT" or "Stat"
> +// uint32_t BucketOffset; // See BucketOffset in OnDiskHashTable.h
> +// uint64_t ValidityToken; // Platform specific data allowing to check
> +// // whether the cache is up-to-date.
> +// uint32_t Version; // The stat cache format version.
> +// char BaseDir[N]; // Zero terminated path to the base directory
> +// < OnDiskHashtable Data > // Data for the hash table. The keys are the
> +// // relative paths under BaseDir. The data is
> +// // llvm::sys::fs::file_status structures.
> +// };
> +
> +#define MAGIC_CASE_SENSITIVE "Stat"
> +#define MAGIC_CASE_INSENSITIVE "STAT"
> +#define STAT_CACHE_VERSION 1
> +
> +namespace {
> +struct StatCacheHeader {
> + char Magic[4];
> + uint32_t BucketOffset;
> + uint64_t ValidityToken;
> + uint32_t Version;
> + char BaseDir[1];
> +};
> +} // namespace
> +
> +StatCacheFileSystem::StatCacheFileSystem(
> + std::unique_ptr<MemoryBuffer> CacheFile, IntrusiveRefCntPtr<FileSystem> FS,
> + bool IsCaseSensitive)
> + : ProxyFileSystem(std::move(FS)), StatCacheFile(std::move(CacheFile)),
> + IsCaseSensitive(IsCaseSensitive) {
> + const char *CacheFileStart = StatCacheFile->getBufferStart();
> + auto *Header = reinterpret_cast<const StatCacheHeader *>(CacheFileStart);
> +
> + uint32_t BucketOffset = Header->BucketOffset;
> + StatCachePrefix = StringRef(Header->BaseDir);
> + // HashTableStart points at the beginning of the data emitted by the
> + // OnDiskHashTable.
> + const unsigned char *HashTableStart = (const unsigned char *)CacheFileStart +
> + StatCachePrefix.size() +
> + sizeof(StatCacheHeader);
> + StatCache.reset(StatCacheType::Create(
> + (const unsigned char *)CacheFileStart + BucketOffset, HashTableStart,
> + (const unsigned char *)CacheFileStart));
> +}
> +
> +Expected<IntrusiveRefCntPtr<StatCacheFileSystem>>
> +StatCacheFileSystem::create(std::unique_ptr<MemoryBuffer> CacheBuffer,
> + IntrusiveRefCntPtr<FileSystem> FS) {
> + StringRef BaseDir;
> + bool IsCaseSensitive;
> + bool VersionMatch;
> + uint64_t ValidityToken;
> + if (auto E = validateCacheFile(*CacheBuffer, BaseDir, IsCaseSensitive,
> + VersionMatch, ValidityToken))
> + return E;
> + if (!VersionMatch) {
> + return createStringError(inconvertibleErrorCode(),
> + CacheBuffer->getBufferIdentifier() +
> + ": Mismatched cache file version");
> + }
> + return new StatCacheFileSystem(std::move(CacheBuffer), FS, IsCaseSensitive);
> +}
> +
> +ErrorOr<Status> StatCacheFileSystem::status(const Twine &Path) {
> + SmallString<180> StringPath;
> + Path.toVector(StringPath);
> + // If the cache is not case sensitive, do all operations on lower-cased paths.
> + if (!IsCaseSensitive)
> + std::transform(StringPath.begin(), StringPath.end(), StringPath.begin(),
> + toLower);
> +
> + // Canonicalize the path. This removes single dot path components,
> + // but it also gets rid of repeating separators.
> + llvm::sys::path::remove_dots(StringPath);
> +
> + // If on Windows, canonicalize separators.
> + llvm::sys::path::make_preferred(StringPath);
> +
> + // Check if the requested path falls into the cache.
> + StringRef SuffixPath(StringPath);
> + if (!SuffixPath.consume_front(StatCachePrefix))
> + return ProxyFileSystem::status(Path);
> +
> + auto It = StatCache->find(SuffixPath);
> + if (It == StatCache->end()) {
> + // We didn't find the file in the cache even though it started with the
> + // cache prefix. It could be that the file doesn't exist, or the spelling of
> + // the path is different. `remove_dots` canonicalizes the path by removing
> + // `.` and excess separators, but leaves `..` since it isn't semantically
> + // preserving to remove them in the presence of symlinks. If the path
> + // does not contain '..' we can safely say it doesn't exist.
> + if (std::find(sys::path::begin(SuffixPath), sys::path::end(SuffixPath),
> + "..") == sys::path::end(SuffixPath)) {
> + return llvm::errc::no_such_file_or_directory;
> + }
> + return ProxyFileSystem::status(Path);
> + }
> +
> + // clang-stat-cache will record entries for broken symlinks with a default-
> + // constructed Status. This will have a default-constructed UniqueID.
> + if ((*It).getUniqueID() == llvm::sys::fs::UniqueID())
> + return llvm::errc::no_such_file_or_directory;
> +
> + return llvm::vfs::Status::copyWithNewName(*It, Path);
> +}
> +
> +StatCacheFileSystem::StatCacheWriter::StatCacheWriter(
> + StringRef BaseDir, const sys::fs::file_status &Status, bool IsCaseSensitive,
> + uint64_t ValidityToken)
> + : BaseDir(IsCaseSensitive ? BaseDir.str() : BaseDir.lower()),
> + IsCaseSensitive(IsCaseSensitive), ValidityToken(ValidityToken),
> + Generator(new StatCacheGeneratorType()) {
> + addEntry(BaseDir, Status);
> + // If on Windows, canonicalize separators.
> + llvm::sys::path::make_preferred(this->BaseDir);
> +}
> +
> +StatCacheFileSystem::StatCacheWriter::~StatCacheWriter() { delete Generator; }
> +
> +void StatCacheFileSystem::StatCacheWriter::addEntry(
> + StringRef Path, const sys::fs::file_status &Status) {
> + llvm::SmallString<128> StoredPath;
> +
> +#if defined(_WIN32)
> + StoredPath = Path;
> + llvm::sys::path::make_preferred(StoredPath);
> + Path = StoredPath;
> +#endif
> +
> + if (!IsCaseSensitive) {
> + StoredPath = Path.lower();
> + Path = StoredPath;
> + }
> +
> + LLVM_ATTRIBUTE_UNUSED bool Consumed = Path.consume_front(BaseDir);
> + assert(Consumed && "Path does not start with expected prefix.");
> +
> + PathStorage.emplace_back(Path.str());
> + Generator->insert(PathStorage.back(), Status);
> +}
> +
> +size_t
> +StatCacheFileSystem::StatCacheWriter::writeStatCache(raw_fd_ostream &Out) {
> + const uint32_t Version = STAT_CACHE_VERSION;
> + // Magic value.
> + if (IsCaseSensitive)
> + Out.write(MAGIC_CASE_SENSITIVE, 4);
> + else
> + Out.write(MAGIC_CASE_INSENSITIVE, 4);
> + // Placeholder for BucketOffset, filled in below.
> + Out.write("\0\0\0\0", 4);
> + // Write out the validity token.
> + Out.write((const char *)&ValidityToken, sizeof(ValidityToken));
> + // Write out the version.
> + Out.write((const char *)&Version, sizeof(Version));
> + // Write out the base directory for the cache.
> + Out.write(BaseDir.c_str(), BaseDir.size() + 1);
> + // Write out the hashtable data.
> + uint32_t BucketOffset = Generator->Emit(Out);
> + int Size = Out.tell();
> + // Move back to right after the Magic to insert BucketOffset
> + Out.seek(4);
> + Out.write((const char *)&BucketOffset, sizeof(BucketOffset));
> + return Size;
> +}
> +
> +Error StatCacheFileSystem::validateCacheFile(MemoryBufferRef Buffer,
> + StringRef &BaseDir,
> + bool &IsCaseSensitive,
> + bool &VersionMatch,
> + uint64_t &ValidityToken) {
> + auto *Header =
> + reinterpret_cast<const StatCacheHeader *>(Buffer.getBufferStart());
> + if (Buffer.getBufferSize() < sizeof(StatCacheHeader) ||
> + (memcmp(Header->Magic, MAGIC_CASE_INSENSITIVE, sizeof(Header->Magic)) &&
> + memcmp(Header->Magic, MAGIC_CASE_SENSITIVE, sizeof(Header->Magic))) ||
> + Header->BucketOffset > Buffer.getBufferSize())
> + return createStringError(inconvertibleErrorCode(), "Invalid cache file");
> +
> + auto PathLen =
> + strnlen(Header->BaseDir,
> + Buffer.getBufferSize() - offsetof(StatCacheHeader, BaseDir));
> + if (Header->BaseDir[PathLen] != 0)
> + return createStringError(inconvertibleErrorCode(), "Invalid cache file");
> +
> + IsCaseSensitive = Header->Magic[1] == MAGIC_CASE_SENSITIVE[1];
> + VersionMatch = Header->Version == STAT_CACHE_VERSION;
> + BaseDir = StringRef(Header->BaseDir, PathLen);
> + ValidityToken = Header->ValidityToken;
> +
> + return ErrorSuccess();
> +}
> +
> +void StatCacheFileSystem::updateValidityToken(raw_fd_ostream &CacheFile,
> + uint64_t ValidityToken) {
> + CacheFile.pwrite(reinterpret_cast<char *>(&ValidityToken),
> + sizeof(ValidityToken),
> + offsetof(StatCacheHeader, ValidityToken));
> +}
> +
> +} // namespace vfs
> +} // namespace llvm
>
> diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp
> index 242bb76865b2c..89fd0aac17f2f 100644
> --- a/llvm/unittests/Support/VirtualFileSystemTest.cpp
> +++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp
> @@ -14,9 +14,11 @@
> #include "llvm/Support/MemoryBuffer.h"
> #include "llvm/Support/Path.h"
> #include "llvm/Support/SourceMgr.h"
> +#include "llvm/Support/StatCacheFileSystem.h"
> #include "llvm/Testing/Support/SupportHelpers.h"
> #include "gmock/gmock.h"
> #include "gtest/gtest.h"
> +#include <list>
> #include <map>
> #include <string>
>
> @@ -3228,3 +3230,306 @@ TEST(RedirectingFileSystemTest, PrintOutput) {
> " DummyFileSystem (RecursiveContents)\n",
> Output);
> }
> +
> +class StatCacheFileSystemTest : public ::testing::Test {
> +public:
> + void SetUp() override {}
> +
> + template <typename StringCollection>
> + void createStatCacheFileSystem(
> + StringRef OutputFile, StringRef BaseDir, bool IsCaseSensitive,
> + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> &Result,
> + StringCollection &Filenames,
> + IntrusiveRefCntPtr<vfs::FileSystem> Lower = new ErrorDummyFileSystem(),
> + uint64_t ValidityToken = 0) {
> + sys::fs::file_status s;
> + status(BaseDir, s);
> + vfs::StatCacheFileSystem::StatCacheWriter Generator(
> + BaseDir, s, IsCaseSensitive, ValidityToken);
> + std::error_code ErrorCode;
> +
> + Result.reset();
> +
> + // Base path should be present in the stat cache.
> + Filenames.push_back(std::string(BaseDir));
> +
> + for (sys::fs::recursive_directory_iterator I(BaseDir, ErrorCode), E;
> + I != E && !ErrorCode; I.increment(ErrorCode)) {
> + Filenames.push_back(I->path());
> + StringRef Path(Filenames.back().c_str());
> + status(Path, s);
> + Generator.addEntry(Path, s);
> + }
> +
> + {
> + raw_fd_ostream StatCacheFile(OutputFile, ErrorCode);
> + ASSERT_FALSE(ErrorCode);
> + Generator.writeStatCache(StatCacheFile);
> + }
> +
> + loadCacheFile(OutputFile, ValidityToken, Lower, Result);
> + }
> +
> + void loadCacheFile(StringRef OutputFile, uint64_t ExpectedValidityToken,
> + IntrusiveRefCntPtr<vfs::FileSystem> Lower,
> + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> &Result) {
> + auto ErrorOrBuffer = MemoryBuffer::getFile(OutputFile);
> + EXPECT_TRUE(ErrorOrBuffer);
> + StringRef CacheBaseDir;
> + bool IsCaseSensitive;
> + bool VersionMatch;
> + uint64_t FileValidityToken;
> + auto E = vfs::StatCacheFileSystem::validateCacheFile(
> + (*ErrorOrBuffer)->getMemBufferRef(), CacheBaseDir, IsCaseSensitive,
> + VersionMatch, FileValidityToken);
> + ASSERT_FALSE(E);
> + EXPECT_TRUE(VersionMatch);
> + EXPECT_EQ(FileValidityToken, ExpectedValidityToken);
> + auto ExpectedCache =
> + vfs::StatCacheFileSystem::create(std::move(*ErrorOrBuffer), Lower);
> + ASSERT_FALSE(ExpectedCache.takeError());
> + Result = *ExpectedCache;
> + }
> +
> + template <typename StringCollection>
> + void
> + compareStatCacheToRealFS(IntrusiveRefCntPtr<vfs::StatCacheFileSystem> CacheFS,
> + const StringCollection &Files) {
> + IntrusiveRefCntPtr<vfs::FileSystem> RealFS = vfs::getRealFileSystem();
> +
> + for (auto &File : Files) {
> + auto ErrorOrStatus1 = RealFS->status(File);
> + auto ErrorOrStatus2 = CacheFS->status(File);
> +
> + EXPECT_EQ((bool)ErrorOrStatus1, (bool)ErrorOrStatus2);
> + if (!ErrorOrStatus1 || !ErrorOrStatus2)
> + continue;
> +
> + vfs::Status s1 = *ErrorOrStatus1, s2 = *ErrorOrStatus2;
> + EXPECT_EQ(s1.getName(), s2.getName());
> + EXPECT_EQ(s1.getType(), s2.getType());
> + EXPECT_EQ(s1.getPermissions(), s2.getPermissions());
> + EXPECT_EQ(s1.getLastModificationTime(), s2.getLastModificationTime());
> + EXPECT_EQ(s1.getUniqueID(), s2.getUniqueID());
> + EXPECT_EQ(s1.getUser(), s2.getUser());
> + EXPECT_EQ(s1.getGroup(), s2.getGroup());
> + EXPECT_EQ(s1.getSize(), s2.getSize());
> + }
> + }
> +};
> +
> +TEST_F(StatCacheFileSystemTest, Basic) {
> + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true);
> + TempDir _a(TestDirectory.path("a"));
> + TempFile _ab(TestDirectory.path("a/b"));
> + TempDir _ac(TestDirectory.path("a/c"));
> + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents");
> + TempFile _ace(TestDirectory.path("a/c/e"));
> + TempFile _acf(TestDirectory.path("a/c/f"), "", "More dummy contents");
> + TempDir _ag(TestDirectory.path("a/g"));
> + TempFile _agh(TestDirectory.path("a/g/h"));
> +
> + StringRef BaseDir(_a.path());
> +
> + SmallVector<std::string, 10> Filenames;
> + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS;
> + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
> + /* IsCaseSensitive= */ true, StatCacheFS,
> + Filenames);
> + ASSERT_TRUE(StatCacheFS);
> + compareStatCacheToRealFS(StatCacheFS, Filenames);
> +}
> +
> +TEST_F(StatCacheFileSystemTest, CaseSensitivity) {
> + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true);
> + TempDir _a(TestDirectory.path("a"));
> + TempDir _ac(TestDirectory.path("a/c"));
> + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents");
> + TempDir _b(TestDirectory.path("B"));
> + TempDir _bc(TestDirectory.path("B/c"));
> + TempFile _bcd(TestDirectory.path("B/c/D"), "", "Dummy contents");
> +
> + StringRef BaseDir(TestDirectory.path());
> + SmallVector<std::string, 10> Filenames;
> + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS;
> + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
> + /* IsCaseSensitive= */ true, StatCacheFS,
> + Filenames);
> + ASSERT_TRUE(StatCacheFS);
> +
> + auto ErrorOrStatus = StatCacheFS->status(_acd.path());
> + EXPECT_TRUE(ErrorOrStatus);
> + ErrorOrStatus = StatCacheFS->status(_bcd.path());
> + EXPECT_TRUE(ErrorOrStatus);
> + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/C/d"));
> + EXPECT_FALSE(ErrorOrStatus);
> + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("A/C/d"));
> + EXPECT_FALSE(ErrorOrStatus);
> + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/c/D"));
> + EXPECT_FALSE(ErrorOrStatus);
> + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/c/d"));
> + EXPECT_FALSE(ErrorOrStatus);
> + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/C/d"));
> + EXPECT_FALSE(ErrorOrStatus);
> + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("B/C/D"));
> + EXPECT_FALSE(ErrorOrStatus);
> +
> + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
> + /* IsCaseSensitive= */ false, StatCacheFS,
> + Filenames);
> + ASSERT_TRUE(StatCacheFS);
> + ErrorOrStatus = StatCacheFS->status(_acd.path());
> + EXPECT_TRUE(ErrorOrStatus);
> + ErrorOrStatus = StatCacheFS->status(_bcd.path());
> + EXPECT_TRUE(ErrorOrStatus);
> + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/C/d"));
> + EXPECT_TRUE(ErrorOrStatus);
> + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("A/C/d"));
> + EXPECT_TRUE(ErrorOrStatus);
> + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/c/D"));
> + EXPECT_TRUE(ErrorOrStatus);
> + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/c/d"));
> + EXPECT_TRUE(ErrorOrStatus);
> + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/C/d"));
> + EXPECT_TRUE(ErrorOrStatus);
> + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("B/C/D"));
> + EXPECT_TRUE(ErrorOrStatus);
> +}
> +
> +TEST_F(StatCacheFileSystemTest, DotDot) {
> + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true);
> + TempDir _a(TestDirectory.path("a"));
> + TempDir _ab(TestDirectory.path("a/b"));
> + TempFile _abd(TestDirectory.path("a/b/d"));
> + TempDir _ac(TestDirectory.path("a/c"));
> + TempFile _acd(TestDirectory.path("a/c/d"));
> +
> + StringRef BaseDir(_a.path());
> + SmallVector<std::string, 10> Filenames;
> + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS;
> + auto RealFS = vfs::getRealFileSystem();
> + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
> + /* IsCaseSensitive= */ true, StatCacheFS, Filenames,
> + RealFS);
> + ASSERT_TRUE(StatCacheFS);
> +
> + // Create a file in the cached prefix after the cache was created.
> + TempFile _abe(TestDirectory.path("a/b/e"));
> + // Verify the cache is kicking in.
> + ASSERT_FALSE(StatCacheFS->status(_abe.path()));
> + // We can access the new file using a ".." because the StatCache will
> + // just pass that request to the FileSystem below it.
> + const SmallString<128> PathsToTest[] = {
> + TestDirectory.path("a/b/../e"),
> + TestDirectory.path("a/b/../c/d"),
> + TestDirectory.path("a/b/.."),
> + };
> + compareStatCacheToRealFS(StatCacheFS, PathsToTest);
> +}
> +
> +#ifdef LLVM_ON_UNIX
> +TEST_F(StatCacheFileSystemTest, Links) {
> + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true);
> + TempDir _a(TestDirectory.path("a"));
> + TempLink _ab("d", TestDirectory.path("a/b"));
> + TempFile _ac(TestDirectory.path("a/c"));
> + TempDir _ad(TestDirectory.path("a/d"));
> + TempFile _add(TestDirectory.path("a/d/d"), "", "Dummy contents");
> + TempFile _ade(TestDirectory.path("a/d/e"));
> + TempFile _adf(TestDirectory.path("a/d/f"), "", "More dummy contents");
> + TempLink _adg(_ad.path(), TestDirectory.path("a/d/g"));
> + TempDir _ah(TestDirectory.path("a/h"));
> + TempLink _ahi(_ad.path(), TestDirectory.path("a/h/i"));
> + TempLink _ahj("no_such_file", TestDirectory.path("a/h/j"));
> +
> + StringRef BaseDir(_a.path());
> +
> + SmallVector<std::string, 10> Filenames;
> + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS;
> + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
> + /* IsCaseSensitive= */ true, StatCacheFS,
> + Filenames);
> + ASSERT_TRUE(StatCacheFS);
> + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(),
> + TestDirectory.path("a/d/g/g")),
> + Filenames.end());
> + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(),
> + TestDirectory.path("a/b/e")),
> + Filenames.end());
> + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(),
> + TestDirectory.path("a/h/i/f")),
> + Filenames.end());
> + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(),
> + TestDirectory.path("a/h/j")),
> + Filenames.end());
> + compareStatCacheToRealFS(StatCacheFS, Filenames);
> +
> + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
> + /* IsCaseSensitive= */ true, StatCacheFS, Filenames,
> + vfs::getRealFileSystem());
> + const SmallString<128> PathsToTest[] = {
> + TestDirectory.path("a/h/i/../c"),
> + TestDirectory.path("a/b/../d"),
> + TestDirectory.path("a/g/g/../c"),
> + TestDirectory.path("a/b/.."),
> + };
> + compareStatCacheToRealFS(StatCacheFS, PathsToTest);
> +}
> +#endif
> +
> +TEST_F(StatCacheFileSystemTest, Canonical) {
> + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true);
> + TempDir _a(TestDirectory.path("a"));
> + TempFile _ab(TestDirectory.path("a/b"));
> + TempDir _ac(TestDirectory.path("a/c"));
> + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents");
> +
> + StringRef BaseDir(_a.path());
> + SmallVector<std::string, 10> Filenames;
> + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS;
> + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
> + /* IsCaseSensitive= */ true, StatCacheFS,
> + Filenames);
> + ASSERT_TRUE(StatCacheFS);
> +
> + const SmallString<128> PathsToTest[] = {
> + TestDirectory.path("./a/b"), TestDirectory.path("a//./b"),
> + TestDirectory.path("a///b"), TestDirectory.path("a//c//d"),
> + TestDirectory.path("a//c/./d"), TestDirectory.path("a/./././b"),
> + TestDirectory.path("a/.//.//.//b"),
> + };
> + compareStatCacheToRealFS(StatCacheFS, PathsToTest);
> +}
> +
> +TEST_F(StatCacheFileSystemTest, ValidityToken) {
> + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true);
> + TempDir _a(TestDirectory.path("a"));
> + TempFile _ab(TestDirectory.path("a/b"));
> + TempDir _ac(TestDirectory.path("a/c"));
> + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents");
> +
> + StringRef BaseDir(_a.path());
> + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS;
> + {
> + SmallVector<std::string, 10> Filenames;
> + uint64_t ValidityToken = 0x1234567890abcfef;
> + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
> + /* IsCaseSensitive= */ true, StatCacheFS,
> + Filenames, new DummyFileSystem(), ValidityToken);
> + ASSERT_TRUE(StatCacheFS);
> + }
> +
> + uint64_t UpdatedValidityToken = 0xabcdef0123456789;
> + {
> + std::error_code EC;
> + raw_fd_ostream CacheFile(TestDirectory.path("stat.cache"), EC,
> + sys::fs::CD_OpenAlways);
> + ASSERT_FALSE(EC);
> + vfs::StatCacheFileSystem::updateValidityToken(CacheFile,
> + UpdatedValidityToken);
> + }
> +
> + loadCacheFile(TestDirectory.path("stat.cache"), UpdatedValidityToken,
> + new DummyFileSystem(), StatCacheFS);
> + EXPECT_TRUE(StatCacheFS);
> +}
>
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
More information about the cfe-commits
mailing list