[clang-tools-extra] ca87553 - Reland(2) "[clangd] Indexing of standard library"
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Wed May 18 06:30:47 PDT 2022
Author: Sam McCall
Date: 2022-05-18T15:30:37+02:00
New Revision: ca875539f788c8063e243ce9ceb877a0d2ad9115
URL: https://github.com/llvm/llvm-project/commit/ca875539f788c8063e243ce9ceb877a0d2ad9115
DIFF: https://github.com/llvm/llvm-project/commit/ca875539f788c8063e243ce9ceb877a0d2ad9115.diff
LOG: Reland(2) "[clangd] Indexing of standard library"
This reverts commit 6aabf60f2fb7589430c0ecc8fe95913c973fa248.
Added:
clang-tools-extra/clangd/index/StdLib.cpp
clang-tools-extra/clangd/index/StdLib.h
clang-tools-extra/clangd/unittests/StdLibTests.cpp
Modified:
clang-tools-extra/clangd/CMakeLists.txt
clang-tools-extra/clangd/ClangdServer.cpp
clang-tools-extra/clangd/ClangdServer.h
clang-tools-extra/clangd/Config.h
clang-tools-extra/clangd/ConfigCompile.cpp
clang-tools-extra/clangd/ConfigFragment.h
clang-tools-extra/clangd/ConfigYAML.cpp
clang-tools-extra/clangd/TUScheduler.cpp
clang-tools-extra/clangd/TUScheduler.h
clang-tools-extra/clangd/index/FileIndex.cpp
clang-tools-extra/clangd/index/FileIndex.h
clang-tools-extra/clangd/index/SymbolOrigin.cpp
clang-tools-extra/clangd/index/SymbolOrigin.h
clang-tools-extra/clangd/unittests/CMakeLists.txt
clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
index 9c37cfe7b7001..7cfbd6f95750e 100644
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -119,6 +119,7 @@ add_clang_library(clangDaemon
index/Ref.cpp
index/Relation.cpp
index/Serialization.cpp
+ index/StdLib.cpp
index/Symbol.cpp
index/SymbolCollector.cpp
index/SymbolID.cpp
diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp
index 80d7d5c5ece19..69a0f63972aae 100644
--- a/clang-tools-extra/clangd/ClangdServer.cpp
+++ b/clang-tools-extra/clangd/ClangdServer.cpp
@@ -26,6 +26,7 @@
#include "index/CanonicalIncludes.h"
#include "index/FileIndex.h"
#include "index/Merge.h"
+#include "index/StdLib.h"
#include "refactor/Rename.h"
#include "refactor/Tweak.h"
#include "support/Cancellation.h"
@@ -59,16 +60,39 @@ namespace {
// Update the FileIndex with new ASTs and plumb the diagnostics responses.
struct UpdateIndexCallbacks : public ParsingCallbacks {
UpdateIndexCallbacks(FileIndex *FIndex,
- ClangdServer::Callbacks *ServerCallbacks)
- : FIndex(FIndex), ServerCallbacks(ServerCallbacks) {}
+ ClangdServer::Callbacks *ServerCallbacks,
+ const ThreadsafeFS &TFS, AsyncTaskRunner *Tasks)
+ : FIndex(FIndex), ServerCallbacks(ServerCallbacks), TFS(TFS),
+ Tasks(Tasks) {}
- void onPreambleAST(PathRef Path, llvm::StringRef Version, ASTContext &Ctx,
+ void onPreambleAST(PathRef Path, llvm::StringRef Version,
+ const CompilerInvocation &CI, ASTContext &Ctx,
Preprocessor &PP,
const CanonicalIncludes &CanonIncludes) override {
+ // If this preamble uses a standard library we haven't seen yet, index it.
+ if (FIndex)
+ if (auto Loc = Stdlib.add(*CI.getLangOpts(), PP.getHeaderSearchInfo()))
+ indexStdlib(CI, std::move(*Loc));
+
if (FIndex)
FIndex->updatePreamble(Path, Version, Ctx, PP, CanonIncludes);
}
+  // Indexes the standard library found at Loc, using a private copy of CI.
+  // When indexing finishes, the symbols are merged into FIndex only if
+  // Stdlib.isBest() still accepts the language options they were built with.
+  // The work runs on Tasks when a runner was supplied, otherwise inline.
+  // NOTE(review): the task captures `this` and reaches Stdlib, FIndex and TFS
+  // through it; confirm this object outlives every task queued on Tasks.
+  void indexStdlib(const CompilerInvocation &CI, StdLibLocation Loc) {
+    auto IndexTask = [this, LO(*CI.getLangOpts()), Location(std::move(Loc)),
+                      Invocation(
+                          std::make_unique<CompilerInvocation>(CI))]() mutable {
+      IndexFileIn Indexed;
+      Indexed.Symbols =
+          indexStandardLibrary(std::move(Invocation), Location, TFS);
+      if (Stdlib.isBest(LO))
+        FIndex->updatePreamble(std::move(Indexed));
+    };
+    if (!Tasks) {
+      // No task runner was supplied: do the work on the calling thread.
+      IndexTask();
+      return;
+    }
+    // This doesn't have a semaphore to enforce -j, but it's rare.
+    Tasks->runAsync("IndexStdlib", std::move(IndexTask));
+  }
+
void onMainAST(PathRef Path, ParsedAST &AST, PublishFn Publish) override {
if (FIndex)
FIndex->updateMain(Path, AST);
@@ -103,6 +127,9 @@ struct UpdateIndexCallbacks : public ParsingCallbacks {
private:
FileIndex *FIndex;
ClangdServer::Callbacks *ServerCallbacks;
+ const ThreadsafeFS &TFS;
+ StdLibSet Stdlib;
+ AsyncTaskRunner *Tasks;
};
class DraftStoreFS : public ThreadsafeFS {
@@ -154,12 +181,15 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB,
Transient(Opts.ImplicitCancellation ? TUScheduler::InvalidateOnUpdate
: TUScheduler::NoInvalidation),
DirtyFS(std::make_unique<DraftStoreFS>(TFS, DraftMgr)) {
+ if (Opts.AsyncThreadsCount != 0)
+ IndexTasks.emplace();
// Pass a callback into `WorkScheduler` to extract symbols from a newly
// parsed file and rebuild the file index synchronously each time an AST
// is parsed.
- WorkScheduler.emplace(
- CDB, TUScheduler::Options(Opts),
- std::make_unique<UpdateIndexCallbacks>(DynamicIdx.get(), Callbacks));
+ WorkScheduler.emplace(CDB, TUScheduler::Options(Opts),
+ std::make_unique<UpdateIndexCallbacks>(
+ DynamicIdx.get(), Callbacks, TFS,
+ IndexTasks ? IndexTasks.getPointer() : nullptr));
// Adds an index to the stack, at higher priority than existing indexes.
auto AddIndex = [&](SymbolIndex *Idx) {
if (this->Index != nullptr) {
@@ -975,6 +1005,9 @@ ClangdServer::blockUntilIdleForTest(llvm::Optional<double> TimeoutSeconds) {
// and we're blocking the main thread.
if (!WorkScheduler->blockUntilIdle(timeoutSeconds(TimeoutSeconds)))
return false;
+ // TUScheduler is the only thing that starts background indexing work.
+ if (IndexTasks && !IndexTasks->wait(timeoutSeconds(TimeoutSeconds)))
+ return false;
// Unfortunately we don't have strict topological order between the rest of
// the components. E.g. CDB broadcast triggers background indexing.
diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h
index 6d999722805ed..e73454901cff0 100644
--- a/clang-tools-extra/clangd/ClangdServer.h
+++ b/clang-tools-extra/clangd/ClangdServer.h
@@ -428,6 +428,7 @@ class ClangdServer {
mutable std::mutex CachedCompletionFuzzyFindRequestMutex;
llvm::Optional<std::string> WorkspaceRoot;
+ llvm::Optional<AsyncTaskRunner> IndexTasks; // for stdlib indexing.
llvm::Optional<TUScheduler> WorkScheduler;
// Invalidation policy used for actions that we assume are "transient".
TUScheduler::ASTActionInvalidation Transient;
diff --git a/clang-tools-extra/clangd/Config.h b/clang-tools-extra/clangd/Config.h
index 734dce43c5873..ec7247121d5cd 100644
--- a/clang-tools-extra/clangd/Config.h
+++ b/clang-tools-extra/clangd/Config.h
@@ -81,11 +81,12 @@ struct Config {
/// forward-slashes.
std::string MountPoint;
};
- /// Controls background-index behavior.
+ /// Controls index behavior.
struct {
- /// Whether this TU should be indexed.
+ /// Whether this TU should be background-indexed.
BackgroundPolicy Background = BackgroundPolicy::Build;
ExternalIndexSpec External;
+ bool StandardLibrary = false;
} Index;
enum UnusedIncludesPolicy { Strict, None };
diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp
index a4d7904781e4f..0cdbc5526e6e5 100644
--- a/clang-tools-extra/clangd/ConfigCompile.cpp
+++ b/clang-tools-extra/clangd/ConfigCompile.cpp
@@ -332,6 +332,11 @@ struct FragmentCompiler {
}
if (F.External)
compile(std::move(**F.External), F.External->Range);
+ if (F.StandardLibrary)
+ Out.Apply.push_back(
+ [Val(**F.StandardLibrary)](const Params &, Config &C) {
+ C.Index.StandardLibrary = Val;
+ });
}
void compile(Fragment::IndexBlock::ExternalBlock &&External,
diff --git a/clang-tools-extra/clangd/ConfigFragment.h b/clang-tools-extra/clangd/ConfigFragment.h
index 34bff844cb26c..5950f8ff655c3 100644
--- a/clang-tools-extra/clangd/ConfigFragment.h
+++ b/clang-tools-extra/clangd/ConfigFragment.h
@@ -199,6 +199,9 @@ struct Fragment {
llvm::Optional<Located<std::string>> MountPoint;
};
llvm::Optional<Located<ExternalBlock>> External;
+ // Whether the standard library visible from this file should be indexed.
+ // This makes all standard library symbols available, included or not.
+ llvm::Optional<Located<bool>> StandardLibrary;
};
IndexBlock Index;
diff --git a/clang-tools-extra/clangd/ConfigYAML.cpp b/clang-tools-extra/clangd/ConfigYAML.cpp
index ec39bb9686640..cec60756a3431 100644
--- a/clang-tools-extra/clangd/ConfigYAML.cpp
+++ b/clang-tools-extra/clangd/ConfigYAML.cpp
@@ -184,6 +184,10 @@ class Parser {
F.External.emplace(std::move(External));
F.External->Range = N.getSourceRange();
});
+ Dict.handle("StandardLibrary", [&](Node &N) {
+ if (auto StandardLibrary = boolValue(N, "StandardLibrary"))
+ F.StandardLibrary = *StandardLibrary;
+ });
Dict.parse(N);
}
diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp
index 19d4ca5a48a50..f60fbfaa479f9 100644
--- a/clang-tools-extra/clangd/TUScheduler.cpp
+++ b/clang-tools-extra/clangd/TUScheduler.cpp
@@ -1013,9 +1013,10 @@ void PreambleThread::build(Request Req) {
bool IsFirstPreamble = !LatestBuild;
LatestBuild = clang::clangd::buildPreamble(
FileName, *Req.CI, Inputs, StoreInMemory,
- [this, Version(Inputs.Version)](ASTContext &Ctx, Preprocessor &PP,
- const CanonicalIncludes &CanonIncludes) {
- Callbacks.onPreambleAST(FileName, Version, Ctx, PP, CanonIncludes);
+ [&](ASTContext &Ctx, Preprocessor &PP,
+ const CanonicalIncludes &CanonIncludes) {
+ Callbacks.onPreambleAST(FileName, Inputs.Version, *Req.CI, Ctx, PP,
+ CanonIncludes);
},
&Stats);
if (!LatestBuild)
diff --git a/clang-tools-extra/clangd/TUScheduler.h b/clang-tools-extra/clangd/TUScheduler.h
index ceb7ea0f0239a..a852199ba7cb7 100644
--- a/clang-tools-extra/clangd/TUScheduler.h
+++ b/clang-tools-extra/clangd/TUScheduler.h
@@ -133,8 +133,8 @@ class ParsingCallbacks {
/// contains only AST nodes from the #include directives at the start of the
/// file. AST node in the current file should be observed on onMainAST call.
virtual void onPreambleAST(PathRef Path, llvm::StringRef Version,
- ASTContext &Ctx, Preprocessor &PP,
- const CanonicalIncludes &) {}
+ const CompilerInvocation &CI, ASTContext &Ctx,
+ Preprocessor &PP, const CanonicalIncludes &) {}
/// The argument function is run under the critical section guarding against
/// races when closing the files.
diff --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp
index 72f7c0801250b..dcfc4b5981fa2 100644
--- a/clang-tools-extra/clangd/index/FileIndex.cpp
+++ b/clang-tools-extra/clangd/index/FileIndex.cpp
@@ -425,12 +425,7 @@ FileIndex::FileIndex()
MainFileSymbols(IndexContents::All),
MainFileIndex(std::make_unique<MemIndex>()) {}
-void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
- ASTContext &AST, Preprocessor &PP,
- const CanonicalIncludes &Includes) {
- IndexFileIn IF;
- std::tie(IF.Symbols, std::ignore, IF.Relations) =
- indexHeaderSymbols(Version, AST, PP, Includes);
+void FileIndex::updatePreamble(IndexFileIn IF) {
FileShardedIndex ShardedIndex(std::move(IF));
for (auto Uri : ShardedIndex.getAllSources()) {
auto IF = ShardedIndex.getShard(Uri);
@@ -461,6 +456,15 @@ void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
}
}
+// Indexes the header symbols and relations visible in a preamble AST and
+// forwards them to the IndexFileIn overload. The refs produced by
+// indexHeaderSymbols are discarded, and Path is not consulted here.
+void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
+                               ASTContext &AST, Preprocessor &PP,
+                               const CanonicalIncludes &Includes) {
+  auto Indexed = indexHeaderSymbols(Version, AST, PP, Includes);
+  IndexFileIn IF;
+  IF.Symbols = std::move(std::get<0>(Indexed));
+  IF.Relations = std::move(std::get<2>(Indexed));
+  updatePreamble(std::move(IF));
+}
+
void FileIndex::updateMain(PathRef Path, ParsedAST &AST) {
auto Contents = indexMainDecls(AST);
MainFileSymbols.update(
diff --git a/clang-tools-extra/clangd/index/FileIndex.h b/clang-tools-extra/clangd/index/FileIndex.h
index 4c6f965e78013..24ffbc9c7fb52 100644
--- a/clang-tools-extra/clangd/index/FileIndex.h
+++ b/clang-tools-extra/clangd/index/FileIndex.h
@@ -114,6 +114,7 @@ class FileIndex : public MergedIndex {
/// and macros in \p PP.
void updatePreamble(PathRef Path, llvm::StringRef Version, ASTContext &AST,
Preprocessor &PP, const CanonicalIncludes &Includes);
+ void updatePreamble(IndexFileIn);
/// Update symbols and references from main file \p Path with
/// `indexMainDecls`.
diff --git a/clang-tools-extra/clangd/index/StdLib.cpp b/clang-tools-extra/clangd/index/StdLib.cpp
new file mode 100644
index 0000000000000..64689418464b3
--- /dev/null
+++ b/clang-tools-extra/clangd/index/StdLib.cpp
@@ -0,0 +1,363 @@
+//===-- StdLib.cpp ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "StdLib.h"
+#include <fstream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "Compiler.h"
+#include "Config.h"
+#include "SymbolCollector.h"
+#include "index/IndexAction.h"
+#include "support/Logger.h"
+#include "support/ThreadsafeFS.h"
+#include "support/Trace.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+// The languages whose standard library can be indexed.
+// The enumerator values double as indexes into StdLibSet::Best.
+enum Lang { C, CXX };
+
+// Classifies language options as C++ (CPlusPlus set) or C (anything else).
+Lang langFromOpts(const LangOptions &LO) {
+  if (LO.CPlusPlus)
+    return CXX;
+  return C;
+}
+// Names the header that is used to recognize a standard library for L:
+// StdLibSet::add() probes the include path for it, and the umbrella header
+// emits an #error when it cannot be found.
+llvm::StringLiteral mandatoryHeader(Lang L) {
+  switch (L) {
+  case CXX:
+    return "vector";
+  case C:
+    return "stdio.h";
+  }
+  llvm_unreachable("unhandled Lang");
+}
+
+// Maps language options onto the newest LangStandard::Kind they enable.
+// C++ without any C++11-or-later flag is reported as C++98, and C without
+// C11 or C2x is reported as C99.
+LangStandard::Kind standardFromOpts(const LangOptions &LO) {
+  if (!LO.CPlusPlus) {
+    if (LO.C2x)
+      return LangStandard::lang_c2x;
+    // C17 has no new features, so treat {C11,C17} as C17.
+    return LO.C11 ? LangStandard::lang_c17 : LangStandard::lang_c99;
+  }
+
+  // Probe from newest to oldest and stop at the first enabled standard.
+  if (LO.CPlusPlus2b)
+    return LangStandard::lang_cxx2b;
+  if (LO.CPlusPlus20)
+    return LangStandard::lang_cxx20;
+  if (LO.CPlusPlus17)
+    return LangStandard::lang_cxx17;
+  if (LO.CPlusPlus14)
+    return LangStandard::lang_cxx14;
+  if (LO.CPlusPlus11)
+    return LangStandard::lang_cxx11;
+  return LangStandard::lang_cxx98;
+}
+
+// Produces the text of an umbrella header.
+//
+// Mandatory is a bare header name; it is wrapped in <> here. Each entry of
+// Headers is pasted verbatim after #include, so it must already carry its own
+// delimiters. Duplicate entries are emitted once, in sorted order.
+std::string buildUmbrella(llvm::StringLiteral Mandatory,
+                          std::vector<llvm::StringLiteral> Headers) {
+  std::string Text;
+  llvm::raw_string_ostream Out(Text);
+
+  // We __has_include guard all our #includes to avoid errors when using older
+  // stdlib versions that don't have headers for the newest language standards.
+  // But make sure we get *some* error if things are totally broken.
+  Out << llvm::formatv(
+      "#if !__has_include(<{0}>)\n"
+      "#error Mandatory header <{0}> not found in standard library!\n"
+      "#endif\n",
+      Mandatory);
+
+  // Sort, then drop adjacent duplicates, so every header is included once.
+  llvm::sort(Headers.begin(), Headers.end());
+  Headers.erase(std::unique(Headers.begin(), Headers.end()), Headers.end());
+  for (const llvm::StringLiteral &Header : Headers) {
+    Out << llvm::formatv("#if __has_include({0})\n"
+                         "#include {0}\n"
+                         "#endif\n",
+                         Header);
+  }
+  Out.flush();
+  return Text;
+}
+
+} // namespace
+
+// Returns the source text of an umbrella header that #includes every header
+// named in the C or C++ standard-library symbol map, selected by LO.
+//
+// Each text is built on first use and stored in a heap-allocated string that
+// is never deleted, so the returned StringRef stays valid afterwards.
+llvm::StringRef getStdlibUmbrellaHeader(const LangOptions &LO) {
+  // The umbrella header is the same for all versions of each language.
+  // Headers that are unsupported in old lang versions are usually guarded by
+  // #if. Some headers may be not present in old stdlib versions, the umbrella
+  // header guards with __has_include for this purpose.
+  Lang L = langFromOpts(LO);
+  switch (L) {
+  case CXX:
+    static std::string *UmbrellaCXX =
+        new std::string(buildUmbrella(mandatoryHeader(L), {
+#define SYMBOL(Name, NameSpace, Header) #Header,
+#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
+#undef SYMBOL
+                                                          }));
+    return *UmbrellaCXX;
+  case C:
+    static std::string *UmbrellaC =
+        new std::string(buildUmbrella(mandatoryHeader(L), {
+#define SYMBOL(Name, NameSpace, Header) #Header,
+#include "clang/Tooling/Inclusions/CSymbolMap.inc"
+#undef SYMBOL
+                                                          }));
+    return *UmbrellaC;
+  }
+  // Every enumerator returns above. Without this, control could fall off the
+  // end of a non-void function (and some compilers warn about it); this
+  // mirrors what mandatoryHeader() does after its switch.
+  llvm_unreachable("unhandled Lang");
+}
+
+namespace {
+
+// Drops symbols that were merely leaked by transitive includes of the
+// standard library and keeps the ones that belong to it.
+//
+// Telling the two apart is a bit tricky:
+//  - we don't want to limit to symbols on our list, as our list has only
+//    top-level symbols (and there may be legitimate stdlib extensions).
+//  - we can't limit to only symbols defined in known stdlib headers, as stdlib
+//    internal structure is murky
+//  - we can't strictly require symbols to come from a particular path, e.g.
+//      libstdc++ is mostly under /usr/include/c++/10/...
+//      but std::ctype_base is under /usr/include/<platform>/c++/10/...
+// So a symbol is kept when its declaration or definition lives in a header
+// that is *either* under the standard library path (as identified by the
+// location of the mandatory header), *or* also declares or defines a symbol
+// whose include header is on our stdlib list.
+SymbolSlab filter(SymbolSlab Slab, const StdLibLocation &Loc) {
+  SymbolSlab::Builder Kept;
+
+  static auto &StandardHeaders = *[] {
+    auto *Set = new llvm::DenseSet<llvm::StringRef>();
+    for (llvm::StringRef Header : {
+#define SYMBOL(Name, NameSpace, Header) #Header,
+#include "clang/Tooling/Inclusions/CSymbolMap.inc"
+#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
+#undef SYMBOL
+         })
+      Set->insert(Header);
+    return Set;
+  }();
+
+  // Form prefixes like file:///usr/include/c++/10/
+  // These can be trivially prefix-compared with URIs in the indexed symbols.
+  llvm::SmallVector<std::string> StdLibURIPrefixes;
+  for (const auto &Dir : Loc.Paths) {
+    std::string Prefix = URI::create(Dir).toString();
+    if (Prefix.back() != '/')
+      Prefix.push_back('/');
+    StdLibURIPrefixes.push_back(std::move(Prefix));
+  }
+  auto UnderStdLibDir = [&](llvm::StringRef FileURI) {
+    return llvm::any_of(StdLibURIPrefixes, [&](const std::string &Prefix) {
+      return FileURI.startswith(Prefix);
+    });
+  };
+
+  // For each header URI, is it *either* prefixed by StdLibURIPrefixes *or*
+  // owner of a symbol whose insertable header is in StandardHeaders?
+  // Pointer key because strings in a SymbolSlab are interned.
+  llvm::DenseMap<const char *, bool> GoodHeader;
+  for (const Symbol &S : Slab) {
+    const bool HasStandardInclude =
+        !S.IncludeHeaders.empty() &&
+        StandardHeaders.contains(S.IncludeHeaders.front().IncludeHeader);
+    for (const char *FileURI :
+         {S.CanonicalDeclaration.FileURI, S.Definition.FileURI}) {
+      if (HasStandardInclude) {
+        // A listed symbol blesses its headers, overriding any earlier verdict.
+        GoodHeader[FileURI] = true;
+        continue;
+      }
+      // Otherwise decide by location, and only the first time a URI is seen.
+      auto Inserted = GoodHeader.try_emplace(FileURI, false);
+      if (Inserted.second)
+        Inserted.first->second = UnderStdLibDir(FileURI);
+    }
+  }
+#ifndef NDEBUG
+  for (const auto &Entry : GoodHeader)
+    if (Entry.second && *Entry.first)
+      dlog("Stdlib header: {0}", Entry.first);
+#endif
+  // Empty URIs aren't considered good. (Definition can be blank).
+  auto IsGoodHeader = [&](const char *FileURI) {
+    return *FileURI && GoodHeader.lookup(FileURI);
+  };
+
+  for (const Symbol &S : Slab) {
+    if (IsGoodHeader(S.CanonicalDeclaration.FileURI) ||
+        IsGoodHeader(S.Definition.FileURI)) {
+      Kept.insert(S);
+      continue;
+    }
+    dlog("Ignoring wrong-header symbol {0}{1} in {2}", S.Scope, S.Name,
+         S.CanonicalDeclaration.FileURI);
+  }
+
+  return std::move(Kept).build();
+}
+
+} // namespace
+
+// Compiles HeaderSources in place of the single input file named by CI and
+// returns the collected symbols after passing them through filter() for Loc.
+//
+// CI must name exactly one input and must not use an implicit PCH; otherwise
+// an error is logged and an empty slab is returned. Failures to set up or run
+// the compiler are logged and end the function early.
+SymbolSlab indexStandardLibrary(llvm::StringRef HeaderSources,
+                                std::unique_ptr<CompilerInvocation> CI,
+                                const StdLibLocation &Loc,
+                                const ThreadsafeFS &TFS) {
+  const bool HasSingleInput = CI->getFrontendOpts().Inputs.size() == 1;
+  const bool HasImplicitPCH =
+      !CI->getPreprocessorOpts().ImplicitPCHInclude.empty();
+  if (!HasSingleInput || HasImplicitPCH) {
+    elog("Indexing standard library failed: bad CompilerInvocation");
+    assert(false && "indexing stdlib with a dubious CompilerInvocation!");
+    return SymbolSlab();
+  }
+  // NOTE(review): Input refers to storage owned by *CI and is still used after
+  // CI has been moved into prepareCompilerInstance(); presumably the returned
+  // compiler instance keeps the invocation alive - confirm.
+  const FrontendInputFile &Input = CI->getFrontendOpts().Inputs.front();
+  trace::Span Tracer("StandardLibraryIndex");
+  const LangStandard::Kind LangStd = standardFromOpts(*CI->getLangOpts());
+  log("Indexing {0} standard library in the context of {1}",
+      LangStandard::getLangStandardForKind(LangStd).getName(), Input.getFile());
+
+  SymbolSlab Collected;
+  IgnoreDiagnostics Diags;
+  // CompilerInvocation is taken from elsewhere, and may map a dirty buffer.
+  CI->getPreprocessorOpts().clearRemappedFiles();
+  auto Clang = prepareCompilerInstance(
+      std::move(CI), /*Preamble=*/nullptr,
+      llvm::MemoryBuffer::getMemBuffer(HeaderSources, Input.getFile()),
+      TFS.view(/*CWD=*/llvm::None), Diags);
+  if (!Clang) {
+    elog("Standard Library Index: Couldn't build compiler instance");
+    return Collected;
+  }
+
+  SymbolCollector::Options CollectorOpts;
+  CollectorOpts.Origin = SymbolOrigin::StdLib;
+  CollectorOpts.CollectMainFileSymbols = false;
+  CollectorOpts.CollectMainFileRefs = false;
+  CollectorOpts.CollectMacro = true;
+  CollectorOpts.StoreAllDocumentation = true;
+  // Sadly we can't use FileFilter in these options to restrict indexing scope.
+  // Files from outside the StdLibLocation may define true std symbols anyway.
+  // We end up "blessing" such headers, and can only do that by indexing
+  // everything first.
+
+  // Refs, relations, include graph in the stdlib mostly aren't useful.
+  auto Action = createStaticIndexingAction(
+      CollectorOpts, [&](SymbolSlab S) { Collected = std::move(S); }, nullptr,
+      nullptr, nullptr);
+
+  if (!Action->BeginSourceFile(*Clang, Input)) {
+    elog("Standard Library Index: BeginSourceFile() failed");
+    return Collected;
+  }
+
+  if (llvm::Error Err = Action->Execute()) {
+    elog("Standard Library Index: Execute failed: {0}", std::move(Err));
+    return Collected;
+  }
+
+  Action->EndSourceFile();
+
+  const unsigned CountBeforeFilter = Collected.size();
+  Collected = filter(std::move(Collected), Loc);
+  const bool HadErrors =
+      Clang->hasDiagnostics() &&
+      Clang->getDiagnostics().hasUncompilableErrorOccurred();
+  log("Indexed {0} standard library{3}: {1} symbols, {2} filtered",
+      LangStandard::getLangStandardForKind(LangStd).getName(), Collected.size(),
+      CountBeforeFilter - Collected.size(),
+      HadErrors ? " (incomplete due to errors)" : "");
+  SPAN_ATTACH(Tracer, "symbols", int(Collected.size()));
+  return Collected;
+}
+
+// Indexes the standard library using the umbrella header that matches the
+// language of Invocation, and returns the symbols found for Loc.
+SymbolSlab indexStandardLibrary(std::unique_ptr<CompilerInvocation> Invocation,
+                                const StdLibLocation &Loc,
+                                const ThreadsafeFS &TFS) {
+  // Read the language options *before* the call that consumes Invocation.
+  // The order in which call arguments are initialized is unspecified, so
+  // dereferencing Invocation in the same argument list as std::move(Invocation)
+  // could observe an already moved-from (null) pointer.
+  llvm::StringRef Header = getStdlibUmbrellaHeader(*Invocation->getLangOpts());
+  return indexStandardLibrary(Header, std::move(Invocation), Loc, TFS);
+}
+
+// Reports whether the standard selected by LO is at least as new as the
+// newest one recorded so far for the same language.
+bool StdLibSet::isBest(const LangOptions &LO) const {
+  const int Recorded = Best[langFromOpts(LO)].load(std::memory_order_acquire);
+  return standardFromOpts(LO) >= Recorded;
+}
+
+// Decides whether the standard library matching LO should be indexed now and,
+// if so, claims that job for the caller.
+//
+// Returns llvm::None when:
+//  - standard library indexing is disabled in the current Config,
+//  - an equal or newer standard has already been recorded for this language,
+//  - the mandatory header is not found on HS's search path, or
+//  - another thread records an equal or newer standard first.
+// Otherwise records the new standard in Best and returns the real paths of
+// the directories in which the mandatory header was found.
+llvm::Optional<StdLibLocation> StdLibSet::add(const LangOptions &LO,
+                                              const HeaderSearch &HS) {
+  Lang L = langFromOpts(LO);
+  int OldVersion = Best[L].load(std::memory_order_acquire);
+  const LangStandard::Kind NewStandard = standardFromOpts(LO);
+  const int NewVersion = NewStandard;
+  dlog("Index stdlib? {0}",
+       LangStandard::getLangStandardForKind(NewStandard).getName());
+
+  if (!Config::current().Index.StandardLibrary) {
+    dlog("No: disabled in config");
+    return llvm::None;
+  }
+
+  if (NewVersion <= OldVersion) {
+    // Name the standard we already *have*, i.e. the recorded one.
+    // OldVersion >= NewVersion here, so it is not the -1 "nothing recorded"
+    // sentinel (assuming LangStandard::Kind values are non-negative).
+    dlog("No: have {0}, {1}>={2}",
+         LangStandard::getLangStandardForKind(
+             static_cast<LangStandard::Kind>(OldVersion))
+             .getName(),
+         OldVersion, NewVersion);
+    return llvm::None;
+  }
+
+  // We'd like to index a standard library here if there is one.
+  // Check for the existence of the mandatory header (<vector> for C++,
+  // <stdio.h> for C) on the search path.
+  // We could cache this, but we only get here repeatedly when there's no
+  // stdlib, and even then only once per preamble build.
+  llvm::StringLiteral ProbeHeader = mandatoryHeader(L);
+  llvm::SmallString<256> Path; // Scratch space.
+  llvm::SmallVector<std::string> SearchPaths;
+  auto RecordHeaderPath = [&](llvm::StringRef HeaderPath) {
+    llvm::StringRef DirPath = llvm::sys::path::parent_path(HeaderPath);
+    // HeaderPath may be a view into Path, so resolve into a separate,
+    // initially empty buffer: the input never aliases the output.
+    llvm::SmallString<256> RealDir;
+    if (!HS.getFileMgr().getVirtualFileSystem().getRealPath(DirPath, RealDir))
+      SearchPaths.emplace_back(RealDir);
+  };
+  for (const auto &DL :
+       llvm::make_range(HS.search_dir_begin(), HS.search_dir_end())) {
+    switch (DL.getLookupType()) {
+    case DirectoryLookup::LT_NormalDir: {
+      Path = DL.getDir()->getName();
+      llvm::sys::path::append(Path, ProbeHeader);
+      llvm::vfs::Status Stat;
+      if (!HS.getFileMgr().getNoncachedStatValue(Path, Stat) &&
+          Stat.isRegularFile())
+        RecordHeaderPath(Path);
+      break;
+    }
+    case DirectoryLookup::LT_Framework:
+      // stdlib can't be a framework (framework includes must have a slash)
+      continue;
+    case DirectoryLookup::LT_HeaderMap: {
+      llvm::StringRef Target =
+          DL.getHeaderMap()->lookupFilename(ProbeHeader, Path);
+      if (!Target.empty())
+        RecordHeaderPath(Target);
+      break;
+    }
+    }
+  }
+  if (SearchPaths.empty())
+    return llvm::None;
+
+  dlog("Found standard library in {0}", llvm::join(SearchPaths, ", "));
+
+  // Publish NewVersion unless a concurrent add() recorded something at least
+  // as new while we were probing. On failure compare_exchange_weak reloads
+  // OldVersion with the current value, so the check below sees fresh data.
+  while (!Best[L].compare_exchange_weak(OldVersion, NewVersion,
+                                        std::memory_order_acq_rel))
+    if (OldVersion >= NewVersion) {
+      dlog("No: lost the race");
+      return llvm::None; // Another thread won the race while we were checking.
+    }
+
+  dlog("Yes, index stdlib!");
+  return StdLibLocation{std::move(SearchPaths)};
+}
+
+} // namespace clangd
+} // namespace clang
diff --git a/clang-tools-extra/clangd/index/StdLib.h b/clang-tools-extra/clangd/index/StdLib.h
new file mode 100644
index 0000000000000..6df30ace669c8
--- /dev/null
+++ b/clang-tools-extra/clangd/index/StdLib.h
@@ -0,0 +1,110 @@
+//===--- StdLib.h - Index the C and C++ standard library ---------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Eagerly indexing the standard library gives a much friendlier "warm start"
+// with working code completion in a standalone file or small project.
+//
+// We act as if we saw a file which included the whole standard library:
+// #include <array>
+// #include <bitset>
+// #include <chrono>
+// ...
+// We index this TU and feed the result into the dynamic index.
+//
+// This happens within the context of some particular open file, and we reuse
+// its CompilerInvocation. Matching its include path, LangOpts etc ensures that
+// we see the standard library and configuration that matches the project.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_STDLIB_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_STDLIB_H
+
+#include "index/Symbol.h"
+#include "support/ThreadsafeFS.h"
+#include "llvm/ADT/StringRef.h"
+#include <string>
+
+namespace clang {
+class CompilerInvocation;
+class LangOptions;
+class HeaderSearch;
+namespace clangd {
+
+// Where on the filesystem a standard library was found.
+//
+// Paths holds the directories that contain <vector> or <stdio.h>.
+// They are used to ensure we only index files that are in the standard
+// library.
+//
+// The paths are canonicalized (FS "real path" with symlinks resolved), which
+// allows them to be easily compared against paths the indexer returns.
+struct StdLibLocation {
+  llvm::SmallVector<std::string> Paths;
+};
+
+// Remembers which standard library versions a particular index has claimed.
+//
+// In general, we don't want to index the standard library multiple times, and
+// in most cases this class just acts as a flag to ensure we only do it once.
+//
+// However, if we first open a C++11 file, and then a C++20 file, we *do*
+// want the index to be upgraded to include the extra symbols.
+// Similarly, the C and C++ standard library can coexist.
+class StdLibSet {
+public:
+  // Determines if we should index the standard library in a configuration.
+  //
+  // A location is returned only if all of these hold:
+  //  - standard library indexing is enabled for the file
+  //  - the language version is higher than any previous add() for the language
+  //  - the standard library headers exist on the search path
+  // The returned value is the location where the standard library was found.
+  //
+  // This function is threadsafe.
+  llvm::Optional<StdLibLocation> add(const LangOptions &, const HeaderSearch &);
+
+  // Indicates whether a built index should be used.
+  // It should not be used if a newer version has subsequently been added.
+  //
+  // Intended pattern is:
+  //   if (add()) {
+  //     symbols = indexStandardLibrary();
+  //     if (isBest())
+  //       index.update(symbols);
+  //   }
+  //
+  // This is still technically racy: we could return true here, then another
+  // thread could add->index->update a better library before we can update.
+  // We'd then overwrite it with the older version.
+  // However, it's very unlikely: indexing takes a long time.
+  bool isBest(const LangOptions &) const;
+
+private:
+  // Newest language standard recorded by add() for each language
+  // (slot 0 is C, slot 1 is C++); -1 until something has been recorded.
+  std::atomic<int> Best[2] = {{-1}, {-1}};
+};
+
+// Index a standard library and return the discovered symbols.
+//
+// The compiler invocation should describe the file whose config we're reusing.
+// We overwrite its virtual buffer with a lot of #include statements.
+SymbolSlab indexStandardLibrary(std::unique_ptr<CompilerInvocation> Invocation,
+ const StdLibLocation &Loc,
+ const ThreadsafeFS &TFS);
+
+// Variant that allows the umbrella header source to be specified.
+// Exposed for testing.
+SymbolSlab indexStandardLibrary(llvm::StringRef HeaderSources,
+ std::unique_ptr<CompilerInvocation> CI,
+ const StdLibLocation &Loc,
+ const ThreadsafeFS &TFS);
+
+// Generate header containing #includes for all standard library headers.
+// Exposed for testing.
+llvm::StringRef getStdlibUmbrellaHeader(const LangOptions &);
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_STDLIB_H
diff --git a/clang-tools-extra/clangd/index/SymbolOrigin.cpp b/clang-tools-extra/clangd/index/SymbolOrigin.cpp
index 46a84f2ca9846..e893ff78b8aed 100644
--- a/clang-tools-extra/clangd/index/SymbolOrigin.cpp
+++ b/clang-tools-extra/clangd/index/SymbolOrigin.cpp
@@ -14,7 +14,7 @@ namespace clangd {
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, SymbolOrigin O) {
if (O == SymbolOrigin::Unknown)
return OS << "unknown";
- constexpr static char Sigils[] = "AOSMIRP7B9012345";
+ constexpr static char Sigils[] = "AOSMIRP7BL012345";
for (unsigned I = 0; I < sizeof(Sigils); ++I)
if (static_cast<uint16_t>(O) & 1u << I)
OS << Sigils[I];
diff --git a/clang-tools-extra/clangd/index/SymbolOrigin.h b/clang-tools-extra/clangd/index/SymbolOrigin.h
index 18e3616d5a9cc..2e7a3fa745e22 100644
--- a/clang-tools-extra/clangd/index/SymbolOrigin.h
+++ b/clang-tools-extra/clangd/index/SymbolOrigin.h
@@ -29,6 +29,7 @@ enum class SymbolOrigin : uint16_t {
Preamble = 1 << 6, // From the dynamic index of preambles.
// 7 reserved
Background = 1 << 8, // From the automatic project index.
+ StdLib = 1 << 9, // Standard library index.
};
inline SymbolOrigin operator|(SymbolOrigin A, SymbolOrigin B) {
diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt
index 8309be64ef238..692d7f8038d95 100644
--- a/clang-tools-extra/clangd/unittests/CMakeLists.txt
+++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt
@@ -81,6 +81,7 @@ add_unittest(ClangdUnitTests ClangdTests
SemanticSelectionTests.cpp
SerializationTests.cpp
SourceCodeTests.cpp
+ StdLibTests.cpp
SymbolCollectorTests.cpp
SymbolInfoTests.cpp
SyncAPI.cpp
diff --git a/clang-tools-extra/clangd/unittests/StdLibTests.cpp b/clang-tools-extra/clangd/unittests/StdLibTests.cpp
new file mode 100644
index 0000000000000..0fadc872305c0
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/StdLibTests.cpp
@@ -0,0 +1,162 @@
+//===-- StdLibTests.cpp -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Annotations.h"
+#include "ClangdServer.h"
+#include "CodeComplete.h"
+#include "Compiler.h"
+#include "Config.h"
+#include "SyncAPI.h"
+#include "TestFS.h"
+#include "index/StdLib.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <memory>
+
+using namespace testing;
+
+namespace clang {
+namespace clangd {
+namespace {
+
+// Check that the generated header source contains usual standard library headers.
+TEST(StdLibTests, getStdlibUmbrellaHeader) {
+ LangOptions LO;
+ LO.CPlusPlus = true; // C++: expect <string> and <cstdio>, but not <stdio.h>.
+
+ auto CXX = getStdlibUmbrellaHeader(LO).str();
+ EXPECT_THAT(CXX, HasSubstr("#include <string>"));
+ EXPECT_THAT(CXX, HasSubstr("#include <cstdio>"));
+ EXPECT_THAT(CXX, Not(HasSubstr("#include <stdio.h>")));
+
+ LO.CPlusPlus = false; // C: expect <stdio.h>, and neither of the C++ headers.
+ auto C = getStdlibUmbrellaHeader(LO).str();
+ EXPECT_THAT(C, Not(HasSubstr("#include <string>")));
+ EXPECT_THAT(C, Not(HasSubstr("#include <cstdio>")));
+ EXPECT_THAT(C, HasSubstr("#include <stdio.h>"));
+}
+
+MATCHER_P(Named, Name, "") { return arg.Name == Name; } // Matches on arg.Name.
+
+// Build an index from a fake stdlib, and check it contains the right symbols.
+TEST(StdLibTests, indexStandardLibrary) {
+ MockFS FS;
+ FS.Files["std/foo.h"] = R"cpp(
+ #include <platform_stuff.h>
+ #if __cplusplus >= 201703L
+ int foo17();
+ #elif __cplusplus >= 201402L
+ int foo14();
+ #else
+ bool foo98();
+ #endif
+ )cpp";
+ FS.Files["nonstd/platform_stuff.h"] = "int magic = 42;"; // Included by foo.h.
+
+ ParseInputs OriginalInputs;
+ OriginalInputs.TFS = &FS;
+ OriginalInputs.CompileCommand.Filename = testPath("main.cc");
+ OriginalInputs.CompileCommand.CommandLine = {"clang++", testPath("main.cc"),
+ "-isystemstd/",
+ "-isystemnonstd/", "-std=c++14"}; // C++14 selects the foo14() branch.
+ OriginalInputs.CompileCommand.Directory = testRoot();
+ IgnoreDiagnostics Diags;
+ auto CI = buildCompilerInvocation(OriginalInputs, Diags);
+ ASSERT_TRUE(CI);
+
+ StdLibLocation Loc;
+ Loc.Paths.push_back(testPath("std/")); // Only std/ is listed, not nonstd/.
+
+ auto Symbols =
+ indexStandardLibrary("#include <foo.h>", std::move(CI), Loc, FS);
+ EXPECT_THAT(Symbols, ElementsAre(Named("foo14"))); // No foo17/foo98/magic.
+}
+
+TEST(StdLibTests, StdLibSet) { // Exercises StdLibSet::add() per config/language.
+ StdLibSet Set;
+ MockFS FS;
+ FS.Files["std/_"] = ""; // Placeholder files so both directories exist.
+ FS.Files["libc/_"] = "";
+
+ // Calls Set.add() with a HeaderSearch whose system paths are SearchPath.
+ auto Add = [&](const LangOptions &LO,
+ std::vector<llvm::StringRef> SearchPath) {
+ SourceManagerForFile SM("scratch", "");
+ SM.get().getFileManager().setVirtualFileSystem(FS.view(llvm::None));
+ HeaderSearch HS(/*HSOpts=*/nullptr, SM.get(), SM.get().getDiagnostics(), LO,
+ /*Target=*/nullptr);
+ for (auto P : SearchPath)
+ HS.AddSearchPath(
+ DirectoryLookup(
+ cantFail(SM.get().getFileManager().getDirectoryRef(testPath(P))),
+ SrcMgr::C_System, /*isFramework=*/false),
+ true);
+ return Set.add(LO, HS);
+ };
+
+ Config Cfg;
+ Cfg.Index.StandardLibrary = false;
+ WithContextValue Disabled(Config::Key, std::move(Cfg));
+
+ LangOptions LO;
+ LO.CPlusPlus = true;
+ EXPECT_FALSE(Add(LO, {"std"})) << "Disabled in config";
+
+ Cfg = Config(); // Reassign the moved-from Cfg before reusing it.
+ Cfg.Index.StandardLibrary = true;
+ WithContextValue Enabled(Config::Key, std::move(Cfg));
+
+ EXPECT_FALSE(Add(LO, {"std"})) << "No <vector> found";
+ FS.Files["std/vector"] = "class vector;";
+ EXPECT_TRUE(Add(LO, {"std"})) << "Indexing as C++98";
+ EXPECT_FALSE(Add(LO, {"std"})) << "Don't reindex";
+ LO.CPlusPlus11 = true;
+ EXPECT_TRUE(Add(LO, {"std"})) << "Indexing as C++11";
+ LO.CPlusPlus = false;
+ EXPECT_FALSE(Add(LO, {"libc"})) << "No <stdio.h>";
+ FS.Files["libc/stdio.h"] = ""; // Empty placeholder file, like the "_" files.
+ EXPECT_TRUE(Add(LO, {"libc"})) << "Indexing as C";
+}
+
+MATCHER_P(StdlibSymbol, Name, "") { // Name matches, with one "<..." include.
+ return arg.Name == Name && arg.Includes.size() == 1 &&
+ llvm::StringRef(arg.Includes.front().Header).startswith("<");
+}
+
+TEST(StdLibTests, EndToEnd) { // Code completion offers symbols from a fake stdlib.
+ Config Cfg;
+ Cfg.Index.StandardLibrary = true;
+ WithContextValue Enabled(Config::Key, std::move(Cfg));
+
+ MockFS FS;
+ FS.Files["stdlib/vector"] =
+ "namespace std { template <class> class vector; }";
+ FS.Files["stdlib/list"] =
+ " namespace std { template <typename T> class list; }";
+ MockCompilationDatabase CDB;
+ CDB.ExtraClangFlags.push_back("-isystem" + testPath("stdlib")); // Fake stdlib.
+ ClangdServer::Options Opts = ClangdServer::optsForTest();
+ Opts.BuildDynamicSymbolIndex = true; // also used for stdlib index
+ ClangdServer Server(CDB, FS, Opts);
+
+ Annotations A("std::^"); // A.point() below is the position after "std::".
+
+ Server.addDocument(testPath("foo.cc"), A.code());
+ ASSERT_TRUE(Server.blockUntilIdleForTest());
+ clangd::CodeCompleteOptions CCOpts;
+ auto Completions =
+ cantFail(runCodeComplete(Server, testPath("foo.cc"), A.point(), CCOpts));
+ EXPECT_THAT( // Exactly the two declared symbols, in any order.
+ Completions.Completions,
+ UnorderedElementsAre(StdlibSymbol("list"), StdlibSymbol("vector")));
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
diff --git a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
index 76f4cbafc830b..cf30acb0d6693 100644
--- a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
+++ b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
@@ -1123,7 +1123,8 @@ TEST_F(TUSchedulerTests, AsyncPreambleThread) {
public:
BlockPreambleThread(llvm::StringRef BlockVersion, Notification &N)
: BlockVersion(BlockVersion), N(N) {}
- void onPreambleAST(PathRef Path, llvm::StringRef Version, ASTContext &Ctx,
+ void onPreambleAST(PathRef Path, llvm::StringRef Version,
+ const CompilerInvocation &, ASTContext &Ctx,
Preprocessor &, const CanonicalIncludes &) override {
if (Version == BlockVersion)
N.wait();
More information about the cfe-commits
mailing list