[clang-tools-extra] ca87553 - Reland(2) "[clangd] Indexing of standard library"

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Wed May 18 06:30:47 PDT 2022


Author: Sam McCall
Date: 2022-05-18T15:30:37+02:00
New Revision: ca875539f788c8063e243ce9ceb877a0d2ad9115

URL: https://github.com/llvm/llvm-project/commit/ca875539f788c8063e243ce9ceb877a0d2ad9115
DIFF: https://github.com/llvm/llvm-project/commit/ca875539f788c8063e243ce9ceb877a0d2ad9115.diff

LOG: Reland(2) "[clangd] Indexing of standard library"

This reverts commit 6aabf60f2fb7589430c0ecc8fe95913c973fa248.

Added: 
    clang-tools-extra/clangd/index/StdLib.cpp
    clang-tools-extra/clangd/index/StdLib.h
    clang-tools-extra/clangd/unittests/StdLibTests.cpp

Modified: 
    clang-tools-extra/clangd/CMakeLists.txt
    clang-tools-extra/clangd/ClangdServer.cpp
    clang-tools-extra/clangd/ClangdServer.h
    clang-tools-extra/clangd/Config.h
    clang-tools-extra/clangd/ConfigCompile.cpp
    clang-tools-extra/clangd/ConfigFragment.h
    clang-tools-extra/clangd/ConfigYAML.cpp
    clang-tools-extra/clangd/TUScheduler.cpp
    clang-tools-extra/clangd/TUScheduler.h
    clang-tools-extra/clangd/index/FileIndex.cpp
    clang-tools-extra/clangd/index/FileIndex.h
    clang-tools-extra/clangd/index/SymbolOrigin.cpp
    clang-tools-extra/clangd/index/SymbolOrigin.h
    clang-tools-extra/clangd/unittests/CMakeLists.txt
    clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
index 9c37cfe7b7001..7cfbd6f95750e 100644
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -119,6 +119,7 @@ add_clang_library(clangDaemon
   index/Ref.cpp
   index/Relation.cpp
   index/Serialization.cpp
+  index/StdLib.cpp
   index/Symbol.cpp
   index/SymbolCollector.cpp
   index/SymbolID.cpp

diff  --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp
index 80d7d5c5ece19..69a0f63972aae 100644
--- a/clang-tools-extra/clangd/ClangdServer.cpp
+++ b/clang-tools-extra/clangd/ClangdServer.cpp
@@ -26,6 +26,7 @@
 #include "index/CanonicalIncludes.h"
 #include "index/FileIndex.h"
 #include "index/Merge.h"
+#include "index/StdLib.h"
 #include "refactor/Rename.h"
 #include "refactor/Tweak.h"
 #include "support/Cancellation.h"
@@ -59,16 +60,39 @@ namespace {
 // Update the FileIndex with new ASTs and plumb the diagnostics responses.
 struct UpdateIndexCallbacks : public ParsingCallbacks {
   UpdateIndexCallbacks(FileIndex *FIndex,
-                       ClangdServer::Callbacks *ServerCallbacks)
-      : FIndex(FIndex), ServerCallbacks(ServerCallbacks) {}
+                       ClangdServer::Callbacks *ServerCallbacks,
+                       const ThreadsafeFS &TFS, AsyncTaskRunner *Tasks)
+      : FIndex(FIndex), ServerCallbacks(ServerCallbacks), TFS(TFS),
+        Tasks(Tasks) {}
 
-  void onPreambleAST(PathRef Path, llvm::StringRef Version, ASTContext &Ctx,
+  void onPreambleAST(PathRef Path, llvm::StringRef Version,
+                     const CompilerInvocation &CI, ASTContext &Ctx,
                      Preprocessor &PP,
                      const CanonicalIncludes &CanonIncludes) override {
+    // If this preamble uses a standard library we haven't seen yet, index it.
+    if (FIndex)
+      if (auto Loc = Stdlib.add(*CI.getLangOpts(), PP.getHeaderSearchInfo()))
+        indexStdlib(CI, std::move(*Loc));
+
     if (FIndex)
       FIndex->updatePreamble(Path, Version, Ctx, PP, CanonIncludes);
   }
 
+  void indexStdlib(const CompilerInvocation &CI, StdLibLocation Loc) {
+    auto Task = [this, LO(*CI.getLangOpts()), Loc(std::move(Loc)),
+                 CI(std::make_unique<CompilerInvocation>(CI))]() mutable {
+      IndexFileIn IF;
+      IF.Symbols = indexStandardLibrary(std::move(CI), Loc, TFS);
+      if (Stdlib.isBest(LO))
+        FIndex->updatePreamble(std::move(IF));
+    };
+    if (Tasks)
+      // This doesn't have a semaphore to enforce -j, but it's rare.
+      Tasks->runAsync("IndexStdlib", std::move(Task));
+    else
+      Task();
+  }
+
   void onMainAST(PathRef Path, ParsedAST &AST, PublishFn Publish) override {
     if (FIndex)
       FIndex->updateMain(Path, AST);
@@ -103,6 +127,9 @@ struct UpdateIndexCallbacks : public ParsingCallbacks {
 private:
   FileIndex *FIndex;
   ClangdServer::Callbacks *ServerCallbacks;
+  const ThreadsafeFS &TFS;
+  StdLibSet Stdlib;
+  AsyncTaskRunner *Tasks;
 };
 
 class DraftStoreFS : public ThreadsafeFS {
@@ -154,12 +181,15 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB,
       Transient(Opts.ImplicitCancellation ? TUScheduler::InvalidateOnUpdate
                                           : TUScheduler::NoInvalidation),
       DirtyFS(std::make_unique<DraftStoreFS>(TFS, DraftMgr)) {
+  if (Opts.AsyncThreadsCount != 0)
+    IndexTasks.emplace();
   // Pass a callback into `WorkScheduler` to extract symbols from a newly
   // parsed file and rebuild the file index synchronously each time an AST
   // is parsed.
-  WorkScheduler.emplace(
-      CDB, TUScheduler::Options(Opts),
-      std::make_unique<UpdateIndexCallbacks>(DynamicIdx.get(), Callbacks));
+  WorkScheduler.emplace(CDB, TUScheduler::Options(Opts),
+                        std::make_unique<UpdateIndexCallbacks>(
+                            DynamicIdx.get(), Callbacks, TFS,
+                            IndexTasks ? IndexTasks.getPointer() : nullptr));
   // Adds an index to the stack, at higher priority than existing indexes.
   auto AddIndex = [&](SymbolIndex *Idx) {
     if (this->Index != nullptr) {
@@ -975,6 +1005,9 @@ ClangdServer::blockUntilIdleForTest(llvm::Optional<double> TimeoutSeconds) {
   // and we're blocking the main thread.
   if (!WorkScheduler->blockUntilIdle(timeoutSeconds(TimeoutSeconds)))
     return false;
+  // TUScheduler is the only thing that starts background indexing work.
+  if (IndexTasks && !IndexTasks->wait(timeoutSeconds(TimeoutSeconds)))
+    return false;
 
   // Unfortunately we don't have strict topological order between the rest of
   // the components. E.g. CDB broadcast triggers backrgound indexing.

diff  --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h
index 6d999722805ed..e73454901cff0 100644
--- a/clang-tools-extra/clangd/ClangdServer.h
+++ b/clang-tools-extra/clangd/ClangdServer.h
@@ -428,6 +428,7 @@ class ClangdServer {
   mutable std::mutex CachedCompletionFuzzyFindRequestMutex;
 
   llvm::Optional<std::string> WorkspaceRoot;
+  llvm::Optional<AsyncTaskRunner> IndexTasks; // for stdlib indexing.
   llvm::Optional<TUScheduler> WorkScheduler;
   // Invalidation policy used for actions that we assume are "transient".
   TUScheduler::ASTActionInvalidation Transient;

diff  --git a/clang-tools-extra/clangd/Config.h b/clang-tools-extra/clangd/Config.h
index 734dce43c5873..ec7247121d5cd 100644
--- a/clang-tools-extra/clangd/Config.h
+++ b/clang-tools-extra/clangd/Config.h
@@ -81,11 +81,12 @@ struct Config {
     /// forward-slashes.
     std::string MountPoint;
   };
-  /// Controls background-index behavior.
+  /// Controls index behavior.
   struct {
-    /// Whether this TU should be indexed.
+    /// Whether this TU should be background-indexed.
     BackgroundPolicy Background = BackgroundPolicy::Build;
     ExternalIndexSpec External;
+    bool StandardLibrary = false;
   } Index;
 
   enum UnusedIncludesPolicy { Strict, None };

diff  --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp
index a4d7904781e4f..0cdbc5526e6e5 100644
--- a/clang-tools-extra/clangd/ConfigCompile.cpp
+++ b/clang-tools-extra/clangd/ConfigCompile.cpp
@@ -332,6 +332,11 @@ struct FragmentCompiler {
     }
     if (F.External)
       compile(std::move(**F.External), F.External->Range);
+    if (F.StandardLibrary)
+      Out.Apply.push_back(
+          [Val(**F.StandardLibrary)](const Params &, Config &C) {
+            C.Index.StandardLibrary = Val;
+          });
   }
 
   void compile(Fragment::IndexBlock::ExternalBlock &&External,

diff  --git a/clang-tools-extra/clangd/ConfigFragment.h b/clang-tools-extra/clangd/ConfigFragment.h
index 34bff844cb26c..5950f8ff655c3 100644
--- a/clang-tools-extra/clangd/ConfigFragment.h
+++ b/clang-tools-extra/clangd/ConfigFragment.h
@@ -199,6 +199,9 @@ struct Fragment {
       llvm::Optional<Located<std::string>> MountPoint;
     };
     llvm::Optional<Located<ExternalBlock>> External;
+    // Whether the standard library visible from this file should be indexed.
+    // This makes all standard library symbols available, included or not.
+    llvm::Optional<Located<bool>> StandardLibrary;
   };
   IndexBlock Index;
 

diff  --git a/clang-tools-extra/clangd/ConfigYAML.cpp b/clang-tools-extra/clangd/ConfigYAML.cpp
index ec39bb9686640..cec60756a3431 100644
--- a/clang-tools-extra/clangd/ConfigYAML.cpp
+++ b/clang-tools-extra/clangd/ConfigYAML.cpp
@@ -184,6 +184,10 @@ class Parser {
       F.External.emplace(std::move(External));
       F.External->Range = N.getSourceRange();
     });
+    Dict.handle("StandardLibrary", [&](Node &N) {
+      if (auto StandardLibrary = boolValue(N, "StandardLibrary"))
+        F.StandardLibrary = *StandardLibrary;
+    });
     Dict.parse(N);
   }
 

diff  --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp
index 19d4ca5a48a50..f60fbfaa479f9 100644
--- a/clang-tools-extra/clangd/TUScheduler.cpp
+++ b/clang-tools-extra/clangd/TUScheduler.cpp
@@ -1013,9 +1013,10 @@ void PreambleThread::build(Request Req) {
   bool IsFirstPreamble = !LatestBuild;
   LatestBuild = clang::clangd::buildPreamble(
       FileName, *Req.CI, Inputs, StoreInMemory,
-      [this, Version(Inputs.Version)](ASTContext &Ctx, Preprocessor &PP,
-                                      const CanonicalIncludes &CanonIncludes) {
-        Callbacks.onPreambleAST(FileName, Version, Ctx, PP, CanonIncludes);
+      [&](ASTContext &Ctx, Preprocessor &PP,
+          const CanonicalIncludes &CanonIncludes) {
+        Callbacks.onPreambleAST(FileName, Inputs.Version, *Req.CI, Ctx, PP,
+                                CanonIncludes);
       },
       &Stats);
   if (!LatestBuild)

diff  --git a/clang-tools-extra/clangd/TUScheduler.h b/clang-tools-extra/clangd/TUScheduler.h
index ceb7ea0f0239a..a852199ba7cb7 100644
--- a/clang-tools-extra/clangd/TUScheduler.h
+++ b/clang-tools-extra/clangd/TUScheduler.h
@@ -133,8 +133,8 @@ class ParsingCallbacks {
   /// contains only AST nodes from the #include directives at the start of the
   /// file. AST node in the current file should be observed on onMainAST call.
   virtual void onPreambleAST(PathRef Path, llvm::StringRef Version,
-                             ASTContext &Ctx, Preprocessor &PP,
-                             const CanonicalIncludes &) {}
+                             const CompilerInvocation &CI, ASTContext &Ctx,
+                             Preprocessor &PP, const CanonicalIncludes &) {}
 
   /// The argument function is run under the critical section guarding against
   /// races when closing the files.

diff  --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp
index 72f7c0801250b..dcfc4b5981fa2 100644
--- a/clang-tools-extra/clangd/index/FileIndex.cpp
+++ b/clang-tools-extra/clangd/index/FileIndex.cpp
@@ -425,12 +425,7 @@ FileIndex::FileIndex()
       MainFileSymbols(IndexContents::All),
       MainFileIndex(std::make_unique<MemIndex>()) {}
 
-void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
-                               ASTContext &AST, Preprocessor &PP,
-                               const CanonicalIncludes &Includes) {
-  IndexFileIn IF;
-  std::tie(IF.Symbols, std::ignore, IF.Relations) =
-      indexHeaderSymbols(Version, AST, PP, Includes);
+void FileIndex::updatePreamble(IndexFileIn IF) {
   FileShardedIndex ShardedIndex(std::move(IF));
   for (auto Uri : ShardedIndex.getAllSources()) {
     auto IF = ShardedIndex.getShard(Uri);
@@ -461,6 +456,15 @@ void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
   }
 }
 
+void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
+                               ASTContext &AST, Preprocessor &PP,
+                               const CanonicalIncludes &Includes) {
+  IndexFileIn IF;
+  std::tie(IF.Symbols, std::ignore, IF.Relations) =
+      indexHeaderSymbols(Version, AST, PP, Includes);
+  updatePreamble(std::move(IF));
+}
+
 void FileIndex::updateMain(PathRef Path, ParsedAST &AST) {
   auto Contents = indexMainDecls(AST);
   MainFileSymbols.update(

diff  --git a/clang-tools-extra/clangd/index/FileIndex.h b/clang-tools-extra/clangd/index/FileIndex.h
index 4c6f965e78013..24ffbc9c7fb52 100644
--- a/clang-tools-extra/clangd/index/FileIndex.h
+++ b/clang-tools-extra/clangd/index/FileIndex.h
@@ -114,6 +114,7 @@ class FileIndex : public MergedIndex {
   /// and macros in \p PP.
   void updatePreamble(PathRef Path, llvm::StringRef Version, ASTContext &AST,
                       Preprocessor &PP, const CanonicalIncludes &Includes);
+  void updatePreamble(IndexFileIn);
 
   /// Update symbols and references from main file \p Path with
   /// `indexMainDecls`.

diff  --git a/clang-tools-extra/clangd/index/StdLib.cpp b/clang-tools-extra/clangd/index/StdLib.cpp
new file mode 100644
index 0000000000000..64689418464b3
--- /dev/null
+++ b/clang-tools-extra/clangd/index/StdLib.cpp
@@ -0,0 +1,363 @@
+//===-- StdLib.cpp ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "StdLib.h"
+#include <fstream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "Compiler.h"
+#include "Config.h"
+#include "SymbolCollector.h"
+#include "index/IndexAction.h"
+#include "support/Logger.h"
+#include "support/ThreadsafeFS.h"
+#include "support/Trace.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+enum Lang { C, CXX };
+
+Lang langFromOpts(const LangOptions &LO) { return LO.CPlusPlus ? CXX : C; }
+llvm::StringLiteral mandatoryHeader(Lang L) {
+  switch (L) {
+  case C:
+    return "stdio.h";
+  case CXX:
+    return "vector";
+  }
+  llvm_unreachable("unhandled Lang");
+}
+
+LangStandard::Kind standardFromOpts(const LangOptions &LO) {
+  if (LO.CPlusPlus) {
+    if (LO.CPlusPlus2b)
+      return LangStandard::lang_cxx2b;
+    if (LO.CPlusPlus20)
+      return LangStandard::lang_cxx20;
+    if (LO.CPlusPlus17)
+      return LangStandard::lang_cxx17;
+    if (LO.CPlusPlus14)
+      return LangStandard::lang_cxx14;
+    if (LO.CPlusPlus11)
+      return LangStandard::lang_cxx11;
+    return LangStandard::lang_cxx98;
+  }
+  if (LO.C2x)
+    return LangStandard::lang_c2x;
+  // C17 has no new features, so treat {C11,C17} as C17.
+  if (LO.C11)
+    return LangStandard::lang_c17;
+  return LangStandard::lang_c99;
+}
+
+std::string buildUmbrella(llvm::StringLiteral Mandatory,
+                          std::vector<llvm::StringLiteral> Headers) {
+  std::string Result;
+  llvm::raw_string_ostream OS(Result);
+
+  // We __has_include guard all our #includes to avoid errors when using older
+  // stdlib versions that lack headers for the newest language standards.
+  // But make sure we get *some* error if things are totally broken.
+  OS << llvm::formatv(
+      "#if !__has_include(<{0}>)\n"
+      "#error Mandatory header <{0}> not found in standard library!\n"
+      "#endif\n",
+      Mandatory);
+
+  llvm::sort(Headers.begin(), Headers.end());
+  auto Last = std::unique(Headers.begin(), Headers.end());
+  for (auto Header = Headers.begin(); Header != Last; ++Header) {
+    OS << llvm::formatv("#if __has_include({0})\n"
+                        "#include {0}\n"
+                        "#endif\n",
+                        *Header);
+  }
+  OS.flush();
+  return Result;
+}
+
+} // namespace
+
+llvm::StringRef getStdlibUmbrellaHeader(const LangOptions &LO) {
+  // The umbrella header is the same for all versions of each language.
+  // Headers that are unsupported in old lang versions are usually guarded by
+  // #if. Some headers may not be present in old stdlib versions; the umbrella
+  // header guards each #include with __has_include for this purpose.
+  Lang L = langFromOpts(LO);
+  switch (L) {
+  case CXX:
+    static std::string *UmbrellaCXX =
+        new std::string(buildUmbrella(mandatoryHeader(L), {
+#define SYMBOL(Name, NameSpace, Header) #Header,
+#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
+#undef SYMBOL
+                                                          }));
+    return *UmbrellaCXX;
+  case C:
+    static std::string *UmbrellaC =
+        new std::string(buildUmbrella(mandatoryHeader(L), {
+#define SYMBOL(Name, NameSpace, Header) #Header,
+#include "clang/Tooling/Inclusions/CSymbolMap.inc"
+#undef SYMBOL
+                                                          }));
+    return *UmbrellaC;
+  }
+}
+
+namespace {
+
+// Including the standard library leaks unwanted transitively included symbols.
+//
+// We want to drop these, but they're a bit tricky to identify:
+//  - we don't want to limit to symbols on our list, as our list has only
+//    top-level symbols (and there may be legitimate stdlib extensions).
+//  - we can't limit to only symbols defined in known stdlib headers, as stdlib
+//    internal structure is murky
+//  - we can't strictly require symbols to come from a particular path, e.g.
+//      libstdc++ is mostly under /usr/include/c++/10/...
+//      but std::ctype_base is under /usr/include/<platform>/c++/10/...
+// We require the symbol to come from a header that is *either* from
+// the standard library path (as identified by the location of <vector>), or
+// another header that defines a symbol from our stdlib list.
+SymbolSlab filter(SymbolSlab Slab, const StdLibLocation &Loc) {
+  SymbolSlab::Builder Result;
+
+  static auto &StandardHeaders = *[] {
+    auto *Set = new llvm::DenseSet<llvm::StringRef>();
+    for (llvm::StringRef Header : {
+#define SYMBOL(Name, NameSpace, Header) #Header,
+#include "clang/Tooling/Inclusions/CSymbolMap.inc"
+#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
+#undef SYMBOL
+         })
+      Set->insert(Header);
+    return Set;
+  }();
+
+  // Form prefixes like file:///usr/include/c++/10/
+  // These can be trivially prefix-compared with URIs in the indexed symbols.
+  llvm::SmallVector<std::string> StdLibURIPrefixes;
+  for (const auto &Path : Loc.Paths) {
+    StdLibURIPrefixes.push_back(URI::create(Path).toString());
+    if (StdLibURIPrefixes.back().back() != '/')
+      StdLibURIPrefixes.back().push_back('/');
+  }
+  // For each header URI, is it *either* prefixed by StdLibURIPrefixes *or*
+  // owner of a symbol whose insertable header is in StandardHeaders?
+  // Pointer key because strings in a SymbolSlab are interned.
+  llvm::DenseMap<const char *, bool> GoodHeader;
+  for (const Symbol &S : Slab) {
+    if (!S.IncludeHeaders.empty() &&
+        StandardHeaders.contains(S.IncludeHeaders.front().IncludeHeader)) {
+      GoodHeader[S.CanonicalDeclaration.FileURI] = true;
+      GoodHeader[S.Definition.FileURI] = true;
+      continue;
+    }
+    for (const char *URI :
+         {S.CanonicalDeclaration.FileURI, S.Definition.FileURI}) {
+      auto R = GoodHeader.try_emplace(URI, false);
+      if (R.second) {
+        R.first->second = llvm::any_of(
+            StdLibURIPrefixes,
+            [&, URIStr(llvm::StringRef(URI))](const std::string &Prefix) {
+              return URIStr.startswith(Prefix);
+            });
+      }
+    }
+  }
+#ifndef NDEBUG
+  for (const auto &Good : GoodHeader)
+    if (Good.second && *Good.first)
+      dlog("Stdlib header: {0}", Good.first);
+#endif
+  // Empty URIs aren't considered good. (Definition can be blank).
+  auto IsGoodHeader = [&](const char *C) { return *C && GoodHeader.lookup(C); };
+
+  for (const Symbol &S : Slab) {
+    if (!(IsGoodHeader(S.CanonicalDeclaration.FileURI) ||
+          IsGoodHeader(S.Definition.FileURI))) {
+      dlog("Ignoring wrong-header symbol {0}{1} in {2}", S.Scope, S.Name,
+           S.CanonicalDeclaration.FileURI);
+      continue;
+    }
+    Result.insert(S);
+  }
+
+  return std::move(Result).build();
+}
+
+} // namespace
+
+SymbolSlab indexStandardLibrary(llvm::StringRef HeaderSources,
+                                std::unique_ptr<CompilerInvocation> CI,
+                                const StdLibLocation &Loc,
+                                const ThreadsafeFS &TFS) {
+  if (CI->getFrontendOpts().Inputs.size() != 1 ||
+      !CI->getPreprocessorOpts().ImplicitPCHInclude.empty()) {
+    elog("Indexing standard library failed: bad CompilerInvocation");
+    assert(false && "indexing stdlib with a dubious CompilerInvocation!");
+    return SymbolSlab();
+  }
+  const FrontendInputFile &Input = CI->getFrontendOpts().Inputs.front();
+  trace::Span Tracer("StandardLibraryIndex");
+  LangStandard::Kind LangStd = standardFromOpts(*CI->getLangOpts());
+  log("Indexing {0} standard library in the context of {1}",
+      LangStandard::getLangStandardForKind(LangStd).getName(), Input.getFile());
+
+  SymbolSlab Symbols;
+  IgnoreDiagnostics IgnoreDiags;
+  // CompilerInvocation is taken from elsewhere, and may map a dirty buffer.
+  CI->getPreprocessorOpts().clearRemappedFiles();
+  auto Clang = prepareCompilerInstance(
+      std::move(CI), /*Preamble=*/nullptr,
+      llvm::MemoryBuffer::getMemBuffer(HeaderSources, Input.getFile()),
+      TFS.view(/*CWD=*/llvm::None), IgnoreDiags);
+  if (!Clang) {
+    elog("Standard Library Index: Couldn't build compiler instance");
+    return Symbols;
+  }
+
+  SymbolCollector::Options IndexOpts;
+  IndexOpts.Origin = SymbolOrigin::StdLib;
+  IndexOpts.CollectMainFileSymbols = false;
+  IndexOpts.CollectMainFileRefs = false;
+  IndexOpts.CollectMacro = true;
+  IndexOpts.StoreAllDocumentation = true;
+  // Sadly we can't use IndexOpts.FileFilter to restrict indexing scope.
+  // Files from outside the StdLibLocation may define true std symbols anyway.
+  // We end up "blessing" such headers, and can only do that by indexing
+  // everything first.
+
+  // Refs, relations, include graph in the stdlib mostly aren't useful.
+  auto Action = createStaticIndexingAction(
+      IndexOpts, [&](SymbolSlab S) { Symbols = std::move(S); }, nullptr,
+      nullptr, nullptr);
+
+  if (!Action->BeginSourceFile(*Clang, Input)) {
+    elog("Standard Library Index: BeginSourceFile() failed");
+    return Symbols;
+  }
+
+  if (llvm::Error Err = Action->Execute()) {
+    elog("Standard Library Index: Execute failed: {0}", std::move(Err));
+    return Symbols;
+  }
+
+  Action->EndSourceFile();
+
+  unsigned SymbolsBeforeFilter = Symbols.size();
+  Symbols = filter(std::move(Symbols), Loc);
+  bool Errors = Clang->hasDiagnostics() &&
+                Clang->getDiagnostics().hasUncompilableErrorOccurred();
+  log("Indexed {0} standard library{3}: {1} symbols, {2} filtered",
+      LangStandard::getLangStandardForKind(LangStd).getName(), Symbols.size(),
+      SymbolsBeforeFilter - Symbols.size(),
+      Errors ? " (incomplete due to errors)" : "");
+  SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
+  return Symbols;
+}
+
+SymbolSlab indexStandardLibrary(std::unique_ptr<CompilerInvocation> Invocation,
+                                const StdLibLocation &Loc,
+                                const ThreadsafeFS &TFS) {
+  return indexStandardLibrary(
+      getStdlibUmbrellaHeader(*Invocation->getLangOpts()),
+      std::move(Invocation), Loc, TFS);
+}
+
+bool StdLibSet::isBest(const LangOptions &LO) const {
+  return standardFromOpts(LO) >=
+         Best[langFromOpts(LO)].load(std::memory_order_acquire);
+}
+
+llvm::Optional<StdLibLocation> StdLibSet::add(const LangOptions &LO,
+                                              const HeaderSearch &HS) {
+  Lang L = langFromOpts(LO);
+  int OldVersion = Best[L].load(std::memory_order_acquire);
+  int NewVersion = standardFromOpts(LO);
+  dlog("Index stdlib? {0}",
+       LangStandard::getLangStandardForKind(standardFromOpts(LO)).getName());
+
+  if (!Config::current().Index.StandardLibrary) {
+    dlog("No: disabled in config");
+    return llvm::None;
+  }
+
+  if (NewVersion <= OldVersion) {
+    dlog("No: have {0}, {1}>={2}",
+         LangStandard::getLangStandardForKind(
+             static_cast<LangStandard::Kind>(NewVersion))
+             .getName(),
+         OldVersion, NewVersion);
+    return llvm::None;
+  }
+
+  // We'd like to index a standard library here if there is one.
+  // Check for the mandatory header (<vector> or <stdio.h>) on the search path.
+  // We could cache this, but we only get here repeatedly when there's no
+  // stdlib, and even then only once per preamble build.
+  llvm::StringLiteral ProbeHeader = mandatoryHeader(L);
+  llvm::SmallString<256> Path; // Scratch space.
+  llvm::SmallVector<std::string> SearchPaths;
+  auto RecordHeaderPath = [&](llvm::StringRef HeaderPath) {
+    llvm::StringRef DirPath = llvm::sys::path::parent_path(HeaderPath);
+    if (!HS.getFileMgr().getVirtualFileSystem().getRealPath(DirPath, Path))
+      SearchPaths.emplace_back(Path);
+  };
+  for (const auto &DL :
+       llvm::make_range(HS.search_dir_begin(), HS.search_dir_end())) {
+    switch (DL.getLookupType()) {
+    case DirectoryLookup::LT_NormalDir: {
+      Path = DL.getDir()->getName();
+      llvm::sys::path::append(Path, ProbeHeader);
+      llvm::vfs::Status Stat;
+      if (!HS.getFileMgr().getNoncachedStatValue(Path, Stat) &&
+          Stat.isRegularFile())
+        RecordHeaderPath(Path);
+      break;
+    }
+    case DirectoryLookup::LT_Framework:
+      // stdlib can't be a framework (framework includes must have a slash)
+      continue;
+    case DirectoryLookup::LT_HeaderMap:
+      llvm::StringRef Target =
+          DL.getHeaderMap()->lookupFilename(ProbeHeader, Path);
+      if (!Target.empty())
+        RecordHeaderPath(Target);
+      break;
+    }
+  }
+  if (SearchPaths.empty())
+    return llvm::None;
+
+  dlog("Found standard library in {0}", llvm::join(SearchPaths, ", "));
+
+  while (!Best[L].compare_exchange_weak(OldVersion, NewVersion,
+                                        std::memory_order_acq_rel))
+    if (OldVersion >= NewVersion) {
+      dlog("No: lost the race");
+      return llvm::None; // Another thread won the race while we were checking.
+    }
+
+  dlog("Yes, index stdlib!");
+  return StdLibLocation{std::move(SearchPaths)};
+}
+
+} // namespace clangd
+} // namespace clang

diff  --git a/clang-tools-extra/clangd/index/StdLib.h b/clang-tools-extra/clangd/index/StdLib.h
new file mode 100644
index 0000000000000..6df30ace669c8
--- /dev/null
+++ b/clang-tools-extra/clangd/index/StdLib.h
@@ -0,0 +1,110 @@
+//===--- StdLib.h - Index the C and C++ standard library ---------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Eagerly indexing the standard library gives a much friendlier "warm start"
+// with working code completion in a standalone file or small project.
+//
+// We act as if we saw a file which included the whole standard library:
+//   #include <array>
+//   #include <bitset>
+//   #include <chrono>
+//   ...
+// We index this TU and feed the result into the dynamic index.
+//
+// This happens within the context of some particular open file, and we reuse
+// its CompilerInvocation. Matching its include path, LangOpts etc ensures that
+// we see the standard library and configuration that matches the project.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_STDLIB_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_STDLIB_H
+
+#include "index/Symbol.h"
+#include "support/ThreadsafeFS.h"
+#include "llvm/ADT/StringRef.h"
+#include <string>
+
+namespace clang {
+class CompilerInvocation;
+class LangOptions;
+class HeaderSearch;
+namespace clangd {
+
+// The filesystem location where a standard library was found.
+//
+// This is the directory containing <vector> or <stdio.h>.
+// It's used to ensure we only index files that are in the standard library.
+//
+// The paths are canonicalized (FS "real path" with symlinks resolved).
+// This allows them to be easily compared against paths the indexer returns.
+struct StdLibLocation {
+  llvm::SmallVector<std::string> Paths;
+};
+
+// Tracks the state of standard library indexing within a particular index.
+//
+// In general, we don't want to index the standard library multiple times.
+// In most cases, this class just acts as a flag to ensure we only do it once.
+//
+// However, if we first open a C++11 file, and then a C++20 file, we *do*
+// want the index to be upgraded to include the extra symbols.
+// Similarly, the C and C++ standard library can coexist.
+class StdLibSet {
+  std::atomic<int> Best[2] = {{-1}, {-1}};
+
+public:
+  // Determines if we should index the standard library in a configuration.
+  //
+  // This is true if:
+  //  - standard library indexing is enabled for the file
+  //  - the language version is higher than any previous add() for the language
+  //  - the standard library headers exist on the search path
+  // Returns the location where the standard library was found.
+  //
+  // This function is threadsafe.
+  llvm::Optional<StdLibLocation> add(const LangOptions &, const HeaderSearch &);
+
+  // Indicates whether a built index should be used.
+  // It should not be used if a newer version has subsequently been added.
+  //
+  // Intended pattern is:
+  //   if (add()) {
+  //     symbols = indexStandardLibrary();
+  //     if (isBest())
+  //       index.update(symbols);
+  //   }
+  //
+  // This is still technically racy: we could return true here, then another
+  // thread could add->index->update a better library before we can update.
+  // We'd then overwrite it with the older version.
+  // However, it's very unlikely: indexing takes a long time.
+  bool isBest(const LangOptions &) const;
+};
+
+// Index a standard library and return the discovered symbols.
+//
+// The compiler invocation should describe the file whose config we're reusing.
+// We overwrite its virtual buffer with a lot of #include statements.
+SymbolSlab indexStandardLibrary(std::unique_ptr<CompilerInvocation> Invocation,
+                                const StdLibLocation &Loc,
+                                const ThreadsafeFS &TFS);
+
+// Variant that allows the umbrella header source to be specified.
+// Exposed for testing.
+SymbolSlab indexStandardLibrary(llvm::StringRef HeaderSources,
+                                std::unique_ptr<CompilerInvocation> CI,
+                                const StdLibLocation &Loc,
+                                const ThreadsafeFS &TFS);
+
+// Generate header containing #includes for all standard library headers.
+// Exposed for testing.
+llvm::StringRef getStdlibUmbrellaHeader(const LangOptions &);
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_STDLIB_H

diff  --git a/clang-tools-extra/clangd/index/SymbolOrigin.cpp b/clang-tools-extra/clangd/index/SymbolOrigin.cpp
index 46a84f2ca9846..e893ff78b8aed 100644
--- a/clang-tools-extra/clangd/index/SymbolOrigin.cpp
+++ b/clang-tools-extra/clangd/index/SymbolOrigin.cpp
@@ -14,7 +14,7 @@ namespace clangd {
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, SymbolOrigin O) {
   if (O == SymbolOrigin::Unknown)
     return OS << "unknown";
-  constexpr static char Sigils[] = "AOSMIRP7B9012345";
+  constexpr static char Sigils[] = "AOSMIRP7BL012345";
   for (unsigned I = 0; I < sizeof(Sigils); ++I)
     if (static_cast<uint16_t>(O) & 1u << I)
       OS << Sigils[I];

diff  --git a/clang-tools-extra/clangd/index/SymbolOrigin.h b/clang-tools-extra/clangd/index/SymbolOrigin.h
index 18e3616d5a9cc..2e7a3fa745e22 100644
--- a/clang-tools-extra/clangd/index/SymbolOrigin.h
+++ b/clang-tools-extra/clangd/index/SymbolOrigin.h
@@ -29,6 +29,7 @@ enum class SymbolOrigin : uint16_t {
   Preamble = 1 << 6,   // From the dynamic index of preambles.
                        // 7 reserved
   Background = 1 << 8, // From the automatic project index.
+  StdLib = 1 << 9,     // Standard library index.
 };
 
 inline SymbolOrigin operator|(SymbolOrigin A, SymbolOrigin B) {

diff  --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt
index 8309be64ef238..692d7f8038d95 100644
--- a/clang-tools-extra/clangd/unittests/CMakeLists.txt
+++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt
@@ -81,6 +81,7 @@ add_unittest(ClangdUnitTests ClangdTests
   SemanticSelectionTests.cpp
   SerializationTests.cpp
   SourceCodeTests.cpp
+  StdLibTests.cpp
   SymbolCollectorTests.cpp
   SymbolInfoTests.cpp
   SyncAPI.cpp

diff  --git a/clang-tools-extra/clangd/unittests/StdLibTests.cpp b/clang-tools-extra/clangd/unittests/StdLibTests.cpp
new file mode 100644
index 0000000000000..0fadc872305c0
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/StdLibTests.cpp
@@ -0,0 +1,162 @@
+//===-- StdLibTests.cpp -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Annotations.h"
+#include "ClangdServer.h"
+#include "CodeComplete.h"
+#include "Compiler.h"
+#include "Config.h"
+#include "SyncAPI.h"
+#include "TestFS.h"
+#include "index/StdLib.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <memory>
+
+using namespace testing;
+
+namespace clang {
+namespace clangd {
+namespace {
+
+// The umbrella header should list the usual standard library headers for the
+// language in use: C++ spellings for C++, C spellings for C.
+TEST(StdLibTests, getStdlibUmbrellaHeader) {
+  LangOptions Opts;
+  Opts.CPlusPlus = true;
+  const auto CxxUmbrella = getStdlibUmbrellaHeader(Opts).str();
+  EXPECT_THAT(CxxUmbrella, HasSubstr("#include <string>"));
+  EXPECT_THAT(CxxUmbrella, HasSubstr("#include <cstdio>"));
+  EXPECT_THAT(CxxUmbrella, Not(HasSubstr("#include <stdio.h>")));
+
+  Opts.CPlusPlus = false;
+  const auto CUmbrella = getStdlibUmbrellaHeader(Opts).str();
+  EXPECT_THAT(CUmbrella, Not(HasSubstr("#include <string>")));
+  EXPECT_THAT(CUmbrella, Not(HasSubstr("#include <cstdio>")));
+  EXPECT_THAT(CUmbrella, HasSubstr("#include <stdio.h>"));
+}
+
+MATCHER_P(Named, Name, "") { return arg.Name == Name; }
+
+// Indexes a small fake library and checks which symbols are collected.
+TEST(StdLibTests, indexStandardLibrary) {
+  MockFS FS;
+  FS.Files["std/foo.h"] = R"cpp(
+  #include <platform_stuff.h>
+  #if __cplusplus >= 201703L
+    int foo17();
+  #elif __cplusplus >= 201402L
+    int foo14();
+  #else
+    bool foo98();
+  #endif
+  )cpp";
+  FS.Files["nonstd/platform_stuff.h"] = "int magic = 42;";
+
+  ParseInputs Inputs;
+  Inputs.TFS = &FS;
+  auto &Cmd = Inputs.CompileCommand;
+  Cmd.Directory = testRoot();
+  Cmd.Filename = testPath("main.cc");
+  Cmd.CommandLine = {"clang++", testPath("main.cc"), "-isystemstd/",
+                     "-isystemnonstd/", "-std=c++14"};
+  IgnoreDiagnostics Diags;
+  auto Invocation = buildCompilerInvocation(Inputs, Diags);
+  ASSERT_TRUE(Invocation);
+
+  StdLibLocation Loc;
+  Loc.Paths.push_back(testPath("std/"));
+
+  auto Slab =
+      indexStandardLibrary("#include <foo.h>", std::move(Invocation), Loc, FS);
+  EXPECT_THAT(Slab, ElementsAre(Named("foo14")));
+}
+
+TEST(StdLibTests, StdLibSet) {
+  StdLibSet Set;
+  MockFS FS;
+  FS.Files["std/_"] = "";
+  FS.Files["libc/_"] = "";
+  // Calls Set.add() with a HeaderSearch whose system dirs are SearchPath.
+  auto Add = [&](const LangOptions &LO,
+                 std::vector<llvm::StringRef> SearchPath) {
+    SourceManagerForFile SM("scratch", "");
+    SM.get().getFileManager().setVirtualFileSystem(FS.view(llvm::None));
+    HeaderSearch HS(/*HSOpts=*/nullptr, SM.get(), SM.get().getDiagnostics(), LO,
+                    /*Target=*/nullptr);
+    for (auto P : SearchPath)
+      HS.AddSearchPath(
+          DirectoryLookup(
+              cantFail(SM.get().getFileManager().getDirectoryRef(testPath(P))),
+              SrcMgr::C_System, /*isFramework=*/false),
+          true);
+    return Set.add(LO, HS);
+  };
+  // While the feature is disabled in config, add() should return nothing.
+  Config Cfg;
+  Cfg.Index.StandardLibrary = false;
+  WithContextValue Disabled(Config::Key, std::move(Cfg));
+
+  LangOptions LO;
+  LO.CPlusPlus = true;
+  EXPECT_FALSE(Add(LO, {"std"})) << "Disabled in config";
+  // Once enabled: a marker header is required and configs aren't re-added.
+  Cfg = Config();
+  Cfg.Index.StandardLibrary = true;
+  WithContextValue Enabled(Config::Key, std::move(Cfg));
+
+  EXPECT_FALSE(Add(LO, {"std"})) << "No <vector> found";
+  FS.Files["std/vector"] = "class vector;";
+  EXPECT_TRUE(Add(LO, {"std"})) << "Indexing as C++98";
+  EXPECT_FALSE(Add(LO, {"std"})) << "Don't reindex";
+  LO.CPlusPlus11 = true;
+  EXPECT_TRUE(Add(LO, {"std"})) << "Indexing as C++11";
+  LO.CPlusPlus = false;
+  EXPECT_FALSE(Add(LO, {"libc"})) << "No <stdio.h>";
+  FS.Files["libc/stdio.h"] = "";
+  EXPECT_TRUE(Add(LO, {"libc"})) << "Indexing as C";
+}
+
+MATCHER_P(StdlibSymbol, Name, "") {
+  return arg.Name == Name && arg.Includes.size() == 1 &&
+         llvm::StringRef(arg.Includes.front().Header).startswith("<");
+}
+
+TEST(StdLibTests, EndToEnd) {
+  Config Cfg;
+  Cfg.Index.StandardLibrary = true;
+  WithContextValue Enabled(Config::Key, std::move(Cfg));
+
+  // Fake standard library, made visible to the server through -isystem.
+  MockFS FS;
+  FS.Files["stdlib/vector"] =
+      "namespace std { template <class> class vector; }";
+  FS.Files["stdlib/list"] =
+      " namespace std { template <typename T> class list; }";
+  MockCompilationDatabase CDB;
+  CDB.ExtraClangFlags.push_back("-isystem" + testPath("stdlib"));
+  auto Opts = ClangdServer::optsForTest();
+  Opts.BuildDynamicSymbolIndex = true; // also used for stdlib index
+  ClangdServer Server(CDB, FS, Opts);
+
+  // Complete after "std::" once the server has gone idle.
+  const auto File = testPath("foo.cc");
+  Annotations Code("std::^");
+  Server.addDocument(File, Code.code());
+  ASSERT_TRUE(Server.blockUntilIdleForTest());
+  clangd::CodeCompleteOptions CCOpts;
+  auto Result = cantFail(runCodeComplete(Server, File, Code.point(), CCOpts));
+  EXPECT_THAT(Result.Completions, UnorderedElementsAre(StdlibSymbol("list"),
+                                                       StdlibSymbol("vector")));
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang

diff  --git a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
index 76f4cbafc830b..cf30acb0d6693 100644
--- a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
+++ b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
@@ -1123,7 +1123,8 @@ TEST_F(TUSchedulerTests, AsyncPreambleThread) {
   public:
     BlockPreambleThread(llvm::StringRef BlockVersion, Notification &N)
         : BlockVersion(BlockVersion), N(N) {}
-    void onPreambleAST(PathRef Path, llvm::StringRef Version, ASTContext &Ctx,
+    void onPreambleAST(PathRef Path, llvm::StringRef Version,
+                       const CompilerInvocation &, ASTContext &Ctx,
                        Preprocessor &, const CanonicalIncludes &) override {
       if (Version == BlockVersion)
         N.wait();


        


More information about the cfe-commits mailing list