[clang] 46a6f5a - [clangd] NFC: Move stdlib headers handling to Clang

Kirill Bobyrev via cfe-commits cfe-commits at lists.llvm.org
Wed Feb 9 02:05:48 PST 2022


Author: Kirill Bobyrev
Date: 2022-02-09T11:05:39+01:00
New Revision: 46a6f5ae148ae2044f13cddf1bb1498a8bcfb372

URL: https://github.com/llvm/llvm-project/commit/46a6f5ae148ae2044f13cddf1bb1498a8bcfb372
DIFF: https://github.com/llvm/llvm-project/commit/46a6f5ae148ae2044f13cddf1bb1498a8bcfb372.diff

LOG: [clangd] NFC: Move stdlib headers handling to Clang

This will allow moving the IncludeCleaner library essentials to Clang
and decoupling them from the majority of clangd.

The patch itself just moves the code, it doesn't change existing
functionality.

Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D119130

Added: 
    clang/include/clang/Tooling/Inclusions/CSymbolMap.inc
    clang/include/clang/Tooling/Inclusions/StandardLibrary.h
    clang/include/clang/Tooling/Inclusions/StdSymbolMap.inc
    clang/lib/Tooling/Inclusions/StandardLibrary.cpp
    clang/tools/include-mapping/cppreference_parser.py
    clang/tools/include-mapping/gen_std.py
    clang/tools/include-mapping/test.py
    clang/unittests/Tooling/StandardLibraryTest.cpp

Modified: 
    clang-tools-extra/clangd/Headers.cpp
    clang-tools-extra/clangd/Headers.h
    clang-tools-extra/clangd/IncludeCleaner.cpp
    clang-tools-extra/clangd/IncludeCleaner.h
    clang-tools-extra/clangd/index/CanonicalIncludes.cpp
    clang-tools-extra/clangd/unittests/HeadersTests.cpp
    clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
    clang/lib/Tooling/Inclusions/CMakeLists.txt
    clang/unittests/Tooling/CMakeLists.txt

Removed: 
    clang-tools-extra/clangd/CSymbolMap.inc
    clang-tools-extra/clangd/StdSymbolMap.inc
    clang-tools-extra/clangd/include-mapping/cppreference_parser.py
    clang-tools-extra/clangd/include-mapping/gen_std.py
    clang-tools-extra/clangd/include-mapping/test.py


################################################################################
diff  --git a/clang-tools-extra/clangd/Headers.cpp b/clang-tools-extra/clangd/Headers.cpp
index 72da1be99283c..da6f51738d83b 100644
--- a/clang-tools-extra/clangd/Headers.cpp
+++ b/clang-tools-extra/clangd/Headers.cpp
@@ -67,7 +67,7 @@ class IncludeStructure::RecordHeaders : public PPCallbacks,
         IncludeStructure::HeaderID HID = Out->getOrCreateID(File);
         Inc.HeaderID = static_cast<unsigned>(HID);
         if (IsAngled)
-          if (auto StdlibHeader = stdlib::Header::named(Inc.Written)) {
+          if (auto StdlibHeader = tooling::stdlib::Header::named(Inc.Written)) {
             auto &IDs = Out->StdlibHeaders[*StdlibHeader];
             // Few physical files for one stdlib header name, linear scan is ok.
             if (!llvm::is_contained(IDs, HID))
@@ -350,154 +350,5 @@ bool operator==(const Inclusion &LHS, const Inclusion &RHS) {
                   RHS.Resolved, RHS.Written);
 }
 
-namespace stdlib {
-static llvm::StringRef *HeaderNames;
-static std::pair<llvm::StringRef, llvm::StringRef> *SymbolNames;
-static unsigned *SymbolHeaderIDs;
-static llvm::DenseMap<llvm::StringRef, unsigned> *HeaderIDs;
-// Maps symbol name -> Symbol::ID, within a namespace.
-using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
-static llvm::DenseMap<llvm::StringRef, NSSymbolMap *> *NamespaceSymbols;
-
-static int initialize() {
-  unsigned SymCount = 0;
-#define SYMBOL(Name, NS, Header) ++SymCount;
-#include "CSymbolMap.inc"
-#include "StdSymbolMap.inc"
-#undef SYMBOL
-  SymbolNames = new std::remove_reference_t<decltype(*SymbolNames)>[SymCount];
-  SymbolHeaderIDs =
-      new std::remove_reference_t<decltype(*SymbolHeaderIDs)>[SymCount];
-  NamespaceSymbols = new std::remove_reference_t<decltype(*NamespaceSymbols)>;
-  HeaderIDs = new std::remove_reference_t<decltype(*HeaderIDs)>;
-
-  auto AddNS = [&](llvm::StringRef NS) -> NSSymbolMap & {
-    auto R = NamespaceSymbols->try_emplace(NS, nullptr);
-    if (R.second)
-      R.first->second = new NSSymbolMap();
-    return *R.first->second;
-  };
-
-  auto AddHeader = [&](llvm::StringRef Header) -> unsigned {
-    return HeaderIDs->try_emplace(Header, HeaderIDs->size()).first->second;
-  };
-
-  auto Add = [&, SymIndex(0)](llvm::StringRef Name, llvm::StringRef NS,
-                              llvm::StringRef HeaderName) mutable {
-    if (NS == "None")
-      NS = "";
-
-    SymbolNames[SymIndex] = {NS, Name};
-    SymbolHeaderIDs[SymIndex] = AddHeader(HeaderName);
-
-    NSSymbolMap &NSSymbols = AddNS(NS);
-    NSSymbols.try_emplace(Name, SymIndex);
-
-    ++SymIndex;
-  };
-#define SYMBOL(Name, NS, Header) Add(#Name, #NS, #Header);
-#include "CSymbolMap.inc"
-#include "StdSymbolMap.inc"
-#undef SYMBOL
-
-  HeaderNames = new llvm::StringRef[HeaderIDs->size()];
-  for (const auto &E : *HeaderIDs)
-    HeaderNames[E.second] = E.first;
-
-  return 0;
-}
-
-static void ensureInitialized() {
-  static int Dummy = initialize();
-  (void)Dummy;
-}
-
-llvm::Optional<Header> Header::named(llvm::StringRef Name) {
-  ensureInitialized();
-  auto It = HeaderIDs->find(Name);
-  if (It == HeaderIDs->end())
-    return llvm::None;
-  return Header(It->second);
-}
-llvm::StringRef Header::name() const { return HeaderNames[ID]; }
-llvm::StringRef Symbol::scope() const { return SymbolNames[ID].first; }
-llvm::StringRef Symbol::name() const { return SymbolNames[ID].second; }
-llvm::Optional<Symbol> Symbol::named(llvm::StringRef Scope,
-                                     llvm::StringRef Name) {
-  ensureInitialized();
-  if (NSSymbolMap *NSSymbols = NamespaceSymbols->lookup(Scope)) {
-    auto It = NSSymbols->find(Name);
-    if (It != NSSymbols->end())
-      return Symbol(It->second);
-  }
-  return llvm::None;
-}
-Header Symbol::header() const { return Header(SymbolHeaderIDs[ID]); }
-llvm::SmallVector<Header> Symbol::headers() const {
-  return {header()}; // FIXME: multiple in case of ambiguity
-}
-
-Recognizer::Recognizer() { ensureInitialized(); }
-
-NSSymbolMap *Recognizer::namespaceSymbols(const NamespaceDecl *D) {
-  auto It = NamespaceCache.find(D);
-  if (It != NamespaceCache.end())
-    return It->second;
-
-  NSSymbolMap *Result = [&]() -> NSSymbolMap * {
-    if (!D) // Nullptr means the global namespace
-      return NamespaceSymbols->lookup("");
-    if (D->isAnonymousNamespace())
-      return nullptr;
-    if (D->isInlineNamespace()) {
-      if (auto *Parent = llvm::dyn_cast_or_null<NamespaceDecl>(D->getParent()))
-        return namespaceSymbols(Parent);
-      return nullptr;
-    }
-    return NamespaceSymbols->lookup(printNamespaceScope(*D));
-  }();
-  NamespaceCache.try_emplace(D, Result);
-  return Result;
-}
-
-llvm::Optional<Symbol> Recognizer::operator()(const Decl *D) {
-  // If D is std::vector::iterator, `vector` is the outer symbol to look up.
-  // We keep all the candidate DCs as some may turn out to be anon enums.
-  // Do this resolution lazily as we may turn out not to have a std namespace.
-  llvm::SmallVector<const DeclContext *> IntermediateDecl;
-  const DeclContext *DC = D->getDeclContext();
-  while (DC && !DC->isNamespace()) {
-    if (NamedDecl::classofKind(DC->getDeclKind()))
-      IntermediateDecl.push_back(DC);
-    DC = DC->getParent();
-  }
-  NSSymbolMap *Symbols = namespaceSymbols(cast_or_null<NamespaceDecl>(DC));
-  if (!Symbols)
-    return llvm::None;
-
-  llvm::StringRef Name = [&]() -> llvm::StringRef {
-    for (const auto *SymDC : llvm::reverse(IntermediateDecl)) {
-      DeclarationName N = cast<NamedDecl>(SymDC)->getDeclName();
-      if (const auto *II = N.getAsIdentifierInfo())
-        return II->getName();
-      if (!N.isEmpty())
-        return ""; // e.g. operator<: give up
-    }
-    if (const auto *ND = llvm::dyn_cast<NamedDecl>(D))
-      if (const auto *II = ND->getIdentifier())
-        return II->getName();
-    return "";
-  }();
-  if (Name.empty())
-    return llvm::None;
-
-  auto It = Symbols->find(Name);
-  if (It == Symbols->end())
-    return llvm::None;
-  return Symbol(It->second);
-}
-
-} // namespace stdlib
-
 } // namespace clangd
 } // namespace clang

diff  --git a/clang-tools-extra/clangd/Headers.h b/clang-tools-extra/clangd/Headers.h
index 3b510325da042..ce04d3dbd4620 100644
--- a/clang-tools-extra/clangd/Headers.h
+++ b/clang-tools-extra/clangd/Headers.h
@@ -22,6 +22,7 @@
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Tooling/Inclusions/HeaderIncludes.h"
+#include "clang/Tooling/Inclusions/StandardLibrary.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringRef.h"
@@ -32,79 +33,8 @@
 #include <string>
 
 namespace clang {
-class Decl;
-class NamespaceDecl;
 namespace clangd {
 
-// clangd has a built-in database of standard library symbols.
-namespace stdlib {
-class Symbol;
-
-// A standard library header, such as <iostream>
-// Lightweight class, in fact just an index into a table.
-class Header {
-public:
-  static llvm::Optional<Header> named(llvm::StringRef Name);
-
-  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Header &H) {
-    return OS << H.name();
-  }
-  llvm::StringRef name() const;
-
-private:
-  Header(unsigned ID) : ID(ID) {}
-  unsigned ID;
-  friend Symbol;
-  friend llvm::DenseMapInfo<Header>;
-  friend bool operator==(const Header &L, const Header &R) {
-    return L.ID == R.ID;
-  }
-};
-
-// A top-level standard library symbol, such as std::vector
-// Lightweight class, in fact just an index into a table.
-class Symbol {
-public:
-  static llvm::Optional<Symbol> named(llvm::StringRef Scope,
-                                      llvm::StringRef Name);
-
-  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) {
-    return OS << S.scope() << S.name();
-  }
-  llvm::StringRef scope() const;
-  llvm::StringRef name() const;
-  // The preferred header for this symbol (e.g. the suggested insertion).
-  Header header() const;
-  // Some symbols may be provided my multiple headers.
-  llvm::SmallVector<Header> headers() const;
-
-private:
-  Symbol(unsigned ID) : ID(ID) {}
-  unsigned ID;
-  friend class Recognizer;
-  friend llvm::DenseMapInfo<Symbol>;
-  friend bool operator==(const Symbol &L, const Symbol &R) {
-    return L.ID == R.ID;
-  }
-};
-
-// A functor to find the stdlib::Symbol associated with a decl.
-//
-// For non-top-level decls (std::vector<int>::iterator), returns the top-level
-// symbol (std::vector).
-class Recognizer {
-public:
-  Recognizer();
-  llvm::Optional<Symbol> operator()(const Decl *D);
-
-private:
-  using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
-  NSSymbolMap *namespaceSymbols(const NamespaceDecl *D);
-  llvm::DenseMap<const DeclContext *, NSSymbolMap *> NamespaceCache;
-};
-
-} // namespace stdlib
-
 /// Returns true if \p Include is literal include like "path" or <path>.
 bool isLiteralInclude(llvm::StringRef Include);
 
@@ -231,7 +161,8 @@ class IncludeStructure {
   // Maps HeaderID to the ids of the files included from it.
   llvm::DenseMap<HeaderID, SmallVector<HeaderID>> IncludeChildren;
 
-  llvm::DenseMap<stdlib::Header, llvm::SmallVector<HeaderID>> StdlibHeaders;
+  llvm::DenseMap<tooling::stdlib::Header, llvm::SmallVector<HeaderID>>
+      StdlibHeaders;
 
   std::vector<Inclusion> MainFileIncludes;
 
@@ -341,38 +272,6 @@ template <> struct DenseMapInfo<clang::clangd::IncludeStructure::HeaderID> {
   }
 };
 
-template <> struct DenseMapInfo<clang::clangd::stdlib::Header> {
-  static inline clang::clangd::stdlib::Header getEmptyKey() {
-    return clang::clangd::stdlib::Header(-1);
-  }
-  static inline clang::clangd::stdlib::Header getTombstoneKey() {
-    return clang::clangd::stdlib::Header(-2);
-  }
-  static unsigned getHashValue(const clang::clangd::stdlib::Header &H) {
-    return hash_value(H.ID);
-  }
-  static bool isEqual(const clang::clangd::stdlib::Header &LHS,
-                      const clang::clangd::stdlib::Header &RHS) {
-    return LHS == RHS;
-  }
-};
-
-template <> struct DenseMapInfo<clang::clangd::stdlib::Symbol> {
-  static inline clang::clangd::stdlib::Symbol getEmptyKey() {
-    return clang::clangd::stdlib::Symbol(-1);
-  }
-  static inline clang::clangd::stdlib::Symbol getTombstoneKey() {
-    return clang::clangd::stdlib::Symbol(-2);
-  }
-  static unsigned getHashValue(const clang::clangd::stdlib::Symbol &S) {
-    return hash_value(S.ID);
-  }
-  static bool isEqual(const clang::clangd::stdlib::Symbol &LHS,
-                      const clang::clangd::stdlib::Symbol &RHS) {
-    return LHS == RHS;
-  }
-};
-
 } // namespace llvm
 
 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H

diff  --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp
index 04449904ecc4c..cb50804585082 100644
--- a/clang-tools-extra/clangd/IncludeCleaner.cpp
+++ b/clang-tools-extra/clangd/IncludeCleaner.cpp
@@ -161,7 +161,7 @@ class ReferencedLocationCrawler
   ReferencedLocations &Result;
   llvm::DenseSet<const void *> Visited;
   const SourceManager &SM;
-  stdlib::Recognizer StdRecognizer;
+  tooling::stdlib::Recognizer StdRecognizer;
 };
 
 // Given a set of referenced FileIDs, determines all the potentially-referenced
@@ -241,7 +241,7 @@ static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) {
   // System headers are likely to be standard library headers.
   // Until we have good support for umbrella headers, don't warn about them.
   if (Inc.Written.front() == '<') {
-    if (AnalyzeStdlib && stdlib::Header::named(Inc.Written))
+    if (AnalyzeStdlib && tooling::stdlib::Header::named(Inc.Written))
       return true;
     return false;
   }
@@ -329,7 +329,7 @@ findReferencedFiles(const ReferencedLocations &Locs, const SourceManager &SM,
   for (FileID ID : Builder.Files)
     UserFiles.insert(HeaderResponsible(ID));
 
-  llvm::DenseSet<stdlib::Header> StdlibFiles;
+  llvm::DenseSet<tooling::stdlib::Header> StdlibFiles;
   for (const auto &Symbol : Locs.Stdlib)
     for (const auto &Header : Symbol.headers())
       StdlibFiles.insert(Header);
@@ -392,7 +392,7 @@ translateToHeaderIDs(const ReferencedFiles &Files,
     assert(File);
     TranslatedHeaderIDs.insert(*File);
   }
-  for (stdlib::Header StdlibUsed : Files.Stdlib)
+  for (tooling::stdlib::Header StdlibUsed : Files.Stdlib)
     for (auto HID : Includes.StdlibHeaders.lookup(StdlibUsed))
       TranslatedHeaderIDs.insert(HID);
   return TranslatedHeaderIDs;

diff  --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h
index 4ae2ccb94e701..ad34e3d6facdf 100644
--- a/clang-tools-extra/clangd/IncludeCleaner.h
+++ b/clang-tools-extra/clangd/IncludeCleaner.h
@@ -24,6 +24,7 @@
 #include "Headers.h"
 #include "ParsedAST.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Tooling/Inclusions/StandardLibrary.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
 #include <vector>
@@ -33,7 +34,7 @@ namespace clangd {
 
 struct ReferencedLocations {
   llvm::DenseSet<SourceLocation> User;
-  llvm::DenseSet<stdlib::Symbol> Stdlib;
+  llvm::DenseSet<tooling::stdlib::Symbol> Stdlib;
 };
 
 /// Finds locations of all symbols used in the main file.
@@ -57,7 +58,7 @@ ReferencedLocations findReferencedLocations(ParsedAST &AST);
 
 struct ReferencedFiles {
   llvm::DenseSet<FileID> User;
-  llvm::DenseSet<stdlib::Header> Stdlib;
+  llvm::DenseSet<tooling::stdlib::Header> Stdlib;
 };
 
 /// Retrieves IDs of all files containing SourceLocations from \p Locs.

diff  --git a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
index f7269b686552b..3c85a9951e6ee 100644
--- a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
+++ b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
@@ -85,7 +85,7 @@ void CanonicalIncludes::addSystemHeadersMapping(const LangOptions &Language) {
   if (Language.CPlusPlus) {
     static const auto *Symbols = new llvm::StringMap<llvm::StringRef>({
 #define SYMBOL(Name, NameSpace, Header) {#NameSpace #Name, #Header},
-#include "StdSymbolMap.inc"
+#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
         // There are two std::move()s, this is by far the most common.
         SYMBOL(move, std::, <utility>)
         // There are multiple headers for size_t, pick one.
@@ -96,7 +96,7 @@ void CanonicalIncludes::addSystemHeadersMapping(const LangOptions &Language) {
   } else if (Language.C11) {
     static const auto *CSymbols = new llvm::StringMap<llvm::StringRef>({
 #define SYMBOL(Name, NameSpace, Header) {#Name, #Header},
-#include "CSymbolMap.inc"
+#include "clang/Tooling/Inclusions/CSymbolMap.inc"
         // There are multiple headers for size_t, pick one.
         SYMBOL(size_t, None, <stddef.h>)
 #undef SYMBOL

diff  --git a/clang-tools-extra/clangd/unittests/HeadersTests.cpp b/clang-tools-extra/clangd/unittests/HeadersTests.cpp
index d7940b997083f..2a78667ae34c4 100644
--- a/clang-tools-extra/clangd/unittests/HeadersTests.cpp
+++ b/clang-tools-extra/clangd/unittests/HeadersTests.cpp
@@ -409,58 +409,6 @@ void foo();
   EXPECT_FALSE(Includes.isSelfContained(getID("pp_depend.h", Includes)));
 }
 
-TEST(StdlibTest, All) {
-  auto VectorH = stdlib::Header::named("<vector>");
-  EXPECT_TRUE(VectorH);
-  EXPECT_EQ(llvm::to_string(*VectorH), "<vector>");
-  EXPECT_FALSE(stdlib::Header::named("HeadersTests.cpp"));
-
-  auto Vector = stdlib::Symbol::named("std::", "vector");
-  EXPECT_TRUE(Vector);
-  EXPECT_EQ(llvm::to_string(*Vector), "std::vector");
-  EXPECT_FALSE(stdlib::Symbol::named("std::", "dongle"));
-  EXPECT_FALSE(stdlib::Symbol::named("clang::", "ASTContext"));
-
-  EXPECT_EQ(Vector->header(), *VectorH);
-  EXPECT_THAT(Vector->headers(), ElementsAre(*VectorH));
-}
-
-TEST(StdlibTest, Recognizer) {
-  auto TU = TestTU::withCode(R"cpp(
-    namespace std {
-    inline namespace inl {
-
-    template <typename>
-    struct vector { class nested {}; };
-
-    class secret {};
-
-    } // inl
-    } // std
-
-    class vector {};
-    std::vector<int> vec;
-    std::vector<int>::nested nest;
-    std::secret sec;
-  )cpp");
-
-  auto AST = TU.build();
-  auto &VectorNonstd = findDecl(AST, "vector");
-  auto *Vec =
-      cast<VarDecl>(findDecl(AST, "vec")).getType()->getAsCXXRecordDecl();
-  auto *Nest =
-      cast<VarDecl>(findDecl(AST, "nest")).getType()->getAsCXXRecordDecl();
-  auto *Sec =
-      cast<VarDecl>(findDecl(AST, "sec")).getType()->getAsCXXRecordDecl();
-
-  stdlib::Recognizer Recognizer;
-
-  EXPECT_EQ(Recognizer(&VectorNonstd), llvm::None);
-  EXPECT_EQ(Recognizer(Vec), stdlib::Symbol::named("std::", "vector"));
-  EXPECT_EQ(Recognizer(Nest), stdlib::Symbol::named("std::", "vector"));
-  EXPECT_EQ(Recognizer(Sec), llvm::None);
-}
-
 } // namespace
 } // namespace clangd
 } // namespace clang

diff  --git a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
index ab367fca66e11..65942fe29c72c 100644
--- a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
+++ b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
@@ -250,16 +250,16 @@ TEST(IncludeCleaner, Stdlib) {
   for (const auto &Test : Tests) {
     TU.Code = Test.Code.str();
     ParsedAST AST = TU.build();
-    std::vector<stdlib::Symbol> WantSyms;
+    std::vector<tooling::stdlib::Symbol> WantSyms;
     for (const auto &SymName : Test.Symbols) {
       auto QName = splitQualifiedName(SymName);
-      auto Sym = stdlib::Symbol::named(QName.first, QName.second);
+      auto Sym = tooling::stdlib::Symbol::named(QName.first, QName.second);
       EXPECT_TRUE(Sym) << SymName;
       WantSyms.push_back(*Sym);
     }
-    std::vector<stdlib::Header> WantHeaders;
+    std::vector<tooling::stdlib::Header> WantHeaders;
     for (const auto &HeaderName : Test.Headers) {
-      auto Header = stdlib::Header::named(HeaderName);
+      auto Header = tooling::stdlib::Header::named(HeaderName);
       EXPECT_TRUE(Header) << HeaderName;
       WantHeaders.push_back(*Header);
     }

diff  --git a/clang-tools-extra/clangd/CSymbolMap.inc b/clang/include/clang/Tooling/Inclusions/CSymbolMap.inc
similarity index 99%
rename from clang-tools-extra/clangd/CSymbolMap.inc
rename to clang/include/clang/Tooling/Inclusions/CSymbolMap.inc
index ce05f23561d90..463ce921f0672 100644
--- a/clang-tools-extra/clangd/CSymbolMap.inc
+++ b/clang/include/clang/Tooling/Inclusions/CSymbolMap.inc
@@ -3,7 +3,8 @@
 // Used to build a lookup table (qualified names => include headers) for C
 // Standard Library symbols.
 //
-// Automatically generated file, DO NOT EDIT!
+// This file was generated automatically by
+// clang/tools/include-mapping/gen_std.py, DO NOT EDIT!
 //
 // Generated from cppreference offline HTML book (modified on 2018-10-28).
 //===----------------------------------------------------------------------===//

diff  --git a/clang/include/clang/Tooling/Inclusions/StandardLibrary.h b/clang/include/clang/Tooling/Inclusions/StandardLibrary.h
new file mode 100644
index 0000000000000..c6ce2780dae6c
--- /dev/null
+++ b/clang/include/clang/Tooling/Inclusions/StandardLibrary.h
@@ -0,0 +1,141 @@
+//===--- StandardLibrary.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Provides an interface for querying information about C and C++ Standard
+/// Library headers and symbols.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_INCLUSIONS_STANDARDLIBRARY_H
+#define LLVM_CLANG_TOOLING_INCLUSIONS_STANDARDLIBRARY_H
+
+#include "clang/AST/Decl.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+
+namespace clang {
+namespace tooling {
+namespace stdlib {
+
+class Symbol;
+
+// A standard library header, such as <iostream>
+// Lightweight class, in fact just an index into a table.
+// C++ and C Library compatibility headers are considered different: e.g.
+// "<cstdio>" and "<stdio.h>" (and their symbols) are treated differently.
+class Header {
+public:
+  // Name should contain the angle brackets, e.g. "<vector>".
+  static llvm::Optional<Header> named(llvm::StringRef Name);
+
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Header &H) {
+    return OS << H.name();
+  }
+  llvm::StringRef name() const;
+
+private:
+  Header(unsigned ID) : ID(ID) {}
+  unsigned ID;
+  friend Symbol;
+  friend llvm::DenseMapInfo<Header>;
+  friend bool operator==(const Header &L, const Header &R) {
+    return L.ID == R.ID;
+  }
+};
+
+// A top-level standard library symbol, such as std::vector
+// Lightweight class, in fact just an index into a table.
+// C++ and C Standard Library symbols are considered distinct: e.g. std::printf
+// and ::printf are not treated as the same symbol.
+// The symbols do not contain macros right now, we don't have a reliable index
+// for them.
+class Symbol {
+public:
+  /// \p Scope should have the trailing "::", for example:
+  /// named("std::chrono::", "system_clock")
+  static llvm::Optional<Symbol> named(llvm::StringRef Scope,
+                                      llvm::StringRef Name);
+
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) {
+    return OS << S.scope() << S.name();
+  }
+  llvm::StringRef scope() const;
+  llvm::StringRef name() const;
+  // The preferred header for this symbol (e.g. the suggested insertion).
+  Header header() const;
+  // Some symbols may be provided by multiple headers.
+  llvm::SmallVector<Header> headers() const;
+
+private:
+  Symbol(unsigned ID) : ID(ID) {}
+  unsigned ID;
+  friend class Recognizer;
+  friend llvm::DenseMapInfo<Symbol>;
+  friend bool operator==(const Symbol &L, const Symbol &R) {
+    return L.ID == R.ID;
+  }
+};
+
+// A functor to find the stdlib::Symbol associated with a decl.
+//
+// For non-top-level decls (std::vector<int>::iterator), returns the top-level
+// symbol (std::vector).
+class Recognizer {
+public:
+  Recognizer();
+  llvm::Optional<Symbol> operator()(const Decl *D);
+
+private:
+  using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
+  NSSymbolMap *namespaceSymbols(const NamespaceDecl *D);
+  llvm::DenseMap<const DeclContext *, NSSymbolMap *> NamespaceCache;
+};
+
+} // namespace stdlib
+} // namespace tooling
+} // namespace clang
+
+namespace llvm {
+
+template <> struct DenseMapInfo<clang::tooling::stdlib::Header> {
+  static inline clang::tooling::stdlib::Header getEmptyKey() {
+    return clang::tooling::stdlib::Header(-1);
+  }
+  static inline clang::tooling::stdlib::Header getTombstoneKey() {
+    return clang::tooling::stdlib::Header(-2);
+  }
+  static unsigned getHashValue(const clang::tooling::stdlib::Header &H) {
+    return hash_value(H.ID);
+  }
+  static bool isEqual(const clang::tooling::stdlib::Header &LHS,
+                      const clang::tooling::stdlib::Header &RHS) {
+    return LHS == RHS;
+  }
+};
+
+template <> struct DenseMapInfo<clang::tooling::stdlib::Symbol> {
+  static inline clang::tooling::stdlib::Symbol getEmptyKey() {
+    return clang::tooling::stdlib::Symbol(-1);
+  }
+  static inline clang::tooling::stdlib::Symbol getTombstoneKey() {
+    return clang::tooling::stdlib::Symbol(-2);
+  }
+  static unsigned getHashValue(const clang::tooling::stdlib::Symbol &S) {
+    return hash_value(S.ID);
+  }
+  static bool isEqual(const clang::tooling::stdlib::Symbol &LHS,
+                      const clang::tooling::stdlib::Symbol &RHS) {
+    return LHS == RHS;
+  }
+};
+} // namespace llvm
+
+#endif // LLVM_CLANG_TOOLING_INCLUSIONS_STANDARDLIBRARY_H

diff  --git a/clang-tools-extra/clangd/StdSymbolMap.inc b/clang/include/clang/Tooling/Inclusions/StdSymbolMap.inc
similarity index 99%
rename from clang-tools-extra/clangd/StdSymbolMap.inc
rename to clang/include/clang/Tooling/Inclusions/StdSymbolMap.inc
index e78d3766bcdda..e5e9065731e94 100644
--- a/clang-tools-extra/clangd/StdSymbolMap.inc
+++ b/clang/include/clang/Tooling/Inclusions/StdSymbolMap.inc
@@ -3,7 +3,8 @@
 // Used to build a lookup table (qualified names => include headers) for CPP
 // Standard Library symbols.
 //
-// Automatically generated file, DO NOT EDIT!
+// This file was generated automatically by
+// clang/tools/include-mapping/gen_std.py, DO NOT EDIT!
 //
 // Generated from cppreference offline HTML book (modified on 2018-10-28).
 //===----------------------------------------------------------------------===//

diff  --git a/clang/lib/Tooling/Inclusions/CMakeLists.txt b/clang/lib/Tooling/Inclusions/CMakeLists.txt
index 00afb50f3a690..ee884a0b74126 100644
--- a/clang/lib/Tooling/Inclusions/CMakeLists.txt
+++ b/clang/lib/Tooling/Inclusions/CMakeLists.txt
@@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS support)
 add_clang_library(clangToolingInclusions
   HeaderIncludes.cpp
   IncludeStyle.cpp
+  StandardLibrary.cpp
 
   LINK_LIBS
   clangBasic

diff  --git a/clang/lib/Tooling/Inclusions/StandardLibrary.cpp b/clang/lib/Tooling/Inclusions/StandardLibrary.cpp
new file mode 100644
index 0000000000000..8fb0c8474e640
--- /dev/null
+++ b/clang/lib/Tooling/Inclusions/StandardLibrary.cpp
@@ -0,0 +1,165 @@
+//===--- StandardLibrary.cpp ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Inclusions/StandardLibrary.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+
+namespace clang {
+namespace tooling {
+namespace stdlib {
+
+static llvm::StringRef *HeaderNames;
+static std::pair<llvm::StringRef, llvm::StringRef> *SymbolNames;
+static unsigned *SymbolHeaderIDs;
+static llvm::DenseMap<llvm::StringRef, unsigned> *HeaderIDs;
+// Maps symbol name -> Symbol::ID, within a namespace.
+using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
+static llvm::DenseMap<llvm::StringRef, NSSymbolMap *> *NamespaceSymbols;
+
+static int initialize() {
+  unsigned SymCount = 0;
+#define SYMBOL(Name, NS, Header) ++SymCount;
+#include "clang/Tooling/Inclusions/CSymbolMap.inc"
+#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
+#undef SYMBOL
+  SymbolNames = new std::remove_reference_t<decltype(*SymbolNames)>[SymCount];
+  SymbolHeaderIDs =
+      new std::remove_reference_t<decltype(*SymbolHeaderIDs)>[SymCount];
+  NamespaceSymbols = new std::remove_reference_t<decltype(*NamespaceSymbols)>;
+  HeaderIDs = new std::remove_reference_t<decltype(*HeaderIDs)>;
+
+  auto AddNS = [&](llvm::StringRef NS) -> NSSymbolMap & {
+    auto R = NamespaceSymbols->try_emplace(NS, nullptr);
+    if (R.second)
+      R.first->second = new NSSymbolMap();
+    return *R.first->second;
+  };
+
+  auto AddHeader = [&](llvm::StringRef Header) -> unsigned {
+    return HeaderIDs->try_emplace(Header, HeaderIDs->size()).first->second;
+  };
+
+  auto Add = [&, SymIndex(0)](llvm::StringRef Name, llvm::StringRef NS,
+                              llvm::StringRef HeaderName) mutable {
+    if (NS == "None")
+      NS = "";
+
+    SymbolNames[SymIndex] = {NS, Name};
+    SymbolHeaderIDs[SymIndex] = AddHeader(HeaderName);
+
+    NSSymbolMap &NSSymbols = AddNS(NS);
+    NSSymbols.try_emplace(Name, SymIndex);
+
+    ++SymIndex;
+  };
+#define SYMBOL(Name, NS, Header) Add(#Name, #NS, #Header);
+#include "clang/Tooling/Inclusions/CSymbolMap.inc"
+#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
+#undef SYMBOL
+
+  HeaderNames = new llvm::StringRef[HeaderIDs->size()];
+  for (const auto &E : *HeaderIDs)
+    HeaderNames[E.second] = E.first;
+
+  return 0;
+}
+
+static void ensureInitialized() {
+  static int Dummy = initialize();
+  (void)Dummy;
+}
+
+llvm::Optional<Header> Header::named(llvm::StringRef Name) {
+  ensureInitialized();
+  auto It = HeaderIDs->find(Name);
+  if (It == HeaderIDs->end())
+    return llvm::None;
+  return Header(It->second);
+}
+llvm::StringRef Header::name() const { return HeaderNames[ID]; }
+llvm::StringRef Symbol::scope() const { return SymbolNames[ID].first; }
+llvm::StringRef Symbol::name() const { return SymbolNames[ID].second; }
+llvm::Optional<Symbol> Symbol::named(llvm::StringRef Scope,
+                                     llvm::StringRef Name) {
+  ensureInitialized();
+  if (NSSymbolMap *NSSymbols = NamespaceSymbols->lookup(Scope)) {
+    auto It = NSSymbols->find(Name);
+    if (It != NSSymbols->end())
+      return Symbol(It->second);
+  }
+  return llvm::None;
+}
+Header Symbol::header() const { return Header(SymbolHeaderIDs[ID]); }
+llvm::SmallVector<Header> Symbol::headers() const {
+  return {header()}; // FIXME: multiple in case of ambiguity
+}
+
+Recognizer::Recognizer() { ensureInitialized(); }
+
+NSSymbolMap *Recognizer::namespaceSymbols(const NamespaceDecl *D) {
+  auto It = NamespaceCache.find(D);
+  if (It != NamespaceCache.end())
+    return It->second;
+
+  NSSymbolMap *Result = [&]() -> NSSymbolMap * {
+    if (D && D->isAnonymousNamespace())
+      return nullptr;
+    // Print the namespace and its parents omitting inline scopes.
+    std::string Scope;
+    for (const auto *ND = D; ND;
+         ND = llvm::dyn_cast_or_null<NamespaceDecl>(ND->getParent()))
+      if (!ND->isInlineNamespace() && !ND->isAnonymousNamespace())
+        Scope = ND->getName().str() + "::" + Scope;
+    return NamespaceSymbols->lookup(Scope);
+  }();
+  NamespaceCache.try_emplace(D, Result);
+  return Result;
+}
+
+llvm::Optional<Symbol> Recognizer::operator()(const Decl *D) {
+  // If D is std::vector::iterator, `vector` is the outer symbol to look up.
+  // We keep all the candidate DCs as some may turn out to be anon enums.
+  // Do this resolution lazily as we may turn out not to have a std namespace.
+  llvm::SmallVector<const DeclContext *> IntermediateDecl;
+  const DeclContext *DC = D->getDeclContext();
+  while (DC && !DC->isNamespace()) {
+    if (NamedDecl::classofKind(DC->getDeclKind()))
+      IntermediateDecl.push_back(DC);
+    DC = DC->getParent();
+  }
+  NSSymbolMap *Symbols = namespaceSymbols(cast_or_null<NamespaceDecl>(DC));
+  if (!Symbols)
+    return llvm::None;
+
+  llvm::StringRef Name = [&]() -> llvm::StringRef {
+    for (const auto *SymDC : llvm::reverse(IntermediateDecl)) {
+      DeclarationName N = cast<NamedDecl>(SymDC)->getDeclName();
+      if (const auto *II = N.getAsIdentifierInfo())
+        return II->getName();
+      if (!N.isEmpty())
+        return ""; // e.g. operator<: give up
+    }
+    if (const auto *ND = llvm::dyn_cast<NamedDecl>(D))
+      if (const auto *II = ND->getIdentifier())
+        return II->getName();
+    return "";
+  }();
+  if (Name.empty())
+    return llvm::None;
+
+  auto It = Symbols->find(Name);
+  if (It == Symbols->end())
+    return llvm::None;
+  return Symbol(It->second);
+}
+
+} // namespace stdlib
+} // namespace tooling
+} // namespace clang

diff  --git a/clang-tools-extra/clangd/include-mapping/cppreference_parser.py b/clang/tools/include-mapping/cppreference_parser.py
similarity index 100%
rename from clang-tools-extra/clangd/include-mapping/cppreference_parser.py
rename to clang/tools/include-mapping/cppreference_parser.py

diff  --git a/clang-tools-extra/clangd/include-mapping/gen_std.py b/clang/tools/include-mapping/gen_std.py
similarity index 96%
rename from clang-tools-extra/clangd/include-mapping/gen_std.py
rename to clang/tools/include-mapping/gen_std.py
index 0a6fd8a6f9116..919b5c8e97bae 100755
--- a/clang-tools-extra/clangd/include-mapping/gen_std.py
+++ b/clang/tools/include-mapping/gen_std.py
@@ -11,6 +11,8 @@
 include headers) for C/C++ Standard Library symbols by parsing archived HTML
 files from cppreference.
 
+The generated files are located in clang/include/clang/Tooling/Inclusions.
+
 Caveats and FIXMEs:
   - only symbols directly in "std" namespace are added, we should also add std's
     subnamespace symbols (e.g. chrono).
@@ -44,7 +46,8 @@
 // Used to build a lookup table (qualified names => include headers) for %s
 // Standard Library symbols.
 //
-// Automatically generated file, DO NOT EDIT!
+// This file was generated automatically by
+// clang/tools/include-mapping/gen_std.py, DO NOT EDIT!
 //
 // Generated from cppreference offline HTML book (modified on %s).
 //===----------------------------------------------------------------------===//

diff  --git a/clang-tools-extra/clangd/include-mapping/test.py b/clang/tools/include-mapping/test.py
similarity index 100%
rename from clang-tools-extra/clangd/include-mapping/test.py
rename to clang/tools/include-mapping/test.py

diff  --git a/clang/unittests/Tooling/CMakeLists.txt b/clang/unittests/Tooling/CMakeLists.txt
index 8f8142a0cc561..b532e44cea668 100644
--- a/clang/unittests/Tooling/CMakeLists.txt
+++ b/clang/unittests/Tooling/CMakeLists.txt
@@ -17,6 +17,7 @@ add_clang_unittest(ToolingTests
   ExecutionTest.cpp
   FixItTest.cpp
   HeaderIncludesTest.cpp
+  StandardLibraryTest.cpp
   LexicallyOrderedRecursiveASTVisitorTest.cpp
   LookupTest.cpp
   QualTypeNamesTest.cpp

diff  --git a/clang/unittests/Tooling/StandardLibraryTest.cpp b/clang/unittests/Tooling/StandardLibraryTest.cpp
new file mode 100644
index 0000000000000..617104c37c510
--- /dev/null
+++ b/clang/unittests/Tooling/StandardLibraryTest.cpp
@@ -0,0 +1,111 @@
+//===- unittests/Tooling/StandardLibraryTest.cpp --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Inclusions/StandardLibrary.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclarationName.h"
+#include "clang/Frontend/ASTUnit.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ScopedPrinter.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using ::testing::ElementsAre;
+
+namespace clang {
+namespace tooling {
+namespace {
+
+// Finds the declaration called Name at translation-unit scope in AST.
+// The name must resolve to exactly one declaration; anything else trips an
+// assertion.
+const NamedDecl &lookup(ASTUnit &AST, llvm::StringRef Name) {
+  auto &Context = AST.getASTContext();
+  DeclarationName DeclName(&Context.Idents.get(Name));
+  auto Found = Context.getTranslationUnitDecl()->lookup(DeclName);
+  assert(!Found.empty() && "Lookup failed");
+  assert(Found.isSingleResult() && "Lookup returned multiple results");
+  return *Found.front();
+}
+
+// Exercises name-based lookup of standard library headers and symbols, their
+// printed form, and the mapping from a symbol to the header(s) providing it.
+TEST(StdlibTest, All) {
+  auto VectorH = stdlib::Header::named("<vector>");
+  // Must be fatal: VectorH is dereferenced by the checks below.
+  ASSERT_TRUE(VectorH);
+  EXPECT_EQ(llvm::to_string(*VectorH), "<vector>");
+  // Names that are not standard library headers yield no Header.
+  EXPECT_FALSE(stdlib::Header::named("HeadersTests.cpp"));
+
+  auto Vector = stdlib::Symbol::named("std::", "vector");
+  // Must be fatal: Vector is dereferenced by the checks below.
+  ASSERT_TRUE(Vector);
+  EXPECT_EQ(llvm::to_string(*Vector), "std::vector");
+  // Unknown names, and names outside the recognized namespaces, yield no
+  // Symbol.
+  EXPECT_FALSE(stdlib::Symbol::named("std::", "dongle"));
+  EXPECT_FALSE(stdlib::Symbol::named("clang::", "ASTContext"));
+
+  EXPECT_EQ(Vector->header(), *VectorH);
+  EXPECT_THAT(Vector->headers(), ElementsAre(*VectorH));
+}
+
+// Parses a small translation unit that mimics standard library declarations
+// (including inline namespaces and a nested class) and checks which of its
+// declarations stdlib::Recognizer maps to a stdlib::Symbol.
+TEST(StdlibTest, Recognizer) {
+  std::unique_ptr<ASTUnit> AST = buildASTFromCode(R"cpp(
+    namespace std {
+    inline namespace inl {
+
+    template <typename>
+    struct vector { class nested {}; };
+
+    class secret {};
+
+    } // inl
+
+    inline namespace __1 {
+      namespace chrono {
+        inline namespace chrono_inl {
+        class system_clock {};
+        } // chrono_inl
+      } // chrono
+    } // __1
+
+    } // std
+
+    // C Standard Library structure defined in <stdlib.h>
+    struct div_t {};
+
+    class vector {};
+    std::vector<int> vec;
+    std::vector<int>::nested nest;
+    std::secret sec;
+    std::chrono::system_clock clock;
+
+    div_t div;
+  )cpp");
+  // Must be fatal: buildASTFromCode yields null on failure and AST is
+  // dereferenced by every lookup below.
+  ASSERT_TRUE(AST != nullptr);
+
+  // The global `class vector`, i.e. not the one inside namespace std.
+  auto &VectorNonstd = lookup(*AST, "vector");
+  // For the remaining cases, fetch the record type of each global variable.
+  auto *Vec =
+      cast<VarDecl>(lookup(*AST, "vec")).getType()->getAsCXXRecordDecl();
+  auto *Nest =
+      cast<VarDecl>(lookup(*AST, "nest")).getType()->getAsCXXRecordDecl();
+  auto *Clock =
+      cast<VarDecl>(lookup(*AST, "clock")).getType()->getAsCXXRecordDecl();
+  auto *Sec =
+      cast<VarDecl>(lookup(*AST, "sec")).getType()->getAsCXXRecordDecl();
+  auto *CDivT =
+      cast<VarDecl>(lookup(*AST, "div")).getType()->getAsCXXRecordDecl();
+
+  stdlib::Recognizer Recognizer;
+
+  // A global class that merely shares a name with a std symbol is rejected.
+  EXPECT_EQ(Recognizer(&VectorNonstd), llvm::None);
+  // Declared inside an inline namespace of std: still std::vector.
+  EXPECT_EQ(Recognizer(Vec), stdlib::Symbol::named("std::", "vector"));
+  // A class nested in std::vector is attributed to the enclosing std::vector.
+  EXPECT_EQ(Recognizer(Nest), stdlib::Symbol::named("std::", "vector"));
+  // Inline namespaces around and inside std::chrono are looked through.
+  EXPECT_EQ(Recognizer(Clock),
+            stdlib::Symbol::named("std::chrono::", "system_clock"));
+  // C library symbols at global scope are registered under the empty
+  // namespace.
+  EXPECT_EQ(Recognizer(CDivT), stdlib::Symbol::named("", "div_t"));
+  // A class in std whose name is not a known stdlib symbol is rejected.
+  EXPECT_EQ(Recognizer(Sec), llvm::None);
+}
+
+} // namespace
+} // namespace tooling
+} // namespace clang


        


More information about the cfe-commits mailing list