[clang-tools-extra] 478863e - [clangd] Basic IncludeCleaner support for c/c++ standard library

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 3 09:20:05 PST 2022


Author: Sam McCall
Date: 2022-01-03T18:19:56+01:00
New Revision: 478863ef58c7f7314e0669d332a90d6e233d44fb

URL: https://github.com/llvm/llvm-project/commit/478863ef58c7f7314e0669d332a90d6e233d44fb
DIFF: https://github.com/llvm/llvm-project/commit/478863ef58c7f7314e0669d332a90d6e233d44fb.diff

LOG: [clangd] Basic IncludeCleaner support for c/c++ standard library

There are some limitations here, so this is behind a flag for now (in addition
to the config setting for the overall feature).

- symbols without exactly one associated header aren't handled right
- no macro support
- referencing std::size_t usually doesn't leave any trace in the AST that the
  alias in std was used, so we associate with stddef.h instead of cstddef.
  (An AST issue not specific to stdlib, but much worse there)

Differential Revision: https://reviews.llvm.org/D114077

Added: 
    

Modified: 
    clang-tools-extra/clangd/Headers.cpp
    clang-tools-extra/clangd/Headers.h
    clang-tools-extra/clangd/IncludeCleaner.cpp
    clang-tools-extra/clangd/IncludeCleaner.h
    clang-tools-extra/clangd/tool/ClangdMain.cpp
    clang-tools-extra/clangd/unittests/HeadersTests.cpp
    clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/Headers.cpp b/clang-tools-extra/clangd/Headers.cpp
index 30cca7448be25..72da1be99283c 100644
--- a/clang-tools-extra/clangd/Headers.cpp
+++ b/clang-tools-extra/clangd/Headers.cpp
@@ -61,10 +61,19 @@ class IncludeStructure::RecordHeaders : public PPCallbacks,
           SM.getLineNumber(SM.getFileID(HashLoc), Inc.HashOffset) - 1;
       Inc.FileKind = FileKind;
       Inc.Directive = IncludeTok.getIdentifierInfo()->getPPKeywordID();
-      if (File)
-        Inc.HeaderID = static_cast<unsigned>(Out->getOrCreateID(File));
       if (LastPragmaKeepInMainFileLine == Inc.HashLine)
         Inc.BehindPragmaKeep = true;
+      if (File) {
+        IncludeStructure::HeaderID HID = Out->getOrCreateID(File);
+        Inc.HeaderID = static_cast<unsigned>(HID);
+        if (IsAngled)
+          if (auto StdlibHeader = stdlib::Header::named(Inc.Written)) {
+            auto &IDs = Out->StdlibHeaders[*StdlibHeader];
+            // Few physical files for one stdlib header name, linear scan is ok.
+            if (!llvm::is_contained(IDs, HID))
+              IDs.push_back(HID);
+          }
+      }
     }
 
     // Record include graph (not just for main-file includes)
@@ -340,5 +349,155 @@ bool operator==(const Inclusion &LHS, const Inclusion &RHS) {
          std::tie(RHS.Directive, RHS.FileKind, RHS.HashOffset, RHS.HashLine,
                   RHS.Resolved, RHS.Written);
 }
+
+namespace stdlib {
+static llvm::StringRef *HeaderNames;
+static std::pair<llvm::StringRef, llvm::StringRef> *SymbolNames;
+static unsigned *SymbolHeaderIDs;
+static llvm::DenseMap<llvm::StringRef, unsigned> *HeaderIDs;
+// Maps symbol name -> Symbol::ID, within a namespace.
+using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
+static llvm::DenseMap<llvm::StringRef, NSSymbolMap *> *NamespaceSymbols;
+
+static int initialize() {
+  unsigned SymCount = 0;
+#define SYMBOL(Name, NS, Header) ++SymCount;
+#include "CSymbolMap.inc"
+#include "StdSymbolMap.inc"
+#undef SYMBOL
+  SymbolNames = new std::remove_reference_t<decltype(*SymbolNames)>[SymCount];
+  SymbolHeaderIDs =
+      new std::remove_reference_t<decltype(*SymbolHeaderIDs)>[SymCount];
+  NamespaceSymbols = new std::remove_reference_t<decltype(*NamespaceSymbols)>;
+  HeaderIDs = new std::remove_reference_t<decltype(*HeaderIDs)>;
+
+  auto AddNS = [&](llvm::StringRef NS) -> NSSymbolMap & {
+    auto R = NamespaceSymbols->try_emplace(NS, nullptr);
+    if (R.second)
+      R.first->second = new NSSymbolMap();
+    return *R.first->second;
+  };
+
+  auto AddHeader = [&](llvm::StringRef Header) -> unsigned {
+    return HeaderIDs->try_emplace(Header, HeaderIDs->size()).first->second;
+  };
+
+  auto Add = [&, SymIndex(0)](llvm::StringRef Name, llvm::StringRef NS,
+                              llvm::StringRef HeaderName) mutable {
+    if (NS == "None")
+      NS = "";
+
+    SymbolNames[SymIndex] = {NS, Name};
+    SymbolHeaderIDs[SymIndex] = AddHeader(HeaderName);
+
+    NSSymbolMap &NSSymbols = AddNS(NS);
+    NSSymbols.try_emplace(Name, SymIndex);
+
+    ++SymIndex;
+  };
+#define SYMBOL(Name, NS, Header) Add(#Name, #NS, #Header);
+#include "CSymbolMap.inc"
+#include "StdSymbolMap.inc"
+#undef SYMBOL
+
+  HeaderNames = new llvm::StringRef[HeaderIDs->size()];
+  for (const auto &E : *HeaderIDs)
+    HeaderNames[E.second] = E.first;
+
+  return 0;
+}
+
+static void ensureInitialized() {
+  static int Dummy = initialize();
+  (void)Dummy;
+}
+
+llvm::Optional<Header> Header::named(llvm::StringRef Name) {
+  ensureInitialized();
+  auto It = HeaderIDs->find(Name);
+  if (It == HeaderIDs->end())
+    return llvm::None;
+  return Header(It->second);
+}
+llvm::StringRef Header::name() const { return HeaderNames[ID]; }
+llvm::StringRef Symbol::scope() const { return SymbolNames[ID].first; }
+llvm::StringRef Symbol::name() const { return SymbolNames[ID].second; }
+llvm::Optional<Symbol> Symbol::named(llvm::StringRef Scope,
+                                     llvm::StringRef Name) {
+  ensureInitialized();
+  if (NSSymbolMap *NSSymbols = NamespaceSymbols->lookup(Scope)) {
+    auto It = NSSymbols->find(Name);
+    if (It != NSSymbols->end())
+      return Symbol(It->second);
+  }
+  return llvm::None;
+}
+Header Symbol::header() const { return Header(SymbolHeaderIDs[ID]); }
+llvm::SmallVector<Header> Symbol::headers() const {
+  return {header()}; // FIXME: multiple in case of ambiguity
+}
+
+Recognizer::Recognizer() { ensureInitialized(); }
+
+NSSymbolMap *Recognizer::namespaceSymbols(const NamespaceDecl *D) {
+  auto It = NamespaceCache.find(D);
+  if (It != NamespaceCache.end())
+    return It->second;
+
+  NSSymbolMap *Result = [&]() -> NSSymbolMap * {
+    if (!D) // Nullptr means the global namespace
+      return NamespaceSymbols->lookup("");
+    if (D->isAnonymousNamespace())
+      return nullptr;
+    if (D->isInlineNamespace()) {
+      if (auto *Parent = llvm::dyn_cast_or_null<NamespaceDecl>(D->getParent()))
+        return namespaceSymbols(Parent);
+      return nullptr;
+    }
+    return NamespaceSymbols->lookup(printNamespaceScope(*D));
+  }();
+  NamespaceCache.try_emplace(D, Result);
+  return Result;
+}
+
+llvm::Optional<Symbol> Recognizer::operator()(const Decl *D) {
+  // If D is std::vector::iterator, `vector` is the outer symbol to look up.
+  // We keep all the candidate DCs as some may turn out to be anon enums.
+  // Do this resolution lazily as we may turn out not to have a std namespace.
+  llvm::SmallVector<const DeclContext *> IntermediateDecl;
+  const DeclContext *DC = D->getDeclContext();
+  while (DC && !DC->isNamespace()) {
+    if (NamedDecl::classofKind(DC->getDeclKind()))
+      IntermediateDecl.push_back(DC);
+    DC = DC->getParent();
+  }
+  NSSymbolMap *Symbols = namespaceSymbols(cast_or_null<NamespaceDecl>(DC));
+  if (!Symbols)
+    return llvm::None;
+
+  llvm::StringRef Name = [&]() -> llvm::StringRef {
+    for (const auto *SymDC : llvm::reverse(IntermediateDecl)) {
+      DeclarationName N = cast<NamedDecl>(SymDC)->getDeclName();
+      if (const auto *II = N.getAsIdentifierInfo())
+        return II->getName();
+      if (!N.isEmpty())
+        return ""; // e.g. operator<: give up
+    }
+    if (const auto *ND = llvm::dyn_cast<NamedDecl>(D))
+      if (const auto *II = ND->getIdentifier())
+        return II->getName();
+    return "";
+  }();
+  if (Name.empty())
+    return llvm::None;
+
+  auto It = Symbols->find(Name);
+  if (It == Symbols->end())
+    return llvm::None;
+  return Symbol(It->second);
+}
+
+} // namespace stdlib
+
 } // namespace clangd
 } // namespace clang

diff  --git a/clang-tools-extra/clangd/Headers.h b/clang-tools-extra/clangd/Headers.h
index c5f5746f25770..9612ce8def466 100644
--- a/clang-tools-extra/clangd/Headers.h
+++ b/clang-tools-extra/clangd/Headers.h
@@ -32,8 +32,78 @@
 #include <string>
 
 namespace clang {
+class Decl;
+class NamespaceDecl;
 namespace clangd {
 
+// clangd has a built-in database of standard library symbols.
+namespace stdlib {
+
+// A standard library header, such as <iostream>
+// Lightweight class, in fact just an index into a table.
+class Header {
+public:
+  static llvm::Optional<Header> named(llvm::StringRef Name);
+
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Header &H) {
+    return OS << H.name();
+  }
+  llvm::StringRef name() const;
+
+private:
+  Header(unsigned ID) : ID(ID) {}
+  unsigned ID;
+  friend class Symbol;
+  friend llvm::DenseMapInfo<Header>;
+  friend bool operator==(const Header &L, const Header &R) {
+    return L.ID == R.ID;
+  }
+};
+
+// A top-level standard library symbol, such as std::vector
+// Lightweight class, in fact just an index into a table.
+class Symbol {
+public:
+  static llvm::Optional<Symbol> named(llvm::StringRef Scope,
+                                      llvm::StringRef Name);
+
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) {
+    return OS << S.scope() << S.name();
+  }
+  llvm::StringRef scope() const;
+  llvm::StringRef name() const;
+  // The preferred header for this symbol (e.g. the suggested insertion).
+  Header header() const;
+  // Some symbols may be provided my multiple headers.
+  llvm::SmallVector<Header> headers() const;
+
+private:
+  Symbol(unsigned ID) : ID(ID) {}
+  unsigned ID;
+  friend class Recognizer;
+  friend llvm::DenseMapInfo<Symbol>;
+  friend bool operator==(const Symbol &L, const Symbol &R) {
+    return L.ID == R.ID;
+  }
+};
+
+// A functor to find the stdlib::Symbol associated with a decl.
+//
+// For non-top-level decls (std::vector<int>::iterator), returns the top-level
+// symbol (std::vector).
+class Recognizer {
+public:
+  Recognizer();
+  llvm::Optional<Symbol> operator()(const Decl *D);
+
+private:
+  using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
+  NSSymbolMap *namespaceSymbols(const NamespaceDecl *D);
+  llvm::DenseMap<const DeclContext *, NSSymbolMap *> NamespaceCache;
+};
+
+} // namespace stdlib
+
 /// Returns true if \p Include is literal include like "path" or <path>.
 bool isLiteralInclude(llvm::StringRef Include);
 
@@ -160,6 +230,8 @@ class IncludeStructure {
   // Maps HeaderID to the ids of the files included from it.
   llvm::DenseMap<HeaderID, SmallVector<HeaderID>> IncludeChildren;
 
+  llvm::DenseMap<stdlib::Header, llvm::SmallVector<HeaderID>> StdlibHeaders;
+
   std::vector<Inclusion> MainFileIncludes;
 
   // We reserve HeaderID(0) for the main file and will manually check for that
@@ -250,13 +322,11 @@ namespace llvm {
 // Support HeaderIDs as DenseMap keys.
 template <> struct DenseMapInfo<clang::clangd::IncludeStructure::HeaderID> {
   static inline clang::clangd::IncludeStructure::HeaderID getEmptyKey() {
-    return static_cast<clang::clangd::IncludeStructure::HeaderID>(
-        DenseMapInfo<unsigned>::getEmptyKey());
+    return static_cast<clang::clangd::IncludeStructure::HeaderID>(-1);
   }
 
   static inline clang::clangd::IncludeStructure::HeaderID getTombstoneKey() {
-    return static_cast<clang::clangd::IncludeStructure::HeaderID>(
-        DenseMapInfo<unsigned>::getTombstoneKey());
+    return static_cast<clang::clangd::IncludeStructure::HeaderID>(-2);
   }
 
   static unsigned
@@ -270,6 +340,38 @@ template <> struct DenseMapInfo<clang::clangd::IncludeStructure::HeaderID> {
   }
 };
 
+template <> struct DenseMapInfo<clang::clangd::stdlib::Header> {
+  static inline clang::clangd::stdlib::Header getEmptyKey() {
+    return clang::clangd::stdlib::Header(-1);
+  }
+  static inline clang::clangd::stdlib::Header getTombstoneKey() {
+    return clang::clangd::stdlib::Header(-2);
+  }
+  static unsigned getHashValue(const clang::clangd::stdlib::Header &H) {
+    return hash_value(H.ID);
+  }
+  static bool isEqual(const clang::clangd::stdlib::Header &LHS,
+                      const clang::clangd::stdlib::Header &RHS) {
+    return LHS == RHS;
+  }
+};
+
+template <> struct DenseMapInfo<clang::clangd::stdlib::Symbol> {
+  static inline clang::clangd::stdlib::Symbol getEmptyKey() {
+    return clang::clangd::stdlib::Symbol(-1);
+  }
+  static inline clang::clangd::stdlib::Symbol getTombstoneKey() {
+    return clang::clangd::stdlib::Symbol(-2);
+  }
+  static unsigned getHashValue(const clang::clangd::stdlib::Symbol &S) {
+    return hash_value(S.ID);
+  }
+  static bool isEqual(const clang::clangd::stdlib::Symbol &LHS,
+                      const clang::clangd::stdlib::Symbol &RHS) {
+    return LHS == RHS;
+  }
+};
+
 } // namespace llvm
 
 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H

diff  --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp
index 9e51f5430be80..85ba59519e8ad 100644
--- a/clang-tools-extra/clangd/IncludeCleaner.cpp
+++ b/clang-tools-extra/clangd/IncludeCleaner.cpp
@@ -26,6 +26,10 @@
 
 namespace clang {
 namespace clangd {
+
+static bool AnalyzeStdlib = false;
+void setIncludeCleanerAnalyzesStdlib(bool B) { AnalyzeStdlib = B; }
+
 namespace {
 
 /// Crawler traverses the AST and feeds in the locations of (sometimes
@@ -127,6 +131,10 @@ class ReferencedLocationCrawler
   void add(const Decl *D) {
     if (!D || !isNew(D->getCanonicalDecl()))
       return;
+    if (auto SS = StdRecognizer(D)) {
+      Result.Stdlib.insert(*SS);
+      return;
+    }
     // Special case RecordDecls, as it is common for them to be forward
     // declared multiple times. The most common cases are:
     // - Definition available in TU, only mark that one as usage. The rest is
@@ -136,14 +144,14 @@ class ReferencedLocationCrawler
     //   redecls.
     if (const auto *RD = llvm::dyn_cast<RecordDecl>(D)) {
       if (const auto *Definition = RD->getDefinition()) {
-        Result.insert(Definition->getLocation());
+        Result.User.insert(Definition->getLocation());
         return;
       }
       if (SM.isInMainFile(RD->getMostRecentDecl()->getLocation()))
         return;
     }
     for (const Decl *Redecl : D->redecls())
-      Result.insert(Redecl->getLocation());
+      Result.User.insert(Redecl->getLocation());
   }
 
   bool isNew(const void *P) { return P && Visited.insert(P).second; }
@@ -151,13 +159,14 @@ class ReferencedLocationCrawler
   ReferencedLocations &Result;
   llvm::DenseSet<const void *> Visited;
   const SourceManager &SM;
+  stdlib::Recognizer StdRecognizer;
 };
 
 // Given a set of referenced FileIDs, determines all the potentially-referenced
 // files and macros by traversing expansion/spelling locations of macro IDs.
 // This is used to map the referenced SourceLocations onto real files.
-struct ReferencedFiles {
-  ReferencedFiles(const SourceManager &SM) : SM(SM) {}
+struct ReferencedFilesBuilder {
+  ReferencedFilesBuilder(const SourceManager &SM) : SM(SM) {}
   llvm::DenseSet<FileID> Files;
   llvm::DenseSet<FileID> Macros;
   const SourceManager &SM;
@@ -218,18 +227,23 @@ void findReferencedMacros(ParsedAST &AST, ReferencedLocations &Result) {
       continue;
     auto Loc = Macro->Info->getDefinitionLoc();
     if (Loc.isValid())
-      Result.insert(Loc);
+      Result.User.insert(Loc);
+    // FIXME: support stdlib macros
   }
 }
 
-bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) {
+static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) {
+  if (Inc.BehindPragmaKeep)
+    return false;
+
   // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
-  // Standard Library headers are typically umbrella headers, and system
-  // headers are likely to be the Standard Library headers. Until we have a
-  // good support for umbrella headers and Standard Library headers, don't warn
-  // about them.
-  if (Inc.Written.front() == '<' || Inc.BehindPragmaKeep)
+  // System headers are likely to be standard library headers.
+  // Until we have good support for umbrella headers, don't warn about them.
+  if (Inc.Written.front() == '<') {
+    if (AnalyzeStdlib && stdlib::Header::named(Inc.Written))
+      return true;
     return false;
+  }
   // Headers without include guards have side effects and are not
   // self-contained, skip them.
   assert(Inc.HeaderID);
@@ -282,29 +296,36 @@ ReferencedLocations findReferencedLocations(ParsedAST &AST) {
   return Result;
 }
 
-llvm::DenseSet<FileID>
-findReferencedFiles(const llvm::DenseSet<SourceLocation> &Locs,
-                    const IncludeStructure &Includes, const SourceManager &SM) {
-  std::vector<SourceLocation> Sorted{Locs.begin(), Locs.end()};
+ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs,
+                                    const IncludeStructure &Includes,
+                                    const SourceManager &SM) {
+  std::vector<SourceLocation> Sorted{Locs.User.begin(), Locs.User.end()};
   llvm::sort(Sorted); // Group by FileID.
-  ReferencedFiles Files(SM);
+  ReferencedFilesBuilder Builder(SM);
   for (auto It = Sorted.begin(); It < Sorted.end();) {
     FileID FID = SM.getFileID(*It);
-    Files.add(FID, *It);
+    Builder.add(FID, *It);
     // Cheaply skip over all the other locations from the same FileID.
     // This avoids lots of redundant Loc->File lookups for the same file.
     do
       ++It;
     while (It != Sorted.end() && SM.isInFileID(*It, FID));
   }
+
   // If a header is not self-contained, we consider its symbols a logical part
   // of the including file. Therefore, mark the parents of all used
   // non-self-contained FileIDs as used. Perform this on FileIDs rather than
   // HeaderIDs, as each inclusion of a non-self-contained file is distinct.
-  llvm::DenseSet<FileID> Result;
-  for (FileID ID : Files.Files)
-    Result.insert(headerResponsible(ID, SM, Includes));
-  return Result;
+  llvm::DenseSet<FileID> UserFiles;
+  for (FileID ID : Builder.Files)
+    UserFiles.insert(headerResponsible(ID, SM, Includes));
+
+  llvm::DenseSet<stdlib::Header> StdlibFiles;
+  for (const auto &Symbol : Locs.Stdlib)
+    for (const auto &Header : Symbol.headers())
+      StdlibFiles.insert(Header);
+
+  return {std::move(UserFiles), std::move(StdlibFiles)};
 }
 
 std::vector<const Inclusion *>
@@ -338,13 +359,13 @@ static bool isSpecialBuffer(FileID FID, const SourceManager &SM) {
 #endif
 
 llvm::DenseSet<IncludeStructure::HeaderID>
-translateToHeaderIDs(const llvm::DenseSet<FileID> &Files,
+translateToHeaderIDs(const ReferencedFiles &Files,
                      const IncludeStructure &Includes,
                      const SourceManager &SM) {
   trace::Span Tracer("IncludeCleaner::translateToHeaderIDs");
   llvm::DenseSet<IncludeStructure::HeaderID> TranslatedHeaderIDs;
-  TranslatedHeaderIDs.reserve(Files.size());
-  for (FileID FID : Files) {
+  TranslatedHeaderIDs.reserve(Files.User.size());
+  for (FileID FID : Files.User) {
     const FileEntry *FE = SM.getFileEntryForID(FID);
     if (!FE) {
       assert(isSpecialBuffer(FID, SM));
@@ -354,6 +375,9 @@ translateToHeaderIDs(const llvm::DenseSet<FileID> &Files,
     assert(File);
     TranslatedHeaderIDs.insert(*File);
   }
+  for (stdlib::Header StdlibUsed : Files.Stdlib)
+    for (auto HID : Includes.StdlibHeaders.lookup(StdlibUsed))
+      TranslatedHeaderIDs.insert(HID);
   return TranslatedHeaderIDs;
 }
 

diff  --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h
index 368cc70323278..198de95ea2fda 100644
--- a/clang-tools-extra/clangd/IncludeCleaner.h
+++ b/clang-tools-extra/clangd/IncludeCleaner.h
@@ -30,7 +30,11 @@
 namespace clang {
 namespace clangd {
 
-using ReferencedLocations = llvm::DenseSet<SourceLocation>;
+struct ReferencedLocations {
+  llvm::DenseSet<SourceLocation> User;
+  llvm::DenseSet<stdlib::Symbol> Stdlib;
+};
+
 /// Finds locations of all symbols used in the main file.
 ///
 /// - RecursiveASTVisitor finds references to symbols and records their
@@ -48,17 +52,22 @@ using ReferencedLocations = llvm::DenseSet<SourceLocation>;
 /// - err on the side of reporting all possible locations
 ReferencedLocations findReferencedLocations(ParsedAST &AST);
 
+struct ReferencedFiles {
+  llvm::DenseSet<FileID> User;
+  llvm::DenseSet<stdlib::Header> Stdlib;
+};
+
 /// Retrieves IDs of all files containing SourceLocations from \p Locs.
 /// The output only includes things SourceManager sees as files (not macro IDs).
 /// This can include <built-in>, <scratch space> etc that are not true files.
-llvm::DenseSet<FileID> findReferencedFiles(const ReferencedLocations &Locs,
-                                           const IncludeStructure &Includes,
-                                           const SourceManager &SM);
+ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs,
+                                    const IncludeStructure &Includes,
+                                    const SourceManager &SM);
 
 /// Maps FileIDs to the internal IncludeStructure representation (HeaderIDs).
 /// FileIDs that are not true files (<built-in> etc) are dropped.
 llvm::DenseSet<IncludeStructure::HeaderID>
-translateToHeaderIDs(const llvm::DenseSet<FileID> &Files,
+translateToHeaderIDs(const ReferencedFiles &Files,
                      const IncludeStructure &Includes, const SourceManager &SM);
 
 /// Retrieves headers that are referenced from the main file but not used.
@@ -72,6 +81,15 @@ std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST);
 std::vector<Diag> issueUnusedIncludesDiagnostics(ParsedAST &AST,
                                                  llvm::StringRef Code);
 
+/// Affects whether standard library includes should be considered for removal.
+/// This is off by default for now due to implementation limitations:
+/// - macros are not tracked
+/// - symbol names without a unique associated header are not tracked
+/// - references to std-namespaced C types are not properly tracked:
+///   instead of std::size_t -> <cstddef> we see ::size_t -> <stddef.h>
+/// FIXME: remove this hack once the implementation is good enough.
+void setIncludeCleanerAnalyzesStdlib(bool B);
+
 } // namespace clangd
 } // namespace clang
 

diff  --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp
index 08631a31eda6c..ba38aeed87a16 100644
--- a/clang-tools-extra/clangd/tool/ClangdMain.cpp
+++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp
@@ -12,6 +12,7 @@
 #include "Config.h"
 #include "ConfigProvider.h"
 #include "Feature.h"
+#include "IncludeCleaner.h"
 #include "PathMapping.h"
 #include "Protocol.h"
 #include "TidyProvider.h"
@@ -251,6 +252,15 @@ opt<CodeCompleteOptions::IncludeInsertion> HeaderInsertion{
             "Never insert #include directives as part of code completion")),
 };
 
+opt<bool> IncludeCleanerStdlib{
+    "include-cleaner-stdlib",
+    cat(Features),
+    desc("Apply include-cleaner analysis to standard library headers "
+         "(immature!)"),
+    init(false),
+    Hidden,
+};
+
 opt<bool> HeaderInsertionDecorators{
     "header-insertion-decorators",
     cat(Features),
@@ -932,6 +942,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var
   };
   if (ForceOffsetEncoding != OffsetEncoding::UnsupportedEncoding)
     Opts.Encoding = ForceOffsetEncoding;
+  setIncludeCleanerAnalyzesStdlib(IncludeCleanerStdlib);
 
   if (CheckFile.getNumOccurrences()) {
     llvm::SmallString<256> Path;

diff  --git a/clang-tools-extra/clangd/unittests/HeadersTests.cpp b/clang-tools-extra/clangd/unittests/HeadersTests.cpp
index 22caa59e33202..738a2fc18b2df 100644
--- a/clang-tools-extra/clangd/unittests/HeadersTests.cpp
+++ b/clang-tools-extra/clangd/unittests/HeadersTests.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/ScopedPrinter.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
@@ -408,6 +409,58 @@ void foo();
   EXPECT_FALSE(Includes.isSelfContained(getID("pp_depend.h", Includes)));
 }
 
+TEST(StdlibTest, All) {
+  auto VectorH = stdlib::Header::named("<vector>");
+  EXPECT_TRUE(VectorH);
+  EXPECT_EQ(llvm::to_string(*VectorH), "<vector>");
+  EXPECT_FALSE(stdlib::Header::named("HeadersTests.cpp"));
+
+  auto Vector = stdlib::Symbol::named("std::", "vector");
+  EXPECT_TRUE(Vector);
+  EXPECT_EQ(llvm::to_string(*Vector), "std::vector");
+  EXPECT_FALSE(stdlib::Symbol::named("std::", "dongle"));
+  EXPECT_FALSE(stdlib::Symbol::named("clang::", "ASTContext"));
+
+  EXPECT_EQ(Vector->header(), *VectorH);
+  EXPECT_THAT(Vector->headers(), ElementsAre(*VectorH));
+}
+
+TEST(StdlibTest, Recognizer) {
+  auto TU = TestTU::withCode(R"cpp(
+    namespace std {
+    inline namespace inl {
+
+    template <typename>
+    struct vector { class nested {}; };
+
+    class secret {};
+
+    } // inl
+    } // std
+
+    class vector {};
+    std::vector<int> vec;
+    std::vector<int>::nested nest;
+    std::secret sec;
+  )cpp");
+
+  auto AST = TU.build();
+  auto &vector_nonstd = findDecl(AST, "vector");
+  auto *vec =
+      cast<VarDecl>(findDecl(AST, "vec")).getType()->getAsCXXRecordDecl();
+  auto *nest =
+      cast<VarDecl>(findDecl(AST, "nest")).getType()->getAsCXXRecordDecl();
+  auto *sec =
+      cast<VarDecl>(findDecl(AST, "sec")).getType()->getAsCXXRecordDecl();
+
+  stdlib::Recognizer recognizer;
+
+  EXPECT_EQ(recognizer(&vector_nonstd), llvm::None);
+  EXPECT_EQ(recognizer(vec), stdlib::Symbol::named("std::", "vector"));
+  EXPECT_EQ(recognizer(nest), stdlib::Symbol::named("std::", "vector"));
+  EXPECT_EQ(recognizer(sec), llvm::None);
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang

diff  --git a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
index 12bcc9440f2de..b7792ca6f90da 100644
--- a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
+++ b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
@@ -8,7 +8,9 @@
 
 #include "Annotations.h"
 #include "IncludeCleaner.h"
+#include "SourceCode.h"
 #include "TestTU.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/Testing/Support/SupportHelpers.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
@@ -18,7 +20,9 @@ namespace clangd {
 namespace {
 
 using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
 using ::testing::IsEmpty;
+using ::testing::Pointee;
 using ::testing::UnorderedElementsAre;
 
 std::string guard(llvm::StringRef Code) {
@@ -211,7 +215,7 @@ TEST(IncludeCleaner, ReferencedLocations) {
     auto AST = TU.build();
 
     std::vector<Position> Points;
-    for (const auto &Loc : findReferencedLocations(AST)) {
+    for (const auto &Loc : findReferencedLocations(AST).User) {
       if (AST.getSourceManager().getBufferName(Loc).endswith(
               TU.HeaderFilename)) {
         Points.push_back(offsetToPosition(
@@ -225,6 +229,82 @@ TEST(IncludeCleaner, ReferencedLocations) {
   }
 }
 
+TEST(IncludeCleaner, Stdlib) {
+  // Smoke tests only for finding used symbols/headers.
+  // Details of Decl -> stdlib::Symbol -> stdlib::Headers mapping tested there.
+  auto TU = TestTU::withHeaderCode(R"cpp(
+    namespace std { class error_code {}; }
+    class error_code {};
+    namespace nonstd { class error_code {}; }
+  )cpp");
+  struct {
+    llvm::StringRef Code;
+    std::vector<llvm::StringRef> Symbols;
+    std::vector<llvm::StringRef> Headers;
+  } Tests[] = {
+      {"std::error_code x;", {"std::error_code"}, {"<system_error>"}},
+      {"error_code x;", {}, {}},
+      {"nonstd::error_code x;", {}, {}},
+  };
+
+  for (const auto &Test : Tests) {
+    TU.Code = Test.Code.str();
+    ParsedAST AST = TU.build();
+    std::vector<stdlib::Symbol> WantSyms;
+    for (const auto &SymName : Test.Symbols) {
+      auto QName = splitQualifiedName(SymName);
+      auto Sym = stdlib::Symbol::named(QName.first, QName.second);
+      EXPECT_TRUE(Sym) << SymName;
+      WantSyms.push_back(*Sym);
+    }
+    std::vector<stdlib::Header> WantHeaders;
+    for (const auto &HeaderName : Test.Headers) {
+      auto Header = stdlib::Header::named(HeaderName);
+      EXPECT_TRUE(Header) << HeaderName;
+      WantHeaders.push_back(*Header);
+    }
+
+    ReferencedLocations Locs = findReferencedLocations(AST);
+    EXPECT_THAT(Locs.Stdlib, ElementsAreArray(WantSyms));
+    ReferencedFiles Files = findReferencedFiles(Locs, AST.getIncludeStructure(),
+                                                AST.getSourceManager());
+    EXPECT_THAT(Files.Stdlib, ElementsAreArray(WantHeaders));
+  }
+}
+
+MATCHER_P(WrittenInclusion, Written, "") {
+  if (arg.Written != Written)
+    *result_listener << arg.Written;
+  return arg.Written == Written;
+}
+
+TEST(IncludeCleaner, StdlibUnused) {
+  setIncludeCleanerAnalyzesStdlib(true);
+  auto Cleanup =
+      llvm::make_scope_exit([] { setIncludeCleanerAnalyzesStdlib(false); });
+
+  auto TU = TestTU::withCode(R"cpp(
+    #include <list>
+    #include <queue>
+    std::list<int> x;
+  )cpp");
+  // Layout of std library impl is not relevant.
+  TU.AdditionalFiles["bits"] = R"cpp(
+    #pragma once
+    namespace std {
+      template <typename> class list {};
+      template <typename> class queue {};
+    }
+  )cpp";
+  TU.AdditionalFiles["list"] = "#include <bits>";
+  TU.AdditionalFiles["queue"] = "#include <bits>";
+  TU.ExtraArgs = {"-isystem", testRoot()};
+  auto AST = TU.build();
+
+  auto Unused = computeUnusedIncludes(AST);
+  EXPECT_THAT(Unused, ElementsAre(Pointee(WrittenInclusion("<queue>"))));
+}
+
 TEST(IncludeCleaner, GetUnusedHeaders) {
   llvm::StringLiteral MainFile = R"cpp(
     #include "a.h"
@@ -301,7 +381,7 @@ TEST(IncludeCleaner, VirtualBuffers) {
   auto ReferencedFiles =
       findReferencedFiles(findReferencedLocations(AST), Includes, SM);
   llvm::StringSet<> ReferencedFileNames;
-  for (FileID FID : ReferencedFiles)
+  for (FileID FID : ReferencedFiles.User)
     ReferencedFileNames.insert(
         SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename());
   // Note we deduped the names as _number_ of <built-in>s is uninteresting.
@@ -352,7 +432,7 @@ TEST(IncludeCleaner, DistinctUnguardedInclusions) {
                           AST.getIncludeStructure(), AST.getSourceManager());
   llvm::StringSet<> ReferencedFileNames;
   auto &SM = AST.getSourceManager();
-  for (FileID FID : ReferencedFiles)
+  for (FileID FID : ReferencedFiles.User)
     ReferencedFileNames.insert(
         SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename());
   // Note that we have uplifted the referenced files from non self-contained
@@ -386,7 +466,7 @@ TEST(IncludeCleaner, NonSelfContainedHeaders) {
                           AST.getIncludeStructure(), AST.getSourceManager());
   llvm::StringSet<> ReferencedFileNames;
   auto &SM = AST.getSourceManager();
-  for (FileID FID : ReferencedFiles)
+  for (FileID FID : ReferencedFiles.User)
     ReferencedFileNames.insert(
         SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename());
   // Note that we have uplifted the referenced files from non self-contained
@@ -406,7 +486,7 @@ TEST(IncludeCleaner, IWYUPragmas) {
   auto ReferencedFiles =
       findReferencedFiles(findReferencedLocations(AST),
                           AST.getIncludeStructure(), AST.getSourceManager());
-  EXPECT_TRUE(ReferencedFiles.empty());
+  EXPECT_TRUE(ReferencedFiles.User.empty());
   EXPECT_THAT(AST.getDiagnostics(), llvm::ValueIs(IsEmpty()));
 }
 


        


More information about the cfe-commits mailing list