[clang-tools-extra] b8c5837 - [clangd] Group filename calculations in SymbolCollector, and cache mroe.

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 11 03:59:36 PST 2021


Author: Sam McCall
Date: 2021-03-11T12:59:26+01:00
New Revision: b8c58374f66b2a03ff3f48c419037d463c5b80ed

URL: https://github.com/llvm/llvm-project/commit/b8c58374f66b2a03ff3f48c419037d463c5b80ed
DIFF: https://github.com/llvm/llvm-project/commit/b8c58374f66b2a03ff3f48c419037d463c5b80ed.diff

LOG: [clangd] Group filename calculations in SymbolCollector, and cache mroe.

Also give CanonicalIncludes a less powerful interface (canonicalizes
symbols vs headers separately) so we can cache its results better.

Prior to this:
 - path->uri conversions were not consistently cached, this is
   particularly cheap when we start from a FileEntry* (which we often can)
 - only a small fraction of header-to-include calculation was cached

This is a significant speedup at least for dynamic indexing of preambles.
On my machine, opening XRefs.cpp:

```
PreambleCallback 1.208 -> 1.019 (-15.7%)
BuildPreamble    5.538 -> 5.214 (-5.8%)
```

Differential Revision: https://reviews.llvm.org/D98371

Added: 
    

Modified: 
    clang-tools-extra/clangd/index/CanonicalIncludes.cpp
    clang-tools-extra/clangd/index/CanonicalIncludes.h
    clang-tools-extra/clangd/index/FileIndex.cpp
    clang-tools-extra/clangd/index/SymbolCollector.cpp
    clang-tools-extra/clangd/index/SymbolCollector.h
    clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp
    clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
index 4741ef622512..f7269b686552 100644
--- a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
+++ b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
@@ -28,22 +28,14 @@ void CanonicalIncludes::addMapping(llvm::StringRef Path,
 /// Used to minimize the number of lookups in suffix path mappings.
 constexpr int MaxSuffixComponents = 3;
 
-llvm::StringRef
-CanonicalIncludes::mapHeader(llvm::StringRef Header,
-                             llvm::StringRef QualifiedName) const {
+llvm::StringRef CanonicalIncludes::mapHeader(llvm::StringRef Header) const {
   assert(!Header.empty());
-  if (StdSymbolMapping) {
-    auto SE = StdSymbolMapping->find(QualifiedName);
-    if (SE != StdSymbolMapping->end())
-      return SE->second;
-  }
-
   auto MapIt = FullPathMapping.find(Header);
   if (MapIt != FullPathMapping.end())
     return MapIt->second;
 
   if (!StdSuffixHeaderMapping)
-    return Header;
+    return "";
 
   int Components = 1;
 
@@ -56,7 +48,11 @@ CanonicalIncludes::mapHeader(llvm::StringRef Header,
     if (MappingIt != StdSuffixHeaderMapping->end())
       return MappingIt->second;
   }
-  return Header;
+  return "";
+}
+
+llvm::StringRef CanonicalIncludes::mapSymbol(llvm::StringRef QName) const {
+  return StdSymbolMapping ? StdSymbolMapping->lookup(QName) : "";
 }
 
 std::unique_ptr<CommentHandler>

diff  --git a/clang-tools-extra/clangd/index/CanonicalIncludes.h b/clang-tools-extra/clangd/index/CanonicalIncludes.h
index da7dc46db778..36e33e9324dc 100644
--- a/clang-tools-extra/clangd/index/CanonicalIncludes.h
+++ b/clang-tools-extra/clangd/index/CanonicalIncludes.h
@@ -38,11 +38,11 @@ class CanonicalIncludes {
   /// Adds a string-to-string mapping from \p Path to \p CanonicalPath.
   void addMapping(llvm::StringRef Path, llvm::StringRef CanonicalPath);
 
-  /// Returns the canonical include for symbol with \p QualifiedName.
-  /// \p Header is the file the declaration was reachable from.
-  /// Header itself will be returned if there is no relevant mapping.
-  llvm::StringRef mapHeader(llvm::StringRef Header,
-                            llvm::StringRef QualifiedName) const;
+  /// Returns the overridden include for symbol with \p QualifiedName, or "".
+  llvm::StringRef mapSymbol(llvm::StringRef QualifiedName) const;
+
+  /// Returns the overridden include for for files in \p Header, or "".
+  llvm::StringRef mapHeader(llvm::StringRef Header) const;
 
   /// Adds mapping for system headers and some special symbols (e.g. STL symbols
   /// in <iosfwd> need to be mapped individually). Approximately, the following

diff  --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp
index b91c66b88770..eb9648c2a6ae 100644
--- a/clang-tools-extra/clangd/index/FileIndex.cpp
+++ b/clang-tools-extra/clangd/index/FileIndex.cpp
@@ -77,9 +77,9 @@ SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
 
   SymbolCollector Collector(std::move(CollectorOpts));
   Collector.setPreprocessor(PP);
+  index::indexTopLevelDecls(AST, *PP, DeclsToIndex, Collector, IndexOpts);
   if (MacroRefsToIndex)
     Collector.handleMacros(*MacroRefsToIndex);
-  index::indexTopLevelDecls(AST, *PP, DeclsToIndex, Collector, IndexOpts);
 
   const auto &SM = AST.getSourceManager();
   const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());

diff  --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index e330c9fe402d..84d0ca9bbbd1 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -48,31 +48,6 @@ const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
   return ND;
 }
 
-// Returns a URI of \p Path. Firstly, this makes the \p Path absolute using the
-// current working directory of the given SourceManager if the Path is not an
-// absolute path. If failed, this resolves relative paths against \p FallbackDir
-// to get an absolute path. Then, this tries creating an URI for the absolute
-// path with schemes specified in \p Opts. This returns an URI with the first
-// working scheme, if there is any; otherwise, this returns None.
-//
-// The Path can be a path relative to the build directory, or retrieved from
-// the SourceManager.
-std::string toURI(const SourceManager &SM, llvm::StringRef Path,
-                  const SymbolCollector::Options &Opts) {
-  llvm::SmallString<128> AbsolutePath(Path);
-  if (auto File = SM.getFileManager().getFile(Path)) {
-    if (auto CanonPath = getCanonicalPath(*File, SM)) {
-      AbsolutePath = *CanonPath;
-    }
-  }
-  // We don't perform is_absolute check in an else branch because makeAbsolute
-  // might return a relative path on some InMemoryFileSystems.
-  if (!llvm::sys::path::is_absolute(AbsolutePath) && !Opts.FallbackDir.empty())
-    llvm::sys::fs::make_absolute(Opts.FallbackDir, AbsolutePath);
-  llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/true);
-  return URI::create(AbsolutePath).toString();
-}
-
 // Checks whether the decl is a private symbol in a header generated by
 // protobuf compiler.
 // FIXME: make filtering extensible when there are more use cases for symbol
@@ -139,25 +114,6 @@ getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
           CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
 }
 
-// Return the symbol location of the token at \p TokLoc.
-llvm::Optional<SymbolLocation>
-getTokenLocation(SourceLocation TokLoc, const SourceManager &SM,
-                 const SymbolCollector::Options &Opts,
-                 const clang::LangOptions &LangOpts,
-                 std::string &FileURIStorage) {
-  auto Path = SM.getFilename(TokLoc);
-  if (Path.empty())
-    return None;
-  FileURIStorage = toURI(SM, Path, Opts);
-  SymbolLocation Result;
-  Result.FileURI = FileURIStorage.c_str();
-  auto Range = getTokenRange(TokLoc, SM, LangOpts);
-  Result.Start = Range.first;
-  Result.End = Range.second;
-
-  return Result;
-}
-
 // Checks whether \p ND is a good candidate to be the *canonical* declaration of
 // its symbol (e.g. a go-to-declaration target). This overrides the default of
 // using Clang's canonical declaration, which is the first in the TU.
@@ -198,10 +154,178 @@ llvm::Optional<RelationKind> indexableRelation(const index::SymbolRelation &R) {
 
 } // namespace
 
+// Encapsulates decisions about how to record header paths in the index,
+// including filename normalization, URI conversion etc.
+// Expensive checks are cached internally.
+class SymbolCollector::HeaderFileURICache {
+  // Weird double-indirect access to PP, which might not be ready yet when
+  // HeaderFiles is created but will be by the time it's used.
+  // (IndexDataConsumer::setPreprocessor can happen before or after initialize)
+  const std::shared_ptr<Preprocessor> &PP;
+  const SourceManager &SM;
+  const CanonicalIncludes *Includes;
+  llvm::StringRef FallbackDir;
+  llvm::DenseMap<const FileEntry *, const std::string *> CacheFEToURI;
+  llvm::StringMap<std::string> CachePathToURI;
+  llvm::DenseMap<FileID, llvm::StringRef> CacheFIDToInclude;
+
+public:
+  HeaderFileURICache(const std::shared_ptr<Preprocessor> &PP,
+                     const SourceManager &SM,
+                     const SymbolCollector::Options &Opts)
+      : PP(PP), SM(SM), Includes(Opts.Includes), FallbackDir(Opts.FallbackDir) {
+  }
+
+  // Returns a canonical URI for the file \p FE.
+  // We attempt to make the path absolute first.
+  const std::string &toURI(const FileEntry *FE) {
+    auto R = CacheFEToURI.try_emplace(FE);
+    if (R.second) {
+      auto CanonPath = getCanonicalPath(FE, SM);
+      R.first->second = &toURIInternal(CanonPath ? *CanonPath : FE->getName());
+    }
+    return *R.first->second;
+  }
+
+  // Returns a canonical URI for \p Path.
+  // If the file is in the FileManager, use that to canonicalize the path.
+  // We attempt to make the path absolute in any case.
+  const std::string &toURI(llvm::StringRef Path) {
+    if (auto File = SM.getFileManager().getFile(Path))
+      return toURI(*File);
+    return toURIInternal(Path);
+  }
+
+  // Gets a canonical include (URI of the header or <header> or "header") for
+  // header of \p FID (which should usually be the *expansion* file).
+  // This does not account for any per-symbol overrides!
+  // Returns "" if includes should not be inserted for this file.
+  llvm::StringRef getIncludeHeader(FileID FID) {
+    auto R = CacheFIDToInclude.try_emplace(FID);
+    if (R.second)
+      R.first->second = getIncludeHeaderUncached(FID);
+    return R.first->second;
+  }
+
+private:
+  // This takes care of making paths absolute and path->URI caching, but no
+  // FileManager-based canonicalization.
+  const std::string &toURIInternal(llvm::StringRef Path) {
+    auto R = CachePathToURI.try_emplace(Path);
+    if (R.second) {
+      llvm::SmallString<256> AbsPath = Path;
+      if (!llvm::sys::path::is_absolute(AbsPath) && !FallbackDir.empty())
+        llvm::sys::fs::make_absolute(FallbackDir, AbsPath);
+      assert(llvm::sys::path::is_absolute(AbsPath) &&
+             "If the VFS can't make paths absolute, a FallbackDir must be "
+             "provided");
+      llvm::sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
+      R.first->second = URI::create(AbsPath).toString();
+    }
+    return R.first->second;
+  }
+
+  llvm::StringRef getIncludeHeaderUncached(FileID FID) {
+    const FileEntry *FE = SM.getFileEntryForID(FID);
+    if (!FE || FE->getName().empty())
+      return "";
+    llvm::StringRef Filename = FE->getName();
+    // If a file is mapped by canonical headers, use that mapping, regardless
+    // of whether it's an otherwise-good header (header guards etc).
+    if (Includes) {
+      llvm::StringRef Canonical = Includes->mapHeader(Filename);
+      if (!Canonical.empty()) {
+        // If we had a mapping, always use it.
+        if (Canonical.startswith("<") || Canonical.startswith("\""))
+          return Canonical;
+        return toURI(Canonical);
+      }
+    }
+    if (!isSelfContainedHeader(FID, FE)) {
+      // A .inc or .def file is often included into a real header to define
+      // symbols (e.g. LLVM tablegen files).
+      if (Filename.endswith(".inc") || Filename.endswith(".def"))
+        // Don't use cache reentrantly due to iterator invalidation.
+        return getIncludeHeaderUncached(SM.getFileID(SM.getIncludeLoc(FID)));
+      // Conservatively refuse to insert #includes to files without guards.
+      return "";
+    }
+    // Standard case: just insert the file itself.
+    return toURI(FE);
+  }
+
+  bool isSelfContainedHeader(FileID FID, const FileEntry *FE) {
+    // FIXME: Should files that have been #import'd be considered
+    // self-contained? That's really a property of the includer,
+    // not of the file.
+    if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) &&
+        !PP->getHeaderSearchInfo().hasFileBeenImported(FE))
+      return false;
+    // This pattern indicates that a header can't be used without
+    // particular preprocessor state, usually set up by another header.
+    if (isDontIncludeMeHeader(SM.getBufferData(FID)))
+      return false;
+    return true;
+  }
+
+  // Is Line an #if or #ifdef directive?
+  static bool isIf(llvm::StringRef Line) {
+    Line = Line.ltrim();
+    if (!Line.consume_front("#"))
+      return false;
+    Line = Line.ltrim();
+    return Line.startswith("if");
+  }
+
+  // Is Line an #error directive mentioning includes?
+  static bool isErrorAboutInclude(llvm::StringRef Line) {
+    Line = Line.ltrim();
+    if (!Line.consume_front("#"))
+      return false;
+    Line = Line.ltrim();
+    if (!Line.startswith("error"))
+      return false;
+    return Line.contains_lower("includ"); // Matches "include" or "including".
+  }
+
+  // Heuristically headers that only want to be included via an umbrella.
+  static bool isDontIncludeMeHeader(llvm::StringRef Content) {
+    llvm::StringRef Line;
+    // Only sniff up to 100 lines or 10KB.
+    Content = Content.take_front(100 * 100);
+    for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
+      std::tie(Line, Content) = Content.split('\n');
+      if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
+        return true;
+    }
+    return false;
+  }
+};
+
+// Return the symbol location of the token at \p TokLoc.
+llvm::Optional<SymbolLocation>
+SymbolCollector::getTokenLocation(SourceLocation TokLoc) {
+  const auto &SM = ASTCtx->getSourceManager();
+  auto *FE = SM.getFileEntryForID(SM.getFileID(TokLoc));
+  if (!FE)
+    return None;
+
+  SymbolLocation Result;
+  Result.FileURI = HeaderFileURIs->toURI(FE).c_str();
+  auto Range = getTokenRange(TokLoc, SM, ASTCtx->getLangOpts());
+  Result.Start = Range.first;
+  Result.End = Range.second;
+
+  return Result;
+}
+
 SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
+SymbolCollector::~SymbolCollector() = default;
 
 void SymbolCollector::initialize(ASTContext &Ctx) {
   ASTCtx = &Ctx;
+  HeaderFileURIs = std::make_unique<HeaderFileURICache>(
+      PP, ASTCtx->getSourceManager(), Opts);
   CompletionAllocator = std::make_shared<GlobalCodeCompletionAllocator>();
   CompletionTUInfo =
       std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);
@@ -262,7 +386,7 @@ bool SymbolCollector::handleDeclOccurrence(
     const Decl *D, index::SymbolRoleSet Roles,
     llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
     index::IndexDataConsumer::ASTNodeInfo ASTNode) {
-  assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
+  assert(ASTCtx && PP.get() && HeaderFileURIs);
   assert(CompletionAllocator && CompletionTUInfo);
   assert(ASTNode.OrigD);
   // Indexing API puts canonical decl into D, which might not have a valid
@@ -383,12 +507,12 @@ bool SymbolCollector::handleDeclOccurrence(
 }
 
 void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {
-  assert(PP.get());
+  assert(HeaderFileURIs && PP.get());
   const auto &SM = PP->getSourceManager();
   const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
   assert(MainFileEntry);
 
-  const auto MainFileURI = toURI(SM, MainFileEntry->getName(), Opts);
+  const std::string &MainFileURI = HeaderFileURIs->toURI(MainFileEntry);
   // Add macro references.
   for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {
     for (const auto &MacroRef : IDToRefs.second) {
@@ -486,11 +610,9 @@ bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,
   }
   S.SymInfo = index::getSymbolInfoForMacro(*MI);
   S.Origin = Opts.Origin;
-  std::string FileURI;
   // FIXME: use the result to filter out symbols.
   shouldIndexFile(SM.getFileID(Loc));
-  if (auto DeclLoc =
-          getTokenLocation(DefLoc, SM, Opts, PP->getLangOpts(), FileURI))
+  if (auto DeclLoc = getTokenLocation(DefLoc))
     S.CanonicalDeclaration = *DeclLoc;
 
   CodeCompletionResult SymbolCompletion(Name);
@@ -578,44 +700,48 @@ void SymbolCollector::finish() {
   }
   // Fill in IncludeHeaders.
   // We delay this until end of TU so header guards are all resolved.
-  // Symbols in slabs aren't mutable, so insert() has to walk all the strings
-  // :-(
-  for (const auto &Entry : IncludeFiles)
+  llvm::SmallString<128> QName;
+  for (const auto &Entry : IncludeFiles) {
     if (const Symbol *S = Symbols.find(Entry.first)) {
-      if (auto Header = getIncludeHeader(*S, Entry.second)) {
+      llvm::StringRef IncludeHeader;
+      // Look for an overridden include header for this symbol specifically.
+      if (Opts.Includes) {
+        QName = S->Scope;
+        QName.append(S->Name);
+        IncludeHeader = Opts.Includes->mapSymbol(QName);
+        if (!IncludeHeader.empty()) {
+          if (IncludeHeader.front() != '"' && IncludeHeader.front() != '<')
+            IncludeHeader = HeaderFileURIs->toURI(IncludeHeader);
+          else if (IncludeHeader == "<utility>" && QName == "std::move" &&
+                   S->Signature.contains(','))
+            IncludeHeader = "<algorithm>";
+        }
+      }
+      // Otherwise find the approprate include header for the defining file.
+      if (IncludeHeader.empty())
+        IncludeHeader = HeaderFileURIs->getIncludeHeader(Entry.second);
+
+      // Symbols in slabs aren't mutable, insert() has to walk all the strings
+      if (!IncludeHeader.empty()) {
         Symbol NewSym = *S;
-        NewSym.IncludeHeaders.push_back({std::move(*Header), 1});
+        NewSym.IncludeHeaders.push_back({IncludeHeader, 1});
         Symbols.insert(NewSym);
       }
     }
+  }
 
   const auto &SM = ASTCtx->getSourceManager();
-  llvm::DenseMap<FileID, std::string> URICache;
-  auto GetURI = [&](FileID FID) -> llvm::Optional<std::string> {
-    auto Found = URICache.find(FID);
-    if (Found == URICache.end()) {
-      if (auto *FileEntry = SM.getFileEntryForID(FID)) {
-        auto FileURI = toURI(SM, FileEntry->getName(), Opts);
-        Found = URICache.insert({FID, FileURI}).first;
-      } else {
-        // Ignore cases where we can not find a corresponding file entry for
-        // given location, e.g. symbols formed via macro concatenation.
-        return None;
-      }
-    }
-    return Found->second;
-  };
   auto CollectRef = [&](SymbolID ID, const SymbolRef &LocAndRole,
                         bool Spelled = false) {
     auto FileID = SM.getFileID(LocAndRole.Loc);
     // FIXME: use the result to filter out references.
     shouldIndexFile(FileID);
-    if (auto FileURI = GetURI(FileID)) {
+    if (const auto *FE = SM.getFileEntryForID(FileID)) {
       auto Range = getTokenRange(LocAndRole.Loc, SM, ASTCtx->getLangOpts());
       Ref R;
       R.Location.Start = Range.first;
       R.Location.End = Range.second;
-      R.Location.FileURI = FileURI->c_str();
+      R.Location.FileURI = HeaderFileURIs->toURI(FE).c_str();
       R.Kind = toRefKind(LocAndRole.Roles, Spelled);
       R.Container = getSymbolID(LocAndRole.Container);
       Refs.insert(ID, R);
@@ -656,8 +782,6 @@ void SymbolCollector::finish() {
   ReferencedDecls.clear();
   ReferencedMacros.clear();
   DeclRefs.clear();
-  FilesToIndexCache.clear();
-  HeaderIsSelfContainedCache.clear();
   IncludeFiles.clear();
 }
 
@@ -683,13 +807,11 @@ const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
   if (!IsMainFileOnly)
     S.Flags |= Symbol::VisibleOutsideFile;
   S.SymInfo = index::getSymbolInfo(&ND);
-  std::string FileURI;
   auto Loc = nameLocation(ND, SM);
   assert(Loc.isValid() && "Invalid source location for NamedDecl");
   // FIXME: use the result to filter out symbols.
   shouldIndexFile(SM.getFileID(Loc));
-  if (auto DeclLoc =
-          getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
+  if (auto DeclLoc = getTokenLocation(Loc))
     S.CanonicalDeclaration = *DeclLoc;
 
   S.Origin = Opts.Origin;
@@ -743,114 +865,15 @@ void SymbolCollector::addDefinition(const NamedDecl &ND,
   // This is not ideal, but avoids duplicating the "is this a definition" check
   // in clang::index. We should only see one definition.
   Symbol S = DeclSym;
-  std::string FileURI;
   const auto &SM = ND.getASTContext().getSourceManager();
   auto Loc = nameLocation(ND, SM);
   // FIXME: use the result to filter out symbols.
   shouldIndexFile(SM.getFileID(Loc));
-  if (auto DefLoc =
-          getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
+  if (auto DefLoc = getTokenLocation(Loc))
     S.Definition = *DefLoc;
   Symbols.insert(S);
 }
 
-/// Gets a canonical include (URI of the header or <header> or "header") for
-/// header of \p FID (which should usually be the *expansion* file).
-/// Returns None if includes should not be inserted for this file.
-llvm::Optional<std::string> SymbolCollector::getIncludeHeader(const Symbol &S,
-                                                              FileID FID) {
-  const SourceManager &SM = ASTCtx->getSourceManager();
-  const FileEntry *FE = SM.getFileEntryForID(FID);
-  if (!FE || FE->getName().empty())
-    return llvm::None;
-  llvm::StringRef Filename = FE->getName();
-  // If a file is mapped by canonical headers, use that mapping, regardless
-  // of whether it's an otherwise-good header (header guards etc).
-  if (Opts.Includes) {
-    llvm::SmallString<256> QName = S.Scope;
-    QName.append(S.Name);
-    llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName);
-    // If we had a mapping, always use it.
-    if (Canonical.startswith("<") || Canonical.startswith("\"")) {
-      // Hack: there are two std::move() overloads from 
diff erent headers.
-      // CanonicalIncludes returns the common one-arg one from <utility>.
-      if (Canonical == "<utility>" && S.Name == "move" &&
-          S.Signature.contains(','))
-        Canonical = "<algorithm>";
-      return Canonical.str();
-    }
-    if (Canonical != Filename)
-      return toURI(SM, Canonical, Opts);
-  }
-  if (!isSelfContainedHeader(FID)) {
-    // A .inc or .def file is often included into a real header to define
-    // symbols (e.g. LLVM tablegen files).
-    if (Filename.endswith(".inc") || Filename.endswith(".def"))
-      return getIncludeHeader(S, SM.getFileID(SM.getIncludeLoc(FID)));
-    // Conservatively refuse to insert #includes to files without guards.
-    return llvm::None;
-  }
-  // Standard case: just insert the file itself.
-  return toURI(SM, Filename, Opts);
-}
-
-bool SymbolCollector::isSelfContainedHeader(FileID FID) {
-  // The real computation (which will be memoized).
-  auto Compute = [&] {
-    const SourceManager &SM = ASTCtx->getSourceManager();
-    const FileEntry *FE = SM.getFileEntryForID(FID);
-    if (!FE)
-      return false;
-    // FIXME: Should files that have been #import'd be considered
-    // self-contained? That's really a property of the includer,
-    // not of the file.
-    if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) &&
-        !PP->getHeaderSearchInfo().hasFileBeenImported(FE))
-      return false;
-    // This pattern indicates that a header can't be used without
-    // particular preprocessor state, usually set up by another header.
-    if (isDontIncludeMeHeader(SM.getBufferData(FID)))
-      return false;
-    return true;
-  };
-
-  auto R = HeaderIsSelfContainedCache.try_emplace(FID, false);
-  if (R.second)
-    R.first->second = Compute();
-  return R.first->second;
-}
-
-// Is Line an #if or #ifdef directive?
-static bool isIf(llvm::StringRef Line) {
-  Line = Line.ltrim();
-  if (!Line.consume_front("#"))
-    return false;
-  Line = Line.ltrim();
-  return Line.startswith("if");
-}
-// Is Line an #error directive mentioning includes?
-static bool isErrorAboutInclude(llvm::StringRef Line) {
-  Line = Line.ltrim();
-  if (!Line.consume_front("#"))
-    return false;
-  Line = Line.ltrim();
-  if (!Line.startswith("error"))
-    return false;
-  return Line.contains_lower("includ"); // Matches "include" or "including".
-}
-
-bool SymbolCollector::isDontIncludeMeHeader(llvm::StringRef Content) {
-  llvm::StringRef Line;
-  // Only sniff up to 100 lines or 10KB.
-  Content = Content.take_front(100 * 100);
-  for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
-    std::tie(Line, Content) = Content.split('\n');
-    if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
-      return true;
-  }
-  return false;
-}
-
 bool SymbolCollector::shouldIndexFile(FileID FID) {
   if (!Opts.FileFilter)
     return true;

diff  --git a/clang-tools-extra/clangd/index/SymbolCollector.h b/clang-tools-extra/clangd/index/SymbolCollector.h
index 92f847f3d8f3..af184fde69c6 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.h
+++ b/clang-tools-extra/clangd/index/SymbolCollector.h
@@ -91,6 +91,7 @@ class SymbolCollector : public index::IndexDataConsumer {
   };
 
   SymbolCollector(Options Opts);
+  ~SymbolCollector();
 
   /// Returns true is \p ND should be collected.
   static bool shouldCollectSymbol(const NamedDecl &ND, const ASTContext &ASTCtx,
@@ -132,10 +133,9 @@ class SymbolCollector : public index::IndexDataConsumer {
   void processRelations(const NamedDecl &ND, const SymbolID &ID,
                         ArrayRef<index::SymbolRelation> Relations);
 
+  llvm::Optional<SymbolLocation> getTokenLocation(SourceLocation TokLoc);
+
   llvm::Optional<std::string> getIncludeHeader(const Symbol &S, FileID);
-  bool isSelfContainedHeader(FileID);
-  // Heuristically headers that only want to be included via an umbrella.
-  static bool isDontIncludeMeHeader(llvm::StringRef);
 
   // All Symbols collected from the AST.
   SymbolSlab::Builder Symbols;
@@ -175,7 +175,10 @@ class SymbolCollector : public index::IndexDataConsumer {
   llvm::DenseMap<const Decl *, const Decl *> CanonicalDecls;
   // Cache whether to index a file or not.
   llvm::DenseMap<FileID, bool> FilesToIndexCache;
-  llvm::DenseMap<FileID, bool> HeaderIsSelfContainedCache;
+  // Encapsulates calculations and caches around header paths, which headers
+  // to insert for which symbol, etc.
+  class HeaderFileURICache;
+  std::unique_ptr<HeaderFileURICache> HeaderFileURIs;
 };
 
 } // namespace clangd

diff  --git a/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp b/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp
index fa96a4579624..31f99b7615d3 100644
--- a/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp
@@ -20,11 +20,8 @@ TEST(CanonicalIncludesTest, CStandardLibrary) {
   Language.C11 = true;
   CI.addSystemHeadersMapping(Language);
   // Usual standard library symbols are mapped correctly.
-  EXPECT_EQ("<stdio.h>", CI.mapHeader("path/stdio.h", "printf"));
-  // Suffix mapping isn't available for C, instead of mapping to `<cstdio> we
-  // just leave the header as-is.
-  EXPECT_EQ("include/stdio.h",
-            CI.mapHeader("include/stdio.h", "unknown_symbol"));
+  EXPECT_EQ("<stdio.h>", CI.mapSymbol("printf"));
+  EXPECT_EQ("", CI.mapSymbol("unknown_symbol"));
 }
 
 TEST(CanonicalIncludesTest, CXXStandardLibrary) {
@@ -34,17 +31,16 @@ TEST(CanonicalIncludesTest, CXXStandardLibrary) {
   CI.addSystemHeadersMapping(Language);
 
   // Usual standard library symbols are mapped correctly.
-  EXPECT_EQ("<vector>", CI.mapHeader("path/vector.h", "std::vector"));
-  EXPECT_EQ("<cstdio>", CI.mapHeader("path/stdio.h", "std::printf"));
+  EXPECT_EQ("<vector>", CI.mapSymbol("std::vector"));
+  EXPECT_EQ("<cstdio>", CI.mapSymbol("std::printf"));
   // std::move is ambiguous, currently always mapped to <utility>
-  EXPECT_EQ("<utility>",
-            CI.mapHeader("libstdc++/bits/stl_algo.h", "std::move"));
+  EXPECT_EQ("<utility>", CI.mapSymbol("std::move"));
   // Unknown std symbols aren't mapped.
-  EXPECT_EQ("foo/bar.h", CI.mapHeader("foo/bar.h", "std::notathing"));
+  EXPECT_EQ("", CI.mapSymbol("std::notathing"));
   // iosfwd declares some symbols it doesn't own.
-  EXPECT_EQ("<ostream>", CI.mapHeader("iosfwd", "std::ostream"));
+  EXPECT_EQ("<ostream>", CI.mapSymbol("std::ostream"));
   // And (for now) we assume it owns the others.
-  EXPECT_EQ("<iosfwd>", CI.mapHeader("iosfwd", "std::notwathing"));
+  EXPECT_EQ("<iosfwd>", CI.mapHeader("iosfwd"));
 }
 
 TEST(CanonicalIncludesTest, PathMapping) {
@@ -52,20 +48,8 @@ TEST(CanonicalIncludesTest, PathMapping) {
   CanonicalIncludes CI;
   CI.addMapping("foo/bar", "<baz>");
 
-  EXPECT_EQ("<baz>", CI.mapHeader("foo/bar", "some::symbol"));
-  EXPECT_EQ("bar/bar", CI.mapHeader("bar/bar", "some::symbol"));
-}
-
-TEST(CanonicalIncludesTest, SymbolMapping) {
-  // As used for standard library.
-  CanonicalIncludes CI;
-  LangOptions Language;
-  Language.CPlusPlus = true;
-  // Ensures 'std::vector' is mapped to '<vector>'.
-  CI.addSystemHeadersMapping(Language);
-
-  EXPECT_EQ("<vector>", CI.mapHeader("foo/bar", "std::vector"));
-  EXPECT_EQ("foo/bar", CI.mapHeader("foo/bar", "other::symbol"));
+  EXPECT_EQ("<baz>", CI.mapHeader("foo/bar"));
+  EXPECT_EQ("", CI.mapHeader("bar/bar"));
 }
 
 TEST(CanonicalIncludesTest, Precedence) {
@@ -76,15 +60,9 @@ TEST(CanonicalIncludesTest, Precedence) {
   CI.addSystemHeadersMapping(Language);
 
   // We added a mapping from some/path to <path>.
-  ASSERT_EQ("<path>", CI.mapHeader("some/path", ""));
+  ASSERT_EQ("<path>", CI.mapHeader("some/path"));
   // We should have a path from 'bits/stl_vector.h' to '<vector>'.
-  ASSERT_EQ("<vector>", CI.mapHeader("bits/stl_vector.h", ""));
-  // We should also have a symbol mapping from 'std::map' to '<map>'.
-  ASSERT_EQ("<map>", CI.mapHeader("some/header.h", "std::map"));
-
-  // And the symbol mapping should take precedence over paths mapping.
-  EXPECT_EQ("<map>", CI.mapHeader("bits/stl_vector.h", "std::map"));
-  EXPECT_EQ("<map>", CI.mapHeader("some/path", "std::map"));
+  ASSERT_EQ("<vector>", CI.mapHeader("bits/stl_vector.h"));
 }
 
 } // namespace

diff  --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
index 924cfd03cba7..87df23baf48a 100644
--- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
@@ -1463,9 +1463,6 @@ TEST_F(SymbolCollectorTest, CanonicalSTLHeader) {
       }
       )cpp",
       /*Main=*/"");
-  for (const auto &S : Symbols)
-    llvm::errs() << S.Scope << S.Name << " in " << S.IncludeHeaders.size()
-                 << "\n";
   EXPECT_THAT(
       Symbols,
       UnorderedElementsAre(


        


More information about the cfe-commits mailing list