[clang-tools-extra] b8c5837 - [clangd] Group filename calculations in SymbolCollector, and cache mroe.
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 11 03:59:36 PST 2021
Author: Sam McCall
Date: 2021-03-11T12:59:26+01:00
New Revision: b8c58374f66b2a03ff3f48c419037d463c5b80ed
URL: https://github.com/llvm/llvm-project/commit/b8c58374f66b2a03ff3f48c419037d463c5b80ed
DIFF: https://github.com/llvm/llvm-project/commit/b8c58374f66b2a03ff3f48c419037d463c5b80ed.diff
LOG: [clangd] Group filename calculations in SymbolCollector, and cache mroe.
Also give CanonicalIncludes a less powerful interface (canonicalizes
symbols vs headers separately) so we can cache its results better.
Prior to this:
- path->uri conversions were not consistently cached, this is
particularly cheap when we start from a FileEntry* (which we often can)
- only a small fraction of header-to-include calculation was cached
This is a significant speedup at least for dynamic indexing of preambles.
On my machine, opening XRefs.cpp:
```
PreambleCallback 1.208 -> 1.019 (-15.7%)
BuildPreamble 5.538 -> 5.214 (-5.8%)
```
Differential Revision: https://reviews.llvm.org/D98371
Added:
Modified:
clang-tools-extra/clangd/index/CanonicalIncludes.cpp
clang-tools-extra/clangd/index/CanonicalIncludes.h
clang-tools-extra/clangd/index/FileIndex.cpp
clang-tools-extra/clangd/index/SymbolCollector.cpp
clang-tools-extra/clangd/index/SymbolCollector.h
clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp
clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
index 4741ef622512..f7269b686552 100644
--- a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
+++ b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
@@ -28,22 +28,14 @@ void CanonicalIncludes::addMapping(llvm::StringRef Path,
/// Used to minimize the number of lookups in suffix path mappings.
constexpr int MaxSuffixComponents = 3;
-llvm::StringRef
-CanonicalIncludes::mapHeader(llvm::StringRef Header,
- llvm::StringRef QualifiedName) const {
+llvm::StringRef CanonicalIncludes::mapHeader(llvm::StringRef Header) const {
assert(!Header.empty());
- if (StdSymbolMapping) {
- auto SE = StdSymbolMapping->find(QualifiedName);
- if (SE != StdSymbolMapping->end())
- return SE->second;
- }
-
auto MapIt = FullPathMapping.find(Header);
if (MapIt != FullPathMapping.end())
return MapIt->second;
if (!StdSuffixHeaderMapping)
- return Header;
+ return "";
int Components = 1;
@@ -56,7 +48,11 @@ CanonicalIncludes::mapHeader(llvm::StringRef Header,
if (MappingIt != StdSuffixHeaderMapping->end())
return MappingIt->second;
}
- return Header;
+ return "";
+}
+
+llvm::StringRef CanonicalIncludes::mapSymbol(llvm::StringRef QName) const {
+ return StdSymbolMapping ? StdSymbolMapping->lookup(QName) : "";
}
std::unique_ptr<CommentHandler>
diff --git a/clang-tools-extra/clangd/index/CanonicalIncludes.h b/clang-tools-extra/clangd/index/CanonicalIncludes.h
index da7dc46db778..36e33e9324dc 100644
--- a/clang-tools-extra/clangd/index/CanonicalIncludes.h
+++ b/clang-tools-extra/clangd/index/CanonicalIncludes.h
@@ -38,11 +38,11 @@ class CanonicalIncludes {
/// Adds a string-to-string mapping from \p Path to \p CanonicalPath.
void addMapping(llvm::StringRef Path, llvm::StringRef CanonicalPath);
- /// Returns the canonical include for symbol with \p QualifiedName.
- /// \p Header is the file the declaration was reachable from.
- /// Header itself will be returned if there is no relevant mapping.
- llvm::StringRef mapHeader(llvm::StringRef Header,
- llvm::StringRef QualifiedName) const;
+ /// Returns the overridden include for symbol with \p QualifiedName, or "".
+ llvm::StringRef mapSymbol(llvm::StringRef QualifiedName) const;
+
+ /// Returns the overridden include for for files in \p Header, or "".
+ llvm::StringRef mapHeader(llvm::StringRef Header) const;
/// Adds mapping for system headers and some special symbols (e.g. STL symbols
/// in <iosfwd> need to be mapped individually). Approximately, the following
diff --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp
index b91c66b88770..eb9648c2a6ae 100644
--- a/clang-tools-extra/clangd/index/FileIndex.cpp
+++ b/clang-tools-extra/clangd/index/FileIndex.cpp
@@ -77,9 +77,9 @@ SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
SymbolCollector Collector(std::move(CollectorOpts));
Collector.setPreprocessor(PP);
+ index::indexTopLevelDecls(AST, *PP, DeclsToIndex, Collector, IndexOpts);
if (MacroRefsToIndex)
Collector.handleMacros(*MacroRefsToIndex);
- index::indexTopLevelDecls(AST, *PP, DeclsToIndex, Collector, IndexOpts);
const auto &SM = AST.getSourceManager();
const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index e330c9fe402d..84d0ca9bbbd1 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -48,31 +48,6 @@ const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
return ND;
}
-// Returns a URI of \p Path. Firstly, this makes the \p Path absolute using the
-// current working directory of the given SourceManager if the Path is not an
-// absolute path. If failed, this resolves relative paths against \p FallbackDir
-// to get an absolute path. Then, this tries creating an URI for the absolute
-// path with schemes specified in \p Opts. This returns an URI with the first
-// working scheme, if there is any; otherwise, this returns None.
-//
-// The Path can be a path relative to the build directory, or retrieved from
-// the SourceManager.
-std::string toURI(const SourceManager &SM, llvm::StringRef Path,
- const SymbolCollector::Options &Opts) {
- llvm::SmallString<128> AbsolutePath(Path);
- if (auto File = SM.getFileManager().getFile(Path)) {
- if (auto CanonPath = getCanonicalPath(*File, SM)) {
- AbsolutePath = *CanonPath;
- }
- }
- // We don't perform is_absolute check in an else branch because makeAbsolute
- // might return a relative path on some InMemoryFileSystems.
- if (!llvm::sys::path::is_absolute(AbsolutePath) && !Opts.FallbackDir.empty())
- llvm::sys::fs::make_absolute(Opts.FallbackDir, AbsolutePath);
- llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/true);
- return URI::create(AbsolutePath).toString();
-}
-
// Checks whether the decl is a private symbol in a header generated by
// protobuf compiler.
// FIXME: make filtering extensible when there are more use cases for symbol
@@ -139,25 +114,6 @@ getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
}
-// Return the symbol location of the token at \p TokLoc.
-llvm::Optional<SymbolLocation>
-getTokenLocation(SourceLocation TokLoc, const SourceManager &SM,
- const SymbolCollector::Options &Opts,
- const clang::LangOptions &LangOpts,
- std::string &FileURIStorage) {
- auto Path = SM.getFilename(TokLoc);
- if (Path.empty())
- return None;
- FileURIStorage = toURI(SM, Path, Opts);
- SymbolLocation Result;
- Result.FileURI = FileURIStorage.c_str();
- auto Range = getTokenRange(TokLoc, SM, LangOpts);
- Result.Start = Range.first;
- Result.End = Range.second;
-
- return Result;
-}
-
// Checks whether \p ND is a good candidate to be the *canonical* declaration of
// its symbol (e.g. a go-to-declaration target). This overrides the default of
// using Clang's canonical declaration, which is the first in the TU.
@@ -198,10 +154,178 @@ llvm::Optional<RelationKind> indexableRelation(const index::SymbolRelation &R) {
} // namespace
+// Encapsulates decisions about how to record header paths in the index,
+// including filename normalization, URI conversion etc.
+// Expensive checks are cached internally.
+class SymbolCollector::HeaderFileURICache {
+ // Weird double-indirect access to PP, which might not be ready yet when
+ // HeaderFiles is created but will be by the time it's used.
+ // (IndexDataConsumer::setPreprocessor can happen before or after initialize)
+ const std::shared_ptr<Preprocessor> &PP;
+ const SourceManager &SM;
+ const CanonicalIncludes *Includes;
+ llvm::StringRef FallbackDir;
+ llvm::DenseMap<const FileEntry *, const std::string *> CacheFEToURI;
+ llvm::StringMap<std::string> CachePathToURI;
+ llvm::DenseMap<FileID, llvm::StringRef> CacheFIDToInclude;
+
+public:
+ HeaderFileURICache(const std::shared_ptr<Preprocessor> &PP,
+ const SourceManager &SM,
+ const SymbolCollector::Options &Opts)
+ : PP(PP), SM(SM), Includes(Opts.Includes), FallbackDir(Opts.FallbackDir) {
+ }
+
+ // Returns a canonical URI for the file \p FE.
+ // We attempt to make the path absolute first.
+ const std::string &toURI(const FileEntry *FE) {
+ auto R = CacheFEToURI.try_emplace(FE);
+ if (R.second) {
+ auto CanonPath = getCanonicalPath(FE, SM);
+ R.first->second = &toURIInternal(CanonPath ? *CanonPath : FE->getName());
+ }
+ return *R.first->second;
+ }
+
+ // Returns a canonical URI for \p Path.
+ // If the file is in the FileManager, use that to canonicalize the path.
+ // We attempt to make the path absolute in any case.
+ const std::string &toURI(llvm::StringRef Path) {
+ if (auto File = SM.getFileManager().getFile(Path))
+ return toURI(*File);
+ return toURIInternal(Path);
+ }
+
+ // Gets a canonical include (URI of the header or <header> or "header") for
+ // header of \p FID (which should usually be the *expansion* file).
+ // This does not account for any per-symbol overrides!
+ // Returns "" if includes should not be inserted for this file.
+ llvm::StringRef getIncludeHeader(FileID FID) {
+ auto R = CacheFIDToInclude.try_emplace(FID);
+ if (R.second)
+ R.first->second = getIncludeHeaderUncached(FID);
+ return R.first->second;
+ }
+
+private:
+ // This takes care of making paths absolute and path->URI caching, but no
+ // FileManager-based canonicalization.
+ const std::string &toURIInternal(llvm::StringRef Path) {
+ auto R = CachePathToURI.try_emplace(Path);
+ if (R.second) {
+ llvm::SmallString<256> AbsPath = Path;
+ if (!llvm::sys::path::is_absolute(AbsPath) && !FallbackDir.empty())
+ llvm::sys::fs::make_absolute(FallbackDir, AbsPath);
+ assert(llvm::sys::path::is_absolute(AbsPath) &&
+ "If the VFS can't make paths absolute, a FallbackDir must be "
+ "provided");
+ llvm::sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
+ R.first->second = URI::create(AbsPath).toString();
+ }
+ return R.first->second;
+ }
+
+ llvm::StringRef getIncludeHeaderUncached(FileID FID) {
+ const FileEntry *FE = SM.getFileEntryForID(FID);
+ if (!FE || FE->getName().empty())
+ return "";
+ llvm::StringRef Filename = FE->getName();
+ // If a file is mapped by canonical headers, use that mapping, regardless
+ // of whether it's an otherwise-good header (header guards etc).
+ if (Includes) {
+ llvm::StringRef Canonical = Includes->mapHeader(Filename);
+ if (!Canonical.empty()) {
+ // If we had a mapping, always use it.
+ if (Canonical.startswith("<") || Canonical.startswith("\""))
+ return Canonical;
+ return toURI(Canonical);
+ }
+ }
+ if (!isSelfContainedHeader(FID, FE)) {
+ // A .inc or .def file is often included into a real header to define
+ // symbols (e.g. LLVM tablegen files).
+ if (Filename.endswith(".inc") || Filename.endswith(".def"))
+ // Don't use cache reentrantly due to iterator invalidation.
+ return getIncludeHeaderUncached(SM.getFileID(SM.getIncludeLoc(FID)));
+ // Conservatively refuse to insert #includes to files without guards.
+ return "";
+ }
+ // Standard case: just insert the file itself.
+ return toURI(FE);
+ }
+
+ bool isSelfContainedHeader(FileID FID, const FileEntry *FE) {
+ // FIXME: Should files that have been #import'd be considered
+ // self-contained? That's really a property of the includer,
+ // not of the file.
+ if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) &&
+ !PP->getHeaderSearchInfo().hasFileBeenImported(FE))
+ return false;
+ // This pattern indicates that a header can't be used without
+ // particular preprocessor state, usually set up by another header.
+ if (isDontIncludeMeHeader(SM.getBufferData(FID)))
+ return false;
+ return true;
+ }
+
+ // Is Line an #if or #ifdef directive?
+ static bool isIf(llvm::StringRef Line) {
+ Line = Line.ltrim();
+ if (!Line.consume_front("#"))
+ return false;
+ Line = Line.ltrim();
+ return Line.startswith("if");
+ }
+
+ // Is Line an #error directive mentioning includes?
+ static bool isErrorAboutInclude(llvm::StringRef Line) {
+ Line = Line.ltrim();
+ if (!Line.consume_front("#"))
+ return false;
+ Line = Line.ltrim();
+ if (!Line.startswith("error"))
+ return false;
+ return Line.contains_lower("includ"); // Matches "include" or "including".
+ }
+
+ // Heuristically headers that only want to be included via an umbrella.
+ static bool isDontIncludeMeHeader(llvm::StringRef Content) {
+ llvm::StringRef Line;
+ // Only sniff up to 100 lines or 10KB.
+ Content = Content.take_front(100 * 100);
+ for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
+ std::tie(Line, Content) = Content.split('\n');
+ if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
+ return true;
+ }
+ return false;
+ }
+};
+
+// Return the symbol location of the token at \p TokLoc.
+llvm::Optional<SymbolLocation>
+SymbolCollector::getTokenLocation(SourceLocation TokLoc) {
+ const auto &SM = ASTCtx->getSourceManager();
+ auto *FE = SM.getFileEntryForID(SM.getFileID(TokLoc));
+ if (!FE)
+ return None;
+
+ SymbolLocation Result;
+ Result.FileURI = HeaderFileURIs->toURI(FE).c_str();
+ auto Range = getTokenRange(TokLoc, SM, ASTCtx->getLangOpts());
+ Result.Start = Range.first;
+ Result.End = Range.second;
+
+ return Result;
+}
+
SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
+SymbolCollector::~SymbolCollector() = default;
void SymbolCollector::initialize(ASTContext &Ctx) {
ASTCtx = &Ctx;
+ HeaderFileURIs = std::make_unique<HeaderFileURICache>(
+ PP, ASTCtx->getSourceManager(), Opts);
CompletionAllocator = std::make_shared<GlobalCodeCompletionAllocator>();
CompletionTUInfo =
std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);
@@ -262,7 +386,7 @@ bool SymbolCollector::handleDeclOccurrence(
const Decl *D, index::SymbolRoleSet Roles,
llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
index::IndexDataConsumer::ASTNodeInfo ASTNode) {
- assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
+ assert(ASTCtx && PP.get() && HeaderFileURIs);
assert(CompletionAllocator && CompletionTUInfo);
assert(ASTNode.OrigD);
// Indexing API puts canonical decl into D, which might not have a valid
@@ -383,12 +507,12 @@ bool SymbolCollector::handleDeclOccurrence(
}
void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {
- assert(PP.get());
+ assert(HeaderFileURIs && PP.get());
const auto &SM = PP->getSourceManager();
const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
assert(MainFileEntry);
- const auto MainFileURI = toURI(SM, MainFileEntry->getName(), Opts);
+ const std::string &MainFileURI = HeaderFileURIs->toURI(MainFileEntry);
// Add macro references.
for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {
for (const auto &MacroRef : IDToRefs.second) {
@@ -486,11 +610,9 @@ bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,
}
S.SymInfo = index::getSymbolInfoForMacro(*MI);
S.Origin = Opts.Origin;
- std::string FileURI;
// FIXME: use the result to filter out symbols.
shouldIndexFile(SM.getFileID(Loc));
- if (auto DeclLoc =
- getTokenLocation(DefLoc, SM, Opts, PP->getLangOpts(), FileURI))
+ if (auto DeclLoc = getTokenLocation(DefLoc))
S.CanonicalDeclaration = *DeclLoc;
CodeCompletionResult SymbolCompletion(Name);
@@ -578,44 +700,48 @@ void SymbolCollector::finish() {
}
// Fill in IncludeHeaders.
// We delay this until end of TU so header guards are all resolved.
- // Symbols in slabs aren't mutable, so insert() has to walk all the strings
- // :-(
- for (const auto &Entry : IncludeFiles)
+ llvm::SmallString<128> QName;
+ for (const auto &Entry : IncludeFiles) {
if (const Symbol *S = Symbols.find(Entry.first)) {
- if (auto Header = getIncludeHeader(*S, Entry.second)) {
+ llvm::StringRef IncludeHeader;
+ // Look for an overridden include header for this symbol specifically.
+ if (Opts.Includes) {
+ QName = S->Scope;
+ QName.append(S->Name);
+ IncludeHeader = Opts.Includes->mapSymbol(QName);
+ if (!IncludeHeader.empty()) {
+ if (IncludeHeader.front() != '"' && IncludeHeader.front() != '<')
+ IncludeHeader = HeaderFileURIs->toURI(IncludeHeader);
+ else if (IncludeHeader == "<utility>" && QName == "std::move" &&
+ S->Signature.contains(','))
+ IncludeHeader = "<algorithm>";
+ }
+ }
+ // Otherwise find the approprate include header for the defining file.
+ if (IncludeHeader.empty())
+ IncludeHeader = HeaderFileURIs->getIncludeHeader(Entry.second);
+
+ // Symbols in slabs aren't mutable, insert() has to walk all the strings
+ if (!IncludeHeader.empty()) {
Symbol NewSym = *S;
- NewSym.IncludeHeaders.push_back({std::move(*Header), 1});
+ NewSym.IncludeHeaders.push_back({IncludeHeader, 1});
Symbols.insert(NewSym);
}
}
+ }
const auto &SM = ASTCtx->getSourceManager();
- llvm::DenseMap<FileID, std::string> URICache;
- auto GetURI = [&](FileID FID) -> llvm::Optional<std::string> {
- auto Found = URICache.find(FID);
- if (Found == URICache.end()) {
- if (auto *FileEntry = SM.getFileEntryForID(FID)) {
- auto FileURI = toURI(SM, FileEntry->getName(), Opts);
- Found = URICache.insert({FID, FileURI}).first;
- } else {
- // Ignore cases where we can not find a corresponding file entry for
- // given location, e.g. symbols formed via macro concatenation.
- return None;
- }
- }
- return Found->second;
- };
auto CollectRef = [&](SymbolID ID, const SymbolRef &LocAndRole,
bool Spelled = false) {
auto FileID = SM.getFileID(LocAndRole.Loc);
// FIXME: use the result to filter out references.
shouldIndexFile(FileID);
- if (auto FileURI = GetURI(FileID)) {
+ if (const auto *FE = SM.getFileEntryForID(FileID)) {
auto Range = getTokenRange(LocAndRole.Loc, SM, ASTCtx->getLangOpts());
Ref R;
R.Location.Start = Range.first;
R.Location.End = Range.second;
- R.Location.FileURI = FileURI->c_str();
+ R.Location.FileURI = HeaderFileURIs->toURI(FE).c_str();
R.Kind = toRefKind(LocAndRole.Roles, Spelled);
R.Container = getSymbolID(LocAndRole.Container);
Refs.insert(ID, R);
@@ -656,8 +782,6 @@ void SymbolCollector::finish() {
ReferencedDecls.clear();
ReferencedMacros.clear();
DeclRefs.clear();
- FilesToIndexCache.clear();
- HeaderIsSelfContainedCache.clear();
IncludeFiles.clear();
}
@@ -683,13 +807,11 @@ const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
if (!IsMainFileOnly)
S.Flags |= Symbol::VisibleOutsideFile;
S.SymInfo = index::getSymbolInfo(&ND);
- std::string FileURI;
auto Loc = nameLocation(ND, SM);
assert(Loc.isValid() && "Invalid source location for NamedDecl");
// FIXME: use the result to filter out symbols.
shouldIndexFile(SM.getFileID(Loc));
- if (auto DeclLoc =
- getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
+ if (auto DeclLoc = getTokenLocation(Loc))
S.CanonicalDeclaration = *DeclLoc;
S.Origin = Opts.Origin;
@@ -743,114 +865,15 @@ void SymbolCollector::addDefinition(const NamedDecl &ND,
// This is not ideal, but avoids duplicating the "is this a definition" check
// in clang::index. We should only see one definition.
Symbol S = DeclSym;
- std::string FileURI;
const auto &SM = ND.getASTContext().getSourceManager();
auto Loc = nameLocation(ND, SM);
// FIXME: use the result to filter out symbols.
shouldIndexFile(SM.getFileID(Loc));
- if (auto DefLoc =
- getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
+ if (auto DefLoc = getTokenLocation(Loc))
S.Definition = *DefLoc;
Symbols.insert(S);
}
-/// Gets a canonical include (URI of the header or <header> or "header") for
-/// header of \p FID (which should usually be the *expansion* file).
-/// Returns None if includes should not be inserted for this file.
-llvm::Optional<std::string> SymbolCollector::getIncludeHeader(const Symbol &S,
- FileID FID) {
- const SourceManager &SM = ASTCtx->getSourceManager();
- const FileEntry *FE = SM.getFileEntryForID(FID);
- if (!FE || FE->getName().empty())
- return llvm::None;
- llvm::StringRef Filename = FE->getName();
- // If a file is mapped by canonical headers, use that mapping, regardless
- // of whether it's an otherwise-good header (header guards etc).
- if (Opts.Includes) {
- llvm::SmallString<256> QName = S.Scope;
- QName.append(S.Name);
- llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName);
- // If we had a mapping, always use it.
- if (Canonical.startswith("<") || Canonical.startswith("\"")) {
- // Hack: there are two std::move() overloads from
diff erent headers.
- // CanonicalIncludes returns the common one-arg one from <utility>.
- if (Canonical == "<utility>" && S.Name == "move" &&
- S.Signature.contains(','))
- Canonical = "<algorithm>";
- return Canonical.str();
- }
- if (Canonical != Filename)
- return toURI(SM, Canonical, Opts);
- }
- if (!isSelfContainedHeader(FID)) {
- // A .inc or .def file is often included into a real header to define
- // symbols (e.g. LLVM tablegen files).
- if (Filename.endswith(".inc") || Filename.endswith(".def"))
- return getIncludeHeader(S, SM.getFileID(SM.getIncludeLoc(FID)));
- // Conservatively refuse to insert #includes to files without guards.
- return llvm::None;
- }
- // Standard case: just insert the file itself.
- return toURI(SM, Filename, Opts);
-}
-
-bool SymbolCollector::isSelfContainedHeader(FileID FID) {
- // The real computation (which will be memoized).
- auto Compute = [&] {
- const SourceManager &SM = ASTCtx->getSourceManager();
- const FileEntry *FE = SM.getFileEntryForID(FID);
- if (!FE)
- return false;
- // FIXME: Should files that have been #import'd be considered
- // self-contained? That's really a property of the includer,
- // not of the file.
- if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) &&
- !PP->getHeaderSearchInfo().hasFileBeenImported(FE))
- return false;
- // This pattern indicates that a header can't be used without
- // particular preprocessor state, usually set up by another header.
- if (isDontIncludeMeHeader(SM.getBufferData(FID)))
- return false;
- return true;
- };
-
- auto R = HeaderIsSelfContainedCache.try_emplace(FID, false);
- if (R.second)
- R.first->second = Compute();
- return R.first->second;
-}
-
-// Is Line an #if or #ifdef directive?
-static bool isIf(llvm::StringRef Line) {
- Line = Line.ltrim();
- if (!Line.consume_front("#"))
- return false;
- Line = Line.ltrim();
- return Line.startswith("if");
-}
-// Is Line an #error directive mentioning includes?
-static bool isErrorAboutInclude(llvm::StringRef Line) {
- Line = Line.ltrim();
- if (!Line.consume_front("#"))
- return false;
- Line = Line.ltrim();
- if (!Line.startswith("error"))
- return false;
- return Line.contains_lower("includ"); // Matches "include" or "including".
-}
-
-bool SymbolCollector::isDontIncludeMeHeader(llvm::StringRef Content) {
- llvm::StringRef Line;
- // Only sniff up to 100 lines or 10KB.
- Content = Content.take_front(100 * 100);
- for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
- std::tie(Line, Content) = Content.split('\n');
- if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
- return true;
- }
- return false;
-}
-
bool SymbolCollector::shouldIndexFile(FileID FID) {
if (!Opts.FileFilter)
return true;
diff --git a/clang-tools-extra/clangd/index/SymbolCollector.h b/clang-tools-extra/clangd/index/SymbolCollector.h
index 92f847f3d8f3..af184fde69c6 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.h
+++ b/clang-tools-extra/clangd/index/SymbolCollector.h
@@ -91,6 +91,7 @@ class SymbolCollector : public index::IndexDataConsumer {
};
SymbolCollector(Options Opts);
+ ~SymbolCollector();
/// Returns true is \p ND should be collected.
static bool shouldCollectSymbol(const NamedDecl &ND, const ASTContext &ASTCtx,
@@ -132,10 +133,9 @@ class SymbolCollector : public index::IndexDataConsumer {
void processRelations(const NamedDecl &ND, const SymbolID &ID,
ArrayRef<index::SymbolRelation> Relations);
+ llvm::Optional<SymbolLocation> getTokenLocation(SourceLocation TokLoc);
+
llvm::Optional<std::string> getIncludeHeader(const Symbol &S, FileID);
- bool isSelfContainedHeader(FileID);
- // Heuristically headers that only want to be included via an umbrella.
- static bool isDontIncludeMeHeader(llvm::StringRef);
// All Symbols collected from the AST.
SymbolSlab::Builder Symbols;
@@ -175,7 +175,10 @@ class SymbolCollector : public index::IndexDataConsumer {
llvm::DenseMap<const Decl *, const Decl *> CanonicalDecls;
// Cache whether to index a file or not.
llvm::DenseMap<FileID, bool> FilesToIndexCache;
- llvm::DenseMap<FileID, bool> HeaderIsSelfContainedCache;
+ // Encapsulates calculations and caches around header paths, which headers
+ // to insert for which symbol, etc.
+ class HeaderFileURICache;
+ std::unique_ptr<HeaderFileURICache> HeaderFileURIs;
};
} // namespace clangd
diff --git a/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp b/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp
index fa96a4579624..31f99b7615d3 100644
--- a/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp
@@ -20,11 +20,8 @@ TEST(CanonicalIncludesTest, CStandardLibrary) {
Language.C11 = true;
CI.addSystemHeadersMapping(Language);
// Usual standard library symbols are mapped correctly.
- EXPECT_EQ("<stdio.h>", CI.mapHeader("path/stdio.h", "printf"));
- // Suffix mapping isn't available for C, instead of mapping to `<cstdio> we
- // just leave the header as-is.
- EXPECT_EQ("include/stdio.h",
- CI.mapHeader("include/stdio.h", "unknown_symbol"));
+ EXPECT_EQ("<stdio.h>", CI.mapSymbol("printf"));
+ EXPECT_EQ("", CI.mapSymbol("unknown_symbol"));
}
TEST(CanonicalIncludesTest, CXXStandardLibrary) {
@@ -34,17 +31,16 @@ TEST(CanonicalIncludesTest, CXXStandardLibrary) {
CI.addSystemHeadersMapping(Language);
// Usual standard library symbols are mapped correctly.
- EXPECT_EQ("<vector>", CI.mapHeader("path/vector.h", "std::vector"));
- EXPECT_EQ("<cstdio>", CI.mapHeader("path/stdio.h", "std::printf"));
+ EXPECT_EQ("<vector>", CI.mapSymbol("std::vector"));
+ EXPECT_EQ("<cstdio>", CI.mapSymbol("std::printf"));
// std::move is ambiguous, currently always mapped to <utility>
- EXPECT_EQ("<utility>",
- CI.mapHeader("libstdc++/bits/stl_algo.h", "std::move"));
+ EXPECT_EQ("<utility>", CI.mapSymbol("std::move"));
// Unknown std symbols aren't mapped.
- EXPECT_EQ("foo/bar.h", CI.mapHeader("foo/bar.h", "std::notathing"));
+ EXPECT_EQ("", CI.mapSymbol("std::notathing"));
// iosfwd declares some symbols it doesn't own.
- EXPECT_EQ("<ostream>", CI.mapHeader("iosfwd", "std::ostream"));
+ EXPECT_EQ("<ostream>", CI.mapSymbol("std::ostream"));
// And (for now) we assume it owns the others.
- EXPECT_EQ("<iosfwd>", CI.mapHeader("iosfwd", "std::notwathing"));
+ EXPECT_EQ("<iosfwd>", CI.mapHeader("iosfwd"));
}
TEST(CanonicalIncludesTest, PathMapping) {
@@ -52,20 +48,8 @@ TEST(CanonicalIncludesTest, PathMapping) {
CanonicalIncludes CI;
CI.addMapping("foo/bar", "<baz>");
- EXPECT_EQ("<baz>", CI.mapHeader("foo/bar", "some::symbol"));
- EXPECT_EQ("bar/bar", CI.mapHeader("bar/bar", "some::symbol"));
-}
-
-TEST(CanonicalIncludesTest, SymbolMapping) {
- // As used for standard library.
- CanonicalIncludes CI;
- LangOptions Language;
- Language.CPlusPlus = true;
- // Ensures 'std::vector' is mapped to '<vector>'.
- CI.addSystemHeadersMapping(Language);
-
- EXPECT_EQ("<vector>", CI.mapHeader("foo/bar", "std::vector"));
- EXPECT_EQ("foo/bar", CI.mapHeader("foo/bar", "other::symbol"));
+ EXPECT_EQ("<baz>", CI.mapHeader("foo/bar"));
+ EXPECT_EQ("", CI.mapHeader("bar/bar"));
}
TEST(CanonicalIncludesTest, Precedence) {
@@ -76,15 +60,9 @@ TEST(CanonicalIncludesTest, Precedence) {
CI.addSystemHeadersMapping(Language);
// We added a mapping from some/path to <path>.
- ASSERT_EQ("<path>", CI.mapHeader("some/path", ""));
+ ASSERT_EQ("<path>", CI.mapHeader("some/path"));
// We should have a path from 'bits/stl_vector.h' to '<vector>'.
- ASSERT_EQ("<vector>", CI.mapHeader("bits/stl_vector.h", ""));
- // We should also have a symbol mapping from 'std::map' to '<map>'.
- ASSERT_EQ("<map>", CI.mapHeader("some/header.h", "std::map"));
-
- // And the symbol mapping should take precedence over paths mapping.
- EXPECT_EQ("<map>", CI.mapHeader("bits/stl_vector.h", "std::map"));
- EXPECT_EQ("<map>", CI.mapHeader("some/path", "std::map"));
+ ASSERT_EQ("<vector>", CI.mapHeader("bits/stl_vector.h"));
}
} // namespace
diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
index 924cfd03cba7..87df23baf48a 100644
--- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
@@ -1463,9 +1463,6 @@ TEST_F(SymbolCollectorTest, CanonicalSTLHeader) {
}
)cpp",
/*Main=*/"");
- for (const auto &S : Symbols)
- llvm::errs() << S.Scope << S.Name << " in " << S.IncludeHeaders.size()
- << "\n";
EXPECT_THAT(
Symbols,
UnorderedElementsAre(
More information about the cfe-commits
mailing list