[clang-tools-extra] dc4cd43 - [clangd] Add a textual fallback for go-to-definition
Nathan Ridge via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 12 13:33:25 PDT 2020
Author: Sam McCall
Date: 2020-03-12T16:33:08-04:00
New Revision: dc4cd43904df92565dbacaa501db98eb9683551b
URL: https://github.com/llvm/llvm-project/commit/dc4cd43904df92565dbacaa501db98eb9683551b
DIFF: https://github.com/llvm/llvm-project/commit/dc4cd43904df92565dbacaa501db98eb9683551b.diff
LOG: [clangd] Add a textual fallback for go-to-definition
Summary:
This facilitates performing go-to-definition in contexts where AST-based
resolution does not work, such as comments, string literals, preprocessor
disabled regions, and macro definitions, based on textual lookup in the index.
Partially fixes https://github.com/clangd/clangd/issues/241
Reviewers: sammccall
Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D72874
Added:
Modified:
clang-tools-extra/clangd/FindSymbols.cpp
clang-tools-extra/clangd/FindSymbols.h
clang-tools-extra/clangd/XRefs.cpp
clang-tools-extra/clangd/XRefs.h
clang-tools-extra/clangd/unittests/XRefsTests.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp
index ee6e256ae1bc..06c124c0b35b 100644
--- a/clang-tools-extra/clangd/FindSymbols.cpp
+++ b/clang-tools-extra/clangd/FindSymbols.cpp
@@ -18,6 +18,7 @@
#include "clang/Index/IndexDataConsumer.h"
#include "clang/Index/IndexSymbol.h"
#include "clang/Index/IndexingAction.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ScopedPrinter.h"
@@ -39,30 +40,33 @@ struct ScoredSymbolGreater {
} // namespace
-llvm::Expected<Location> symbolToLocation(const Symbol &Sym,
- llvm::StringRef HintPath) {
- // Prefer the definition over e.g. a function declaration in a header
- auto &CD = Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration;
- auto Path = URI::resolve(CD.FileURI, HintPath);
+llvm::Expected<Location> indexToLSPLocation(const SymbolLocation &Loc,
+ llvm::StringRef TUPath) {
+ auto Path = URI::resolve(Loc.FileURI, TUPath);
if (!Path) {
return llvm::make_error<llvm::StringError>(
- formatv("Could not resolve path for symbol '{0}': {1}",
- Sym.Name, llvm::toString(Path.takeError())),
+ llvm::formatv("Could not resolve path for file '{0}': {1}", Loc.FileURI,
+ llvm::toString(Path.takeError())),
llvm::inconvertibleErrorCode());
}
Location L;
- // Use HintPath as TUPath since there is no TU associated with this
- // request.
- L.uri = URIForFile::canonicalize(*Path, HintPath);
+ L.uri = URIForFile::canonicalize(*Path, TUPath);
Position Start, End;
- Start.line = CD.Start.line();
- Start.character = CD.Start.column();
- End.line = CD.End.line();
- End.character = CD.End.column();
+ Start.line = Loc.Start.line();
+ Start.character = Loc.Start.column();
+ End.line = Loc.End.line();
+ End.character = Loc.End.column();
L.range = {Start, End};
return L;
}
+llvm::Expected<Location> symbolToLocation(const Symbol &Sym,
+ llvm::StringRef TUPath) {
+ // Prefer the definition over e.g. a function declaration in a header
+ return indexToLSPLocation(
+ Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration, TUPath);
+}
+
llvm::Expected<std::vector<SymbolInformation>>
getWorkspaceSymbols(llvm::StringRef Query, int Limit,
const SymbolIndex *const Index, llvm::StringRef HintPath) {
diff --git a/clang-tools-extra/clangd/FindSymbols.h b/clang-tools-extra/clangd/FindSymbols.h
index b474ed135321..5110e8bdd3a9 100644
--- a/clang-tools-extra/clangd/FindSymbols.h
+++ b/clang-tools-extra/clangd/FindSymbols.h
@@ -21,9 +21,13 @@ namespace clangd {
class ParsedAST;
class SymbolIndex;
+/// Helper function for deriving an LSP Location from an index SymbolLocation.
+llvm::Expected<Location> indexToLSPLocation(const SymbolLocation &Loc,
+ llvm::StringRef TUPath);
+
/// Helper function for deriving an LSP Location for a Symbol.
llvm::Expected<Location> symbolToLocation(const Symbol &Sym,
- llvm::StringRef HintPath);
+ llvm::StringRef TUPath);
/// Searches for the symbols matching \p Query. The syntax of \p Query can be
/// the non-qualified name or fully qualified of a symbol. For example,
diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp
index 67f7bda6a5e6..acf9f6df8281 100644
--- a/clang-tools-extra/clangd/XRefs.cpp
+++ b/clang-tools-extra/clangd/XRefs.cpp
@@ -13,6 +13,7 @@
#include "Logger.h"
#include "ParsedAST.h"
#include "Protocol.h"
+#include "Quality.h"
#include "Selection.h"
#include "SourceCode.h"
#include "URI.h"
@@ -28,6 +29,7 @@
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/Type.h"
+#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
@@ -173,12 +175,10 @@ llvm::Optional<Location> makeLocation(const ASTContext &AST, SourceLocation Loc,
return L;
}
-} // namespace
-
// Treat #included files as symbols, to enable go-to-definition on them.
-static llvm::Optional<LocatedSymbol>
-locateFileReferent(const Position &Pos, ParsedAST &AST,
- llvm::StringRef MainFilePath) {
+llvm::Optional<LocatedSymbol> locateFileReferent(const Position &Pos,
+ ParsedAST &AST,
+ llvm::StringRef MainFilePath) {
for (auto &Inc : AST.getIncludeStructure().MainFileIncludes) {
if (!Inc.Resolved.empty() && Inc.R.start.line == Pos.line) {
LocatedSymbol File;
@@ -195,7 +195,7 @@ locateFileReferent(const Position &Pos, ParsedAST &AST,
// Macros are simple: there's no declaration/definition distinction.
// As a consequence, there's no need to look them up in the index either.
-static llvm::Optional<LocatedSymbol>
+llvm::Optional<LocatedSymbol>
locateMacroReferent(const syntax::Token &TouchedIdentifier, ParsedAST &AST,
llvm::StringRef MainFilePath) {
if (auto M = locateMacroAt(TouchedIdentifier, AST.getPreprocessor())) {
@@ -215,7 +215,7 @@ locateMacroReferent(const syntax::Token &TouchedIdentifier, ParsedAST &AST,
// The AST contains at least a declaration, maybe a definition.
// These are up-to-date, and so generally preferred over index results.
// We perform a single batch index lookup to find additional definitions.
-static std::vector<LocatedSymbol>
+std::vector<LocatedSymbol>
locateASTReferent(SourceLocation CurLoc, const syntax::Token *TouchedIdentifier,
ParsedAST &AST, llvm::StringRef MainFilePath,
const SymbolIndex *Index) {
@@ -316,6 +316,160 @@ locateASTReferent(SourceLocation CurLoc, const syntax::Token *TouchedIdentifier,
return Result;
}
+llvm::StringRef wordTouching(llvm::StringRef Code, unsigned Offset) {
+ unsigned B = Offset, E = Offset;
+ while (B > 0 && isIdentifierBody(Code[B - 1]))
+ --B;
+ while (E < Code.size() && isIdentifierBody(Code[E]))
+ ++E;
+ return Code.slice(B, E);
+}
+
+bool isLikelyToBeIdentifier(StringRef Word) {
+ // Word contains underscore.
+ // This handles things like snake_case and MACRO_CASE.
+ if (Word.contains('_')) {
+ return true;
+ }
+ // Word contains capital letter other than at beginning.
+ // This handles things like lowerCamel and UpperCamel.
+ // The check for also containing a lowercase letter is to rule out
+ // initialisms like "HTTP".
+ bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos;
+ bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos;
+ if (HasLower && HasUpper) {
+ return true;
+ }
+ // FIXME: There are other signals we could listen for.
+ // Some of these require inspecting the surroundings of the word as well.
+ // - mid-sentence Capitalization
+ // - markup like quotes / backticks / brackets / "\p"
+ // - word has a qualifier (foo::bar)
+ return false;
+}
+
+bool tokenSurvivedPreprocessing(SourceLocation Loc,
+ const syntax::TokenBuffer &TB) {
+ auto WordExpandedTokens =
+ TB.expandedTokens(TB.sourceManager().getMacroArgExpandedLocation(Loc));
+ return !WordExpandedTokens.empty();
+}
+
+} // namespace
+
+std::vector<LocatedSymbol>
+locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index,
+ SourceLocation Loc,
+ const std::string &MainFilePath) {
+ const auto &SM = AST.getSourceManager();
+
+ // Get the raw word at the specified location.
+ unsigned Pos;
+ FileID File;
+ std::tie(File, Pos) = SM.getDecomposedLoc(Loc);
+ llvm::StringRef Code = SM.getBufferData(File);
+ llvm::StringRef Word = wordTouching(Code, Pos);
+ if (Word.empty())
+ return {};
+ unsigned WordOffset = Word.data() - Code.data();
+ SourceLocation WordStart = SM.getComposedLoc(File, WordOffset);
+
+ // Do not consider tokens that survived preprocessing.
+ // We are erring on the safe side here, as a user may expect to get
+ // accurate (as opposed to textual-heuristic) results for such tokens.
+ // FIXME: Relax this for dependent code.
+ if (tokenSurvivedPreprocessing(WordStart, AST.getTokens()))
+ return {};
+
+ // Additionally filter for signals that the word is likely to be an
+ // identifier. This avoids triggering on e.g. random words in a comment.
+ if (!isLikelyToBeIdentifier(Word))
+ return {};
+
+ // Look up the selected word in the index.
+ FuzzyFindRequest Req;
+ Req.Query = Word.str();
+ Req.ProximityPaths = {MainFilePath};
+ Req.Scopes = visibleNamespaces(Code.take_front(Pos), AST.getLangOpts());
+ // FIXME: For extra strictness, consider AnyScope=false.
+ Req.AnyScope = true;
+ // We limit the results to 3 further below. This limit is to avoid fetching
+ // too much data, while still likely having enough for 3 results to remain
+ // after additional filtering.
+ Req.Limit = 10;
+ bool TooMany = false;
+ using ScoredLocatedSymbol = std::pair<float, LocatedSymbol>;
+ std::vector<ScoredLocatedSymbol> ScoredResults;
+ Index->fuzzyFind(Req, [&](const Symbol &Sym) {
+ // Only consider exact name matches, including case.
+ // This is to avoid too many false positives.
+ // We could relax this in the future (e.g. to allow for typos) if we make
+ // the query more accurate by other means.
+ if (Sym.Name != Word)
+ return;
+
+ // Exclude constructor results. They have the same name as the class,
+ // but we don't have enough context to prefer them over the class.
+ if (Sym.SymInfo.Kind == index::SymbolKind::Constructor)
+ return;
+
+ auto MaybeDeclLoc =
+ indexToLSPLocation(Sym.CanonicalDeclaration, MainFilePath);
+ if (!MaybeDeclLoc) {
+ log("locateSymbolNamedTextuallyAt: {0}", MaybeDeclLoc.takeError());
+ return;
+ }
+ Location DeclLoc = *MaybeDeclLoc;
+ Location DefLoc;
+ if (Sym.Definition) {
+ auto MaybeDefLoc = indexToLSPLocation(Sym.Definition, MainFilePath);
+ if (!MaybeDefLoc) {
+ log("locateSymbolNamedTextuallyAt: {0}", MaybeDefLoc.takeError());
+ return;
+ }
+ DefLoc = *MaybeDefLoc;
+ }
+
+ if (ScoredResults.size() >= 3) {
+ // If we have more than 3 results, don't return anything,
+ // as confidence is too low.
+ // FIXME: Alternatively, try a stricter query?
+ TooMany = true;
+ return;
+ }
+
+ LocatedSymbol Located;
+ Located.Name = (Sym.Name + Sym.TemplateSpecializationArgs).str();
+ Located.PreferredDeclaration = bool(Sym.Definition) ? DefLoc : DeclLoc;
+ Located.Definition = DefLoc;
+
+ SymbolQualitySignals Quality;
+ Quality.merge(Sym);
+ SymbolRelevanceSignals Relevance;
+ Relevance.Name = Sym.Name;
+ Relevance.Query = SymbolRelevanceSignals::Generic;
+ Relevance.merge(Sym);
+ auto Score =
+ evaluateSymbolAndRelevance(Quality.evaluate(), Relevance.evaluate());
+ dlog("locateSymbolNamedTextuallyAt: {0}{1} = {2}\n{3}{4}\n", Sym.Scope,
+ Sym.Name, Score, Quality, Relevance);
+
+ ScoredResults.push_back({Score, std::move(Located)});
+ });
+
+ if (TooMany)
+ return {};
+
+ llvm::sort(ScoredResults,
+ [](const ScoredLocatedSymbol &A, const ScoredLocatedSymbol &B) {
+ return A.first > B.first;
+ });
+ std::vector<LocatedSymbol> Results;
+ for (auto &Res : std::move(ScoredResults))
+ Results.push_back(std::move(Res.second));
+ return Results;
+}
+
std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
const SymbolIndex *Index) {
const auto &SM = AST.getSourceManager();
@@ -346,8 +500,12 @@ std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
// expansion.)
return {*std::move(Macro)};
- return locateASTReferent(*CurLoc, TouchedIdentifier, AST, *MainFilePath,
- Index);
+ auto ASTResults =
+ locateASTReferent(*CurLoc, TouchedIdentifier, AST, *MainFilePath, Index);
+ if (!ASTResults.empty())
+ return ASTResults;
+
+ return locateSymbolNamedTextuallyAt(AST, Index, *CurLoc, *MainFilePath);
}
std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST) {
diff --git a/clang-tools-extra/clangd/XRefs.h b/clang-tools-extra/clangd/XRefs.h
index bc8deb1e3df7..8f42ca8d3265 100644
--- a/clang-tools-extra/clangd/XRefs.h
+++ b/clang-tools-extra/clangd/XRefs.h
@@ -49,6 +49,21 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LocatedSymbol &);
std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
const SymbolIndex *Index = nullptr);
+// Tries to provide a textual fallback for locating a symbol referenced at
+// a location, by looking up the word under the cursor as a symbol name in the
+// index. The aim is to pick up references to symbols in contexts where
+// AST-based resolution does not work, such as comments, strings, and PP
+// disabled regions. The implementation takes a number of measures to avoid
+// false positives, such as looking for some signal that the word at the
+// given location is likely to be an identifier. The function does not
+// currently return results for locations that end up as real expanded
+// tokens, although this may be relaxed for e.g. dependent code in the future.
+// (This is for internal use by locateSymbolAt, and is exposed for testing).
+std::vector<LocatedSymbol>
+locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index,
+ SourceLocation Loc,
+ const std::string &MainFilePath);
+
/// Get all document links
std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST);
diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
index 098a9421a8bb..d387fd219a08 100644
--- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -38,6 +38,7 @@ namespace clangd {
namespace {
using ::testing::ElementsAre;
+using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::Matcher;
using ::testing::UnorderedElementsAreArray;
@@ -353,7 +354,7 @@ TEST(LocateSymbol, All) {
R"cpp(// Symbol concatenated inside macro (not supported)
int *pi;
#define POINTER(X) p ## X;
- int i = *POINTER(^i);
+ int x = *POINTER(^i);
)cpp",
R"cpp(// Forward class declaration
@@ -606,6 +607,81 @@ TEST(LocateSymbol, Warnings) {
}
}
+TEST(LocateSymbol, TextualSmoke) {
+ auto T = Annotations(
+ R"cpp(
+ struct [[MyClass]] {};
+ // Comment mentioning M^yClass
+ )cpp");
+
+ auto TU = TestTU::withCode(T.code());
+ auto AST = TU.build();
+ auto Index = TU.index();
+ EXPECT_THAT(locateSymbolAt(AST, T.point(), Index.get()),
+ ElementsAre(Sym("MyClass", T.range())));
+}
+
+TEST(LocateSymbol, Textual) {
+ const char *Tests[] = {
+ R"cpp(// Comment
+ struct [[MyClass]] {};
+ // Comment mentioning M^yClass
+ )cpp",
+ R"cpp(// String
+ struct [[MyClass]] {};
+ const char* s = "String literal mentioning M^yClass";
+ )cpp",
+ R"cpp(// Ifdef'ed out code
+ struct [[MyClass]] {};
+ #ifdef WALDO
+ M^yClass var;
+ #endif
+ )cpp",
+ R"cpp(// Macro definition
+ struct [[MyClass]] {};
+ #define DECLARE_MYCLASS_OBJ(name) M^yClass name;
+ )cpp",
+ R"cpp(// Invalid code
+ /*error-ok*/
+ int myFunction(int);
+ // Not triggered for token which survived preprocessing.
+ int var = m^yFunction();
+ )cpp",
+ R"cpp(// Dependent type
+ struct Foo {
+ void uniqueMethodName();
+ };
+ template <typename T>
+ void f(T t) {
+ // Not triggered for token which survived preprocessing.
+ t->u^niqueMethodName();
+ }
+ )cpp"};
+
+ for (const char *Test : Tests) {
+ Annotations T(Test);
+ llvm::Optional<Range> WantDecl;
+ if (!T.ranges().empty())
+ WantDecl = T.range();
+
+ auto TU = TestTU::withCode(T.code());
+
+ auto AST = TU.build();
+ auto Index = TU.index();
+ auto Results = locateSymbolNamedTextuallyAt(
+ AST, Index.get(),
+ cantFail(sourceLocationInMainFile(AST.getSourceManager(), T.point())),
+ testPath(TU.Filename));
+
+ if (!WantDecl) {
+ EXPECT_THAT(Results, IsEmpty()) << Test;
+ } else {
+ ASSERT_THAT(Results, ::testing::SizeIs(1)) << Test;
+ EXPECT_EQ(Results[0].PreferredDeclaration.range, *WantDecl) << Test;
+ }
+ }
+}
+
TEST(LocateSymbol, Ambiguous) {
auto T = Annotations(R"cpp(
struct Foo {
@@ -680,6 +756,30 @@ TEST(LocateSymbol, Ambiguous) {
Sym("baz", T.range("StaticOverload2"))));
}
+TEST(LocateSymbol, TextualAmbiguous) {
+ auto T = Annotations(R"cpp(
+ struct Foo {
+ void $FooLoc[[uniqueMethodName]]();
+ };
+ struct Bar {
+ void $BarLoc[[uniqueMethodName]]();
+ };
+ // Will call u^niqueMethodName() on t.
+ template <typename T>
+ void f(T t);
+ )cpp");
+ auto TU = TestTU::withCode(T.code());
+ auto AST = TU.build();
+ auto Index = TU.index();
+ auto Results = locateSymbolNamedTextuallyAt(
+ AST, Index.get(),
+ cantFail(sourceLocationInMainFile(AST.getSourceManager(), T.point())),
+ testPath(TU.Filename));
+ EXPECT_THAT(Results,
+ UnorderedElementsAre(Sym("uniqueMethodName", T.range("FooLoc")),
+ Sym("uniqueMethodName", T.range("BarLoc"))));
+}
+
TEST(LocateSymbol, TemplateTypedefs) {
auto T = Annotations(R"cpp(
template <class T> struct function {};
More information about the cfe-commits
mailing list