[clang-tools-extra] dc4cd43 - [clangd] Add a textual fallback for go-to-definition

Nathan Ridge via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 12 13:33:25 PDT 2020


Author: Sam McCall
Date: 2020-03-12T16:33:08-04:00
New Revision: dc4cd43904df92565dbacaa501db98eb9683551b

URL: https://github.com/llvm/llvm-project/commit/dc4cd43904df92565dbacaa501db98eb9683551b
DIFF: https://github.com/llvm/llvm-project/commit/dc4cd43904df92565dbacaa501db98eb9683551b.diff

LOG: [clangd] Add a textual fallback for go-to-definition

Summary:
This facilitates performing go-to-definition in contexts where AST-based
resolution does not work, such as comments, string literals, preprocessor
disabled regions, and macro definitions, based on textual lookup in the index.

Partially fixes https://github.com/clangd/clangd/issues/241

Reviewers: sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D72874

Added: 
    

Modified: 
    clang-tools-extra/clangd/FindSymbols.cpp
    clang-tools-extra/clangd/FindSymbols.h
    clang-tools-extra/clangd/XRefs.cpp
    clang-tools-extra/clangd/XRefs.h
    clang-tools-extra/clangd/unittests/XRefsTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp
index ee6e256ae1bc..06c124c0b35b 100644
--- a/clang-tools-extra/clangd/FindSymbols.cpp
+++ b/clang-tools-extra/clangd/FindSymbols.cpp
@@ -18,6 +18,7 @@
 #include "clang/Index/IndexDataConsumer.h"
 #include "clang/Index/IndexSymbol.h"
 #include "clang/Index/IndexingAction.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/ScopedPrinter.h"
@@ -39,30 +40,33 @@ struct ScoredSymbolGreater {
 
 } // namespace
 
-llvm::Expected<Location> symbolToLocation(const Symbol &Sym,
-                                          llvm::StringRef HintPath) {
-  // Prefer the definition over e.g. a function declaration in a header
-  auto &CD = Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration;
-  auto Path = URI::resolve(CD.FileURI, HintPath);
+llvm::Expected<Location> indexToLSPLocation(const SymbolLocation &Loc,
+                                            llvm::StringRef TUPath) {
+  auto Path = URI::resolve(Loc.FileURI, TUPath);
   if (!Path) {
     return llvm::make_error<llvm::StringError>(
-        formatv("Could not resolve path for symbol '{0}': {1}",
-                Sym.Name, llvm::toString(Path.takeError())),
+        llvm::formatv("Could not resolve path for file '{0}': {1}", Loc.FileURI,
+                      llvm::toString(Path.takeError())),
         llvm::inconvertibleErrorCode());
   }
   Location L;
-  // Use HintPath as TUPath since there is no TU associated with this
-  // request.
-  L.uri = URIForFile::canonicalize(*Path, HintPath);
+  L.uri = URIForFile::canonicalize(*Path, TUPath);
   Position Start, End;
-  Start.line = CD.Start.line();
-  Start.character = CD.Start.column();
-  End.line = CD.End.line();
-  End.character = CD.End.column();
+  Start.line = Loc.Start.line();
+  Start.character = Loc.Start.column();
+  End.line = Loc.End.line();
+  End.character = Loc.End.column();
   L.range = {Start, End};
   return L;
 }
 
+llvm::Expected<Location> symbolToLocation(const Symbol &Sym,
+                                          llvm::StringRef TUPath) {
+  // Prefer the definition over e.g. a function declaration in a header
+  return indexToLSPLocation(
+      Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration, TUPath);
+}
+
 llvm::Expected<std::vector<SymbolInformation>>
 getWorkspaceSymbols(llvm::StringRef Query, int Limit,
                     const SymbolIndex *const Index, llvm::StringRef HintPath) {

diff  --git a/clang-tools-extra/clangd/FindSymbols.h b/clang-tools-extra/clangd/FindSymbols.h
index b474ed135321..5110e8bdd3a9 100644
--- a/clang-tools-extra/clangd/FindSymbols.h
+++ b/clang-tools-extra/clangd/FindSymbols.h
@@ -21,9 +21,13 @@ namespace clangd {
 class ParsedAST;
 class SymbolIndex;
 
+/// Helper function for deriving an LSP Location from an index SymbolLocation.
+llvm::Expected<Location> indexToLSPLocation(const SymbolLocation &Loc,
+                                            llvm::StringRef TUPath);
+
 /// Helper function for deriving an LSP Location for a Symbol.
 llvm::Expected<Location> symbolToLocation(const Symbol &Sym,
-                                          llvm::StringRef HintPath);
+                                          llvm::StringRef TUPath);
 
 /// Searches for the symbols matching \p Query. The syntax of \p Query can be
 /// the non-qualified name or fully qualified of a symbol. For example,

diff  --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp
index 67f7bda6a5e6..acf9f6df8281 100644
--- a/clang-tools-extra/clangd/XRefs.cpp
+++ b/clang-tools-extra/clangd/XRefs.cpp
@@ -13,6 +13,7 @@
 #include "Logger.h"
 #include "ParsedAST.h"
 #include "Protocol.h"
+#include "Quality.h"
 #include "Selection.h"
 #include "SourceCode.h"
 #include "URI.h"
@@ -28,6 +29,7 @@
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/Type.h"
+#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SourceLocation.h"
@@ -173,12 +175,10 @@ llvm::Optional<Location> makeLocation(const ASTContext &AST, SourceLocation Loc,
   return L;
 }
 
-} // namespace
-
 // Treat #included files as symbols, to enable go-to-definition on them.
-static llvm::Optional<LocatedSymbol>
-locateFileReferent(const Position &Pos, ParsedAST &AST,
-                   llvm::StringRef MainFilePath) {
+llvm::Optional<LocatedSymbol> locateFileReferent(const Position &Pos,
+                                                 ParsedAST &AST,
+                                                 llvm::StringRef MainFilePath) {
   for (auto &Inc : AST.getIncludeStructure().MainFileIncludes) {
     if (!Inc.Resolved.empty() && Inc.R.start.line == Pos.line) {
       LocatedSymbol File;
@@ -195,7 +195,7 @@ locateFileReferent(const Position &Pos, ParsedAST &AST,
 
 // Macros are simple: there's no declaration/definition distinction.
 // As a consequence, there's no need to look them up in the index either.
-static llvm::Optional<LocatedSymbol>
+llvm::Optional<LocatedSymbol>
 locateMacroReferent(const syntax::Token &TouchedIdentifier, ParsedAST &AST,
                     llvm::StringRef MainFilePath) {
   if (auto M = locateMacroAt(TouchedIdentifier, AST.getPreprocessor())) {
@@ -215,7 +215,7 @@ locateMacroReferent(const syntax::Token &TouchedIdentifier, ParsedAST &AST,
 // The AST contains at least a declaration, maybe a definition.
 // These are up-to-date, and so generally preferred over index results.
 // We perform a single batch index lookup to find additional definitions.
-static std::vector<LocatedSymbol>
+std::vector<LocatedSymbol>
 locateASTReferent(SourceLocation CurLoc, const syntax::Token *TouchedIdentifier,
                   ParsedAST &AST, llvm::StringRef MainFilePath,
                   const SymbolIndex *Index) {
@@ -316,6 +316,160 @@ locateASTReferent(SourceLocation CurLoc, const syntax::Token *TouchedIdentifier,
   return Result;
 }
 
+llvm::StringRef wordTouching(llvm::StringRef Code, unsigned Offset) {
+  unsigned B = Offset, E = Offset;
+  while (B > 0 && isIdentifierBody(Code[B - 1]))
+    --B;
+  while (E < Code.size() && isIdentifierBody(Code[E]))
+    ++E;
+  return Code.slice(B, E);
+}
+
+bool isLikelyToBeIdentifier(StringRef Word) {
+  // Word contains underscore.
+  // This handles things like snake_case and MACRO_CASE.
+  if (Word.contains('_')) {
+    return true;
+  }
+  // Word contains capital letter other than at beginning.
+  // This handles things like lowerCamel and UpperCamel.
+  // The check for also containing a lowercase letter is to rule out
+  // initialisms like "HTTP".
+  bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos;
+  bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos;
+  if (HasLower && HasUpper) {
+    return true;
+  }
+  // FIXME: There are other signals we could listen for.
+  // Some of these require inspecting the surroundings of the word as well.
+  //   - mid-sentence Capitalization
+  //   - markup like quotes / backticks / brackets / "\p"
+  //   - word has a qualifier (foo::bar)
+  return false;
+}
+
+bool tokenSurvivedPreprocessing(SourceLocation Loc,
+                                const syntax::TokenBuffer &TB) {
+  auto WordExpandedTokens =
+      TB.expandedTokens(TB.sourceManager().getMacroArgExpandedLocation(Loc));
+  return !WordExpandedTokens.empty();
+}
+
+} // namespace
+
+std::vector<LocatedSymbol>
+locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index,
+                             SourceLocation Loc,
+                             const std::string &MainFilePath) {
+  const auto &SM = AST.getSourceManager();
+
+  // Get the raw word at the specified location.
+  unsigned Pos;
+  FileID File;
+  std::tie(File, Pos) = SM.getDecomposedLoc(Loc);
+  llvm::StringRef Code = SM.getBufferData(File);
+  llvm::StringRef Word = wordTouching(Code, Pos);
+  if (Word.empty())
+    return {};
+  unsigned WordOffset = Word.data() - Code.data();
+  SourceLocation WordStart = SM.getComposedLoc(File, WordOffset);
+
+  // Do not consider tokens that survived preprocessing.
+  // We are erring on the safe side here, as a user may expect to get
+  // accurate (as opposed to textual-heuristic) results for such tokens.
+  // FIXME: Relax this for dependent code.
+  if (tokenSurvivedPreprocessing(WordStart, AST.getTokens()))
+    return {};
+
+  // Additionally filter for signals that the word is likely to be an
+  // identifier. This avoids triggering on e.g. random words in a comment.
+  if (!isLikelyToBeIdentifier(Word))
+    return {};
+
+  // Look up the selected word in the index.
+  FuzzyFindRequest Req;
+  Req.Query = Word.str();
+  Req.ProximityPaths = {MainFilePath};
+  Req.Scopes = visibleNamespaces(Code.take_front(Pos), AST.getLangOpts());
+  // FIXME: For extra strictness, consider AnyScope=false.
+  Req.AnyScope = true;
+  // We limit the results to 3 further below. This limit is to avoid fetching
+  // too much data, while still likely having enough for 3 results to remain
+  // after additional filtering.
+  Req.Limit = 10;
+  bool TooMany = false;
+  using ScoredLocatedSymbol = std::pair<float, LocatedSymbol>;
+  std::vector<ScoredLocatedSymbol> ScoredResults;
+  Index->fuzzyFind(Req, [&](const Symbol &Sym) {
+    // Only consider exact name matches, including case.
+    // This is to avoid too many false positives.
+    // We could relax this in the future (e.g. to allow for typos) if we make
+    // the query more accurate by other means.
+    if (Sym.Name != Word)
+      return;
+
+    // Exclude constructor results. They have the same name as the class,
+    // but we don't have enough context to prefer them over the class.
+    if (Sym.SymInfo.Kind == index::SymbolKind::Constructor)
+      return;
+
+    auto MaybeDeclLoc =
+        indexToLSPLocation(Sym.CanonicalDeclaration, MainFilePath);
+    if (!MaybeDeclLoc) {
+      log("locateSymbolNamedTextuallyAt: {0}", MaybeDeclLoc.takeError());
+      return;
+    }
+    Location DeclLoc = *MaybeDeclLoc;
+    Location DefLoc;
+    if (Sym.Definition) {
+      auto MaybeDefLoc = indexToLSPLocation(Sym.Definition, MainFilePath);
+      if (!MaybeDefLoc) {
+        log("locateSymbolNamedTextuallyAt: {0}", MaybeDefLoc.takeError());
+        return;
+      }
+      DefLoc = *MaybeDefLoc;
+    }
+
+    if (ScoredResults.size() >= 3) {
+      // If we have more than 3 results, don't return anything,
+      // as confidence is too low.
+      // FIXME: Alternatively, try a stricter query?
+      TooMany = true;
+      return;
+    }
+
+    LocatedSymbol Located;
+    Located.Name = (Sym.Name + Sym.TemplateSpecializationArgs).str();
+    Located.PreferredDeclaration = bool(Sym.Definition) ? DefLoc : DeclLoc;
+    Located.Definition = DefLoc;
+
+    SymbolQualitySignals Quality;
+    Quality.merge(Sym);
+    SymbolRelevanceSignals Relevance;
+    Relevance.Name = Sym.Name;
+    Relevance.Query = SymbolRelevanceSignals::Generic;
+    Relevance.merge(Sym);
+    auto Score =
+        evaluateSymbolAndRelevance(Quality.evaluate(), Relevance.evaluate());
+    dlog("locateSymbolNamedTextuallyAt: {0}{1} = {2}\n{3}{4}\n", Sym.Scope,
+         Sym.Name, Score, Quality, Relevance);
+
+    ScoredResults.push_back({Score, std::move(Located)});
+  });
+
+  if (TooMany)
+    return {};
+
+  llvm::sort(ScoredResults,
+             [](const ScoredLocatedSymbol &A, const ScoredLocatedSymbol &B) {
+               return A.first > B.first;
+             });
+  std::vector<LocatedSymbol> Results;
+  for (auto &Res : std::move(ScoredResults))
+    Results.push_back(std::move(Res.second));
+  return Results;
+}
+
 std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
                                           const SymbolIndex *Index) {
   const auto &SM = AST.getSourceManager();
@@ -346,8 +500,12 @@ std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
       // expansion.)
       return {*std::move(Macro)};
 
-  return locateASTReferent(*CurLoc, TouchedIdentifier, AST, *MainFilePath,
-                           Index);
+  auto ASTResults =
+      locateASTReferent(*CurLoc, TouchedIdentifier, AST, *MainFilePath, Index);
+  if (!ASTResults.empty())
+    return ASTResults;
+
+  return locateSymbolNamedTextuallyAt(AST, Index, *CurLoc, *MainFilePath);
 }
 
 std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST) {

diff  --git a/clang-tools-extra/clangd/XRefs.h b/clang-tools-extra/clangd/XRefs.h
index bc8deb1e3df7..8f42ca8d3265 100644
--- a/clang-tools-extra/clangd/XRefs.h
+++ b/clang-tools-extra/clangd/XRefs.h
@@ -49,6 +49,21 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LocatedSymbol &);
 std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
                                           const SymbolIndex *Index = nullptr);
 
+// Tries to provide a textual fallback for locating a symbol referenced at
+// a location, by looking up the word under the cursor as a symbol name in the
+// index. The aim is to pick up references to symbols in contexts where
+// AST-based resolution does not work, such as comments, strings, and PP
+// disabled regions. The implementation takes a number of measures to avoid
+// false positives, such as looking for some signal that the word at the
+// given location is likely to be an identifier. The function does not
+// currently return results for locations that end up as real expanded
+// tokens, although this may be relaxed for e.g. dependent code in the future.
+// (This is for internal use by locateSymbolAt, and is exposed for testing).
+std::vector<LocatedSymbol>
+locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index,
+                             SourceLocation Loc,
+                             const std::string &MainFilePath);
+
 /// Get all document links
 std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST);
 

diff  --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
index 098a9421a8bb..d387fd219a08 100644
--- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -38,6 +38,7 @@ namespace clangd {
 namespace {
 
 using ::testing::ElementsAre;
+using ::testing::Eq;
 using ::testing::IsEmpty;
 using ::testing::Matcher;
 using ::testing::UnorderedElementsAreArray;
@@ -353,7 +354,7 @@ TEST(LocateSymbol, All) {
       R"cpp(// Symbol concatenated inside macro (not supported)
        int *pi;
        #define POINTER(X) p ## X;
-       int i = *POINTER(^i);
+       int x = *POINTER(^i);
       )cpp",
 
       R"cpp(// Forward class declaration
@@ -606,6 +607,81 @@ TEST(LocateSymbol, Warnings) {
   }
 }
 
+TEST(LocateSymbol, TextualSmoke) {
+  auto T = Annotations(
+      R"cpp(
+        struct [[MyClass]] {};
+        // Comment mentioning M^yClass
+      )cpp");
+
+  auto TU = TestTU::withCode(T.code());
+  auto AST = TU.build();
+  auto Index = TU.index();
+  EXPECT_THAT(locateSymbolAt(AST, T.point(), Index.get()),
+              ElementsAre(Sym("MyClass", T.range())));
+}
+
+TEST(LocateSymbol, Textual) {
+  const char *Tests[] = {
+      R"cpp(// Comment
+        struct [[MyClass]] {};
+        // Comment mentioning M^yClass
+      )cpp",
+      R"cpp(// String
+        struct [[MyClass]] {};
+        const char* s = "String literal mentioning M^yClass";
+      )cpp",
+      R"cpp(// Ifdef'ed out code
+        struct [[MyClass]] {};
+        #ifdef WALDO
+          M^yClass var;
+        #endif
+      )cpp",
+      R"cpp(// Macro definition
+        struct [[MyClass]] {};
+        #define DECLARE_MYCLASS_OBJ(name) M^yClass name;
+      )cpp",
+      R"cpp(// Invalid code
+        /*error-ok*/
+        int myFunction(int);
+        // Not triggered for token which survived preprocessing.
+        int var = m^yFunction();
+      )cpp",
+      R"cpp(// Dependent type
+        struct Foo {
+          void uniqueMethodName();
+        };
+        template <typename T>
+        void f(T t) {
+          // Not triggered for token which survived preprocessing.
+          t->u^niqueMethodName();
+        }
+      )cpp"};
+
+  for (const char *Test : Tests) {
+    Annotations T(Test);
+    llvm::Optional<Range> WantDecl;
+    if (!T.ranges().empty())
+      WantDecl = T.range();
+
+    auto TU = TestTU::withCode(T.code());
+
+    auto AST = TU.build();
+    auto Index = TU.index();
+    auto Results = locateSymbolNamedTextuallyAt(
+        AST, Index.get(),
+        cantFail(sourceLocationInMainFile(AST.getSourceManager(), T.point())),
+        testPath(TU.Filename));
+
+    if (!WantDecl) {
+      EXPECT_THAT(Results, IsEmpty()) << Test;
+    } else {
+      ASSERT_THAT(Results, ::testing::SizeIs(1)) << Test;
+      EXPECT_EQ(Results[0].PreferredDeclaration.range, *WantDecl) << Test;
+    }
+  }
+}
+
 TEST(LocateSymbol, Ambiguous) {
   auto T = Annotations(R"cpp(
     struct Foo {
@@ -680,6 +756,30 @@ TEST(LocateSymbol, Ambiguous) {
                                    Sym("baz", T.range("StaticOverload2"))));
 }
 
+TEST(LocateSymbol, TextualAmbiguous) {
+  auto T = Annotations(R"cpp(
+        struct Foo {
+          void $FooLoc[[uniqueMethodName]]();
+        };
+        struct Bar {
+          void $BarLoc[[uniqueMethodName]]();
+        };
+        // Will call u^niqueMethodName() on t.
+        template <typename T>
+        void f(T t);
+      )cpp");
+  auto TU = TestTU::withCode(T.code());
+  auto AST = TU.build();
+  auto Index = TU.index();
+  auto Results = locateSymbolNamedTextuallyAt(
+      AST, Index.get(),
+      cantFail(sourceLocationInMainFile(AST.getSourceManager(), T.point())),
+      testPath(TU.Filename));
+  EXPECT_THAT(Results,
+              UnorderedElementsAre(Sym("uniqueMethodName", T.range("FooLoc")),
+                                   Sym("uniqueMethodName", T.range("BarLoc"))));
+}
+
 TEST(LocateSymbol, TemplateTypedefs) {
   auto T = Annotations(R"cpp(
     template <class T> struct function {};


        


More information about the cfe-commits mailing list