[clang-tools-extra] 3f1c2bf - [clangd] go-to-def on names in comments etc that are used nearby.
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Wed Apr 22 10:46:52 PDT 2020
Author: Sam McCall
Date: 2020-04-22T19:46:41+02:00
New Revision: 3f1c2bf1712c7496a80a0f89036ab1625ff347a5
URL: https://github.com/llvm/llvm-project/commit/3f1c2bf1712c7496a80a0f89036ab1625ff347a5
DIFF: https://github.com/llvm/llvm-project/commit/3f1c2bf1712c7496a80a0f89036ab1625ff347a5.diff
LOG: [clangd] go-to-def on names in comments etc that are used nearby.
Summary:
This is intended as a companion to (and is inspired by) D72874 which attempts to
resolve these cases using the index.
The intent is we'd try this strategy after the AST-based approach but before the
index-based (I think local usages would be more reliable than index matches).
Reviewers: nridge
Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D75479
Added:
Modified:
clang-tools-extra/clangd/SourceCode.cpp
clang-tools-extra/clangd/SourceCode.h
clang-tools-extra/clangd/XRefs.cpp
clang-tools-extra/clangd/XRefs.h
clang-tools-extra/clangd/unittests/SourceCodeTests.cpp
clang-tools-extra/clangd/unittests/XRefsTests.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp
index 1943784bfd18..dd4c863cb96a 100644
--- a/clang-tools-extra/clangd/SourceCode.cpp
+++ b/clang-tools-extra/clangd/SourceCode.cpp
@@ -855,6 +855,96 @@ llvm::StringSet<> collectWords(llvm::StringRef Content) {
return Result;
}
+static bool isLikelyIdentifier(llvm::StringRef Word, llvm::StringRef Before,
+ llvm::StringRef After) {
+ // `foo` is an identifier.
+ if (Before.endswith("`") && After.startswith("`"))
+ return true;
+ // In foo::bar, both foo and bar are identifiers.
+ if (Before.endswith("::") || After.startswith("::"))
+ return true;
+ // Doxygen tags like \c foo indicate identifiers.
+ // Don't search too far back.
+ // This duplicates clang's doxygen parser, revisit if it gets complicated.
+ Before = Before.take_back(100); // Don't search too far back.
+ auto Pos = Before.find_last_of("\\@");
+ if (Pos != llvm::StringRef::npos) {
+ llvm::StringRef Tag = Before.substr(Pos + 1).rtrim(' ');
+ if (Tag == "p" || Tag == "c" || Tag == "class" || Tag == "tparam" ||
+ Tag == "param" || Tag == "param[in]" || Tag == "param[out]" ||
+ Tag == "param[in,out]" || Tag == "retval" || Tag == "throw" ||
+ Tag == "throws" || Tag == "link")
+ return true;
+ }
+
+ // Word contains underscore.
+ // This handles things like snake_case and MACRO_CASE.
+ if (Word.contains('_')) {
+ return true;
+ }
+ // Word contains capital letter other than at beginning.
+ // This handles things like lowerCamel and UpperCamel.
+ // The check for also containing a lowercase letter is to rule out
+ // initialisms like "HTTP".
+ bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos;
+ bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos;
+ if (HasLower && HasUpper) {
+ return true;
+ }
+ // FIXME: consider mid-sentence Capitalization?
+ return false;
+}
+
+llvm::Optional<SpelledWord> SpelledWord::touching(SourceLocation SpelledLoc,
+ const syntax::TokenBuffer &TB,
+ const LangOptions &LangOpts) {
+ const auto &SM = TB.sourceManager();
+ auto Touching = syntax::spelledTokensTouching(SpelledLoc, TB);
+ for (const auto &T : Touching) {
+ // If the token is an identifier or a keyword, don't use any heuristics.
+ if (tok::isAnyIdentifier(T.kind()) || tok::getKeywordSpelling(T.kind())) {
+ SpelledWord Result;
+ Result.Location = T.location();
+ Result.Text = T.text(SM);
+ Result.LikelyIdentifier = tok::isAnyIdentifier(T.kind());
+ Result.PartOfSpelledToken = &T;
+ Result.SpelledToken = &T;
+ auto Expanded =
+ TB.expandedTokens(SM.getMacroArgExpandedLocation(T.location()));
+ if (Expanded.size() == 1 && Expanded.front().text(SM) == Result.Text)
+ Result.ExpandedToken = &Expanded.front();
+ return Result;
+ }
+ }
+ FileID File;
+ unsigned Offset;
+ std::tie(File, Offset) = SM.getDecomposedLoc(SpelledLoc);
+ bool Invalid = false;
+ llvm::StringRef Code = SM.getBufferData(File, &Invalid);
+ if (Invalid)
+ return llvm::None;
+ unsigned B = Offset, E = Offset;
+ while (B > 0 && isIdentifierBody(Code[B - 1]))
+ --B;
+ while (E < Code.size() && isIdentifierBody(Code[E]))
+ ++E;
+ if (B == E)
+ return llvm::None;
+
+ SpelledWord Result;
+ Result.Location = SM.getComposedLoc(File, B);
+ Result.Text = Code.slice(B, E);
+ Result.LikelyIdentifier =
+ isLikelyIdentifier(Result.Text, Code.substr(0, B), Code.substr(E)) &&
+ // should not be a keyword
+ tok::isAnyIdentifier(
+ IdentifierTable(LangOpts).get(Result.Text).getTokenID());
+ for (const auto &T : Touching)
+ if (T.location() <= Result.Location)
+ Result.PartOfSpelledToken = &T;
+ return Result;
+}
+
llvm::Optional<DefinedMacro> locateMacroAt(const syntax::Token &SpelledTok,
Preprocessor &PP) {
SourceLocation Loc = SpelledTok.location();
diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h
index dfa685fdd795..d1e5aa5f7c2b 100644
--- a/clang-tools-extra/clangd/SourceCode.h
+++ b/clang-tools-extra/clangd/SourceCode.h
@@ -216,6 +216,35 @@ std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
/// - drops stopwords like "get" and "for"
llvm::StringSet<> collectWords(llvm::StringRef Content);
+// Something that looks like a word in the source code.
+// Could be a "real" token that's "live" in the AST, a spelled token consumed by
+// the preprocessor, or part of a spelled token (e.g. word in a comment).
+struct SpelledWord {
+ // (Spelling) location of the start of the word.
+ SourceLocation Location;
+ // The range of the word itself, excluding any quotes.
+ // This is a subrange of the file buffer.
+ llvm::StringRef Text;
+ // Whether this word is likely to refer to an identifier. True if:
+ // - the word is a spelled identifier token
+ // - Text is identifier-like (e.g. "foo_bar")
+ // - Text is surrounded by backticks (e.g. Foo in "// returns `Foo`")
+ bool LikelyIdentifier = false;
+ // Set if the word is contained in a token spelled in the file.
+ // (This should always be true, but comments aren't retained by TokenBuffer).
+ const syntax::Token *PartOfSpelledToken = nullptr;
+ // Set if the word is exactly a token spelled in the file.
+ const syntax::Token *SpelledToken = nullptr;
+ // Set if the word is a token spelled in the file, and that token survives
+ // preprocessing to emit an expanded token spelled the same way.
+ const syntax::Token *ExpandedToken = nullptr;
+
+ // Find the unique word that contains SpelledLoc or starts/ends there.
+ static llvm::Optional<SpelledWord> touching(SourceLocation SpelledLoc,
+ const syntax::TokenBuffer &TB,
+ const LangOptions &LangOpts);
+};
+
/// Heuristically determine namespaces visible at a point, without parsing Code.
/// This considers using-directives and enclosing namespace-declarations that
/// are visible (and not obfuscated) in the file itself (not headers).
diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp
index 2e2e6602c8d3..d17fa52bd82c 100644
--- a/clang-tools-extra/clangd/XRefs.cpp
+++ b/clang-tools-extra/clangd/XRefs.cpp
@@ -34,6 +34,7 @@
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
#include "clang/Index/IndexDataConsumer.h"
#include "clang/Index/IndexSymbol.h"
#include "clang/Index/IndexingAction.h"
@@ -48,6 +49,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
@@ -315,93 +317,44 @@ locateASTReferent(SourceLocation CurLoc, const syntax::Token *TouchedIdentifier,
return Result;
}
-llvm::StringRef wordTouching(llvm::StringRef Code, unsigned Offset) {
- unsigned B = Offset, E = Offset;
- while (B > 0 && isIdentifierBody(Code[B - 1]))
- --B;
- while (E < Code.size() && isIdentifierBody(Code[E]))
- ++E;
- return Code.slice(B, E);
+bool tokenSpelledAt(SourceLocation SpellingLoc, const syntax::TokenBuffer &TB) {
+ auto ExpandedTokens = TB.expandedTokens(
+ TB.sourceManager().getMacroArgExpandedLocation(SpellingLoc));
+ return !ExpandedTokens.empty();
}
-bool isLikelyToBeIdentifier(StringRef Word) {
- // Word contains underscore.
- // This handles things like snake_case and MACRO_CASE.
- if (Word.contains('_')) {
- return true;
- }
- // Word contains capital letter other than at beginning.
- // This handles things like lowerCamel and UpperCamel.
- // The check for also containing a lowercase letter is to rule out
- // initialisms like "HTTP".
- bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos;
- bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos;
- if (HasLower && HasUpper) {
- return true;
- }
- // FIXME: There are other signals we could listen for.
- // Some of these require inspecting the surroundings of the word as well.
- // - mid-sentence Capitalization
- // - markup like quotes / backticks / brackets / "\p"
- // - word has a qualifier (foo::bar)
- return false;
-}
-
-bool tokenSurvivedPreprocessing(SourceLocation Loc,
- const syntax::TokenBuffer &TB) {
- auto WordExpandedTokens =
- TB.expandedTokens(TB.sourceManager().getMacroArgExpandedLocation(Loc));
- return !WordExpandedTokens.empty();
+llvm::StringRef sourcePrefix(SourceLocation Loc, const SourceManager &SM) {
+ auto D = SM.getDecomposedLoc(Loc);
+ bool Invalid = false;
+ llvm::StringRef Buf = SM.getBufferData(D.first, &Invalid);
+ if (Invalid || D.second > Buf.size())
+ return "";
+ return Buf.substr(0, D.second);
}
} // namespace
std::vector<LocatedSymbol>
-locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index,
- SourceLocation Loc,
- const std::string &MainFilePath) {
- const auto &SM = AST.getSourceManager();
-
- // Get the raw word at the specified location.
- unsigned Pos;
- FileID File;
- std::tie(File, Pos) = SM.getDecomposedLoc(Loc);
- llvm::StringRef Code = SM.getBufferData(File);
- llvm::StringRef Word = wordTouching(Code, Pos);
- if (Word.empty())
- return {};
- unsigned WordOffset = Word.data() - Code.data();
- SourceLocation WordStart = SM.getComposedLoc(File, WordOffset);
-
- // Attempt to determine the kind of token that contains the word,
- // and bail if it's a string literal. Note that we cannot always
- // determine the token kind (e.g. comments, for which we do want
- // to activate, are not retained by TokenBuffer).
- for (syntax::Token T :
- syntax::spelledTokensTouching(WordStart, AST.getTokens())) {
- if (T.range(AST.getSourceManager()).touches(WordOffset + Word.size())) {
- if (isStringLiteral(T.kind()))
- return {};
- }
- }
-
- // Do not consider tokens that survived preprocessing.
- // We are erring on the safe side here, as a user may expect to get
- // accurate (as opposed to textual-heuristic) results for such tokens.
- // FIXME: Relax this for dependent code.
- if (tokenSurvivedPreprocessing(WordStart, AST.getTokens()))
+locateSymbolTextually(const SpelledWord &Word, ParsedAST &AST,
+ const SymbolIndex *Index,
+ const std::string &MainFilePath) {
+ // Don't use heuristics if this is a real identifier, or not an identifier.
+ if (Word.ExpandedToken || !Word.LikelyIdentifier || !Index)
return {};
-
- // Additionally filter for signals that the word is likely to be an
- // identifier. This avoids triggering on e.g. random words in a comment.
- if (!isLikelyToBeIdentifier(Word))
+ // We don't want to handle words in string literals. It'd be nice to whitelist
+ // comments instead, but they're not retained in TokenBuffer.
+ if (Word.PartOfSpelledToken &&
+ isStringLiteral(Word.PartOfSpelledToken->kind()))
return {};
+ const auto &SM = AST.getSourceManager();
// Look up the selected word in the index.
FuzzyFindRequest Req;
- Req.Query = Word.str();
+ Req.Query = Word.Text.str();
Req.ProximityPaths = {MainFilePath};
- Req.Scopes = visibleNamespaces(Code.take_front(Pos), AST.getLangOpts());
+ // Find the namespaces to query by lexing the file.
+ Req.Scopes =
+ visibleNamespaces(sourcePrefix(Word.Location, SM), AST.getLangOpts());
// FIXME: For extra strictness, consider AnyScope=false.
Req.AnyScope = true;
// We limit the results to 3 further below. This limit is to avoid fetching
@@ -416,7 +369,7 @@ locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index,
// This is to avoid too many false positives.
// We could relax this in the future (e.g. to allow for typos) if we make
// the query more accurate by other means.
- if (Sym.Name != Word)
+ if (Sym.Name != Word.Text)
return;
// Exclude constructor results. They have the same name as the class,
@@ -481,6 +434,82 @@ locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index,
return Results;
}
+const syntax::Token *findNearbyIdentifier(const SpelledWord &Word,
+ const syntax::TokenBuffer &TB) {
+ // Don't use heuristics if this is a real identifier.
+ // Unlikely identifiers are OK if they were used as identifiers nearby.
+ if (Word.ExpandedToken)
+ return nullptr;
+ // We don't want to handle words in string literals. It'd be nice to whitelist
+ // comments instead, but they're not retained in TokenBuffer.
+ if (Word.PartOfSpelledToken &&
+ isStringLiteral(Word.PartOfSpelledToken->kind()))
+ return {};
+
+ const SourceManager &SM = TB.sourceManager();
+ // We prefer the closest possible token, line-wise. Backwards is penalized.
+ // Ties are implicitly broken by traversal order (first-one-wins).
+ auto File = SM.getFileID(Word.Location);
+ unsigned WordLine = SM.getSpellingLineNumber(Word.Location);
+ auto Cost = [&](SourceLocation Loc) -> unsigned {
+ assert(SM.getFileID(Loc) == File && "spelled token in wrong file?");
+ unsigned Line = SM.getSpellingLineNumber(Loc);
+ if (Line > WordLine)
+ return 1 + llvm::Log2_64(Line - WordLine);
+ if (Line < WordLine)
+ return 2 + llvm::Log2_64(WordLine - Line);
+ return 0;
+ };
+ const syntax::Token *BestTok = nullptr;
+ // Search bounds are based on word length: 2^N lines forward.
+ unsigned BestCost = Word.Text.size() + 1;
+
+ // Updates BestTok and BestCost if Tok is a good candidate.
+ // May return true if the cost is too high for this token.
+ auto Consider = [&](const syntax::Token &Tok) {
+ if (!(Tok.kind() == tok::identifier && Tok.text(SM) == Word.Text))
+ return false;
+ // No point guessing the same location we started with.
+ if (Tok.location() == Word.Location)
+ return false;
+ // We've done cheap checks, compute cost so we can break the caller's loop.
+ unsigned TokCost = Cost(Tok.location());
+ if (TokCost >= BestCost)
+ return true; // causes the outer loop to break.
+ // Allow locations that might be part of the AST, and macros (even if empty)
+ // but not things like disabled preprocessor sections.
+ if (!(tokenSpelledAt(Tok.location(), TB) || TB.expansionStartingAt(&Tok)))
+ return false;
+ // We already verified this token is an improvement.
+ BestCost = TokCost;
+ BestTok = &Tok;
+ return false;
+ };
+ auto SpelledTokens = TB.spelledTokens(File);
+ // Find where the word occurred in the token stream, to search forward & back.
+ auto *I = llvm::partition_point(SpelledTokens, [&](const syntax::Token &T) {
+ assert(SM.getFileID(T.location()) == SM.getFileID(Word.Location));
+ return T.location() >= Word.Location; // Comparison OK: same file.
+ });
+ // Search for matches after the cursor.
+ for (const syntax::Token &Tok : llvm::makeArrayRef(I, SpelledTokens.end()))
+ if (Consider(Tok))
+ break; // costs of later tokens are greater...
+ // Search for matches before the cursor.
+ for (const syntax::Token &Tok :
+ llvm::reverse(llvm::makeArrayRef(SpelledTokens.begin(), I)))
+ if (Consider(Tok))
+ break;
+
+ if (BestTok)
+ vlog(
+ "Word {0} under cursor {1} isn't a token (after PP), trying nearby {2}",
+ Word.Text, Word.Location.printToString(SM),
+ BestTok->location().printToString(SM));
+
+ return BestTok;
+}
+
std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
const SymbolIndex *Index) {
const auto &SM = AST.getSourceManager();
@@ -516,7 +545,28 @@ std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
if (!ASTResults.empty())
return ASTResults;
- return locateSymbolNamedTextuallyAt(AST, Index, *CurLoc, *MainFilePath);
+ // If the cursor can't be resolved directly, try fallback strategies.
+ auto Word =
+ SpelledWord::touching(*CurLoc, AST.getTokens(), AST.getLangOpts());
+ if (Word) {
+ // Is the same word nearby a real identifier that might refer to something?
+ if (const syntax::Token *NearbyIdent =
+ findNearbyIdentifier(*Word, AST.getTokens())) {
+ if (auto Macro = locateMacroReferent(*NearbyIdent, AST, *MainFilePath))
+ return {*std::move(Macro)};
+ ASTResults = locateASTReferent(NearbyIdent->location(), NearbyIdent, AST,
+ *MainFilePath, Index);
+ if (!ASTResults.empty())
+ return ASTResults;
+ }
+ // No nearby word, or it didn't refer to anything either. Try the index.
+ auto TextualResults =
+ locateSymbolTextually(*Word, AST, Index, *MainFilePath);
+ if (!TextualResults.empty())
+ return TextualResults;
+ }
+
+ return {};
}
std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST) {
diff --git a/clang-tools-extra/clangd/XRefs.h b/clang-tools-extra/clangd/XRefs.h
index 8f42ca8d3265..af78ec780c5a 100644
--- a/clang-tools-extra/clangd/XRefs.h
+++ b/clang-tools-extra/clangd/XRefs.h
@@ -16,6 +16,7 @@
#include "FormattedString.h"
#include "Path.h"
#include "Protocol.h"
+#include "SourceCode.h"
#include "index/Index.h"
#include "index/SymbolLocation.h"
#include "clang/AST/Type.h"
@@ -26,6 +27,10 @@
#include <vector>
namespace clang {
+namespace syntax {
+class Token;
+class TokenBuffer;
+} // namespace syntax
namespace clangd {
class ParsedAST;
@@ -49,20 +54,22 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LocatedSymbol &);
std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
const SymbolIndex *Index = nullptr);
-// Tries to provide a textual fallback for locating a symbol referenced at
-// a location, by looking up the word under the cursor as a symbol name in the
-// index. The aim is to pick up references to symbols in contexts where
+// Tries to provide a textual fallback for locating a symbol by looking up the
+// word under the cursor as a symbol name in the index.
+// The aim is to pick up references to symbols in contexts where
// AST-based resolution does not work, such as comments, strings, and PP
-// disabled regions. The implementation takes a number of measures to avoid
-// false positives, such as looking for some signal that the word at the
-// given location is likely to be an identifier. The function does not
-// currently return results for locations that end up as real expanded
-// tokens, although this may be relaxed for e.g. dependent code in the future.
+// disabled regions.
// (This is for internal use by locateSymbolAt, and is exposed for testing).
std::vector<LocatedSymbol>
-locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index,
- SourceLocation Loc,
- const std::string &MainFilePath);
+locateSymbolTextually(const SpelledWord &Word, ParsedAST &AST,
+ const SymbolIndex *Index,
+ const std::string &MainFilePath);
+
+// Try to find a proximate occurrence of `Word` as an identifier, which can be
+// used to resolve it.
+// (This is for internal use by locateSymbolAt, and is exposed for testing).
+const syntax::Token *findNearbyIdentifier(const SpelledWord &Word,
+ const syntax::TokenBuffer &TB);
/// Get all document links
std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST);
diff --git a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp
index 76a3a3cac267..71721fe81cd9 100644
--- a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp
@@ -12,6 +12,7 @@
#include "TestTU.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/TokenKinds.h"
#include "clang/Format/Format.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_os_ostream.h"
@@ -328,6 +329,101 @@ TEST(SourceCodeTests, CollectWords) {
EXPECT_EQ(ActualWords, ExpectedWords);
}
+class SpelledWordsTest : public ::testing::Test {
+ llvm::Optional<ParsedAST> AST;
+
+ llvm::Optional<SpelledWord> tryWord(const char *Text) {
+ llvm::Annotations A(Text);
+ auto TU = TestTU::withCode(A.code());
+ AST = TU.build();
+ auto SW = SpelledWord::touching(
+ AST->getSourceManager().getComposedLoc(
+ AST->getSourceManager().getMainFileID(), A.point()),
+ AST->getTokens(), AST->getLangOpts());
+ if (A.ranges().size()) {
+ llvm::StringRef Want = A.code().slice(A.range().Begin, A.range().End);
+ EXPECT_EQ(Want, SW->Text) << Text;
+ }
+ return SW;
+ }
+
+protected:
+ SpelledWord word(const char *Text) {
+ auto Result = tryWord(Text);
+ EXPECT_TRUE(Result) << Text;
+ return Result.getValueOr(SpelledWord());
+ }
+
+ void noWord(const char *Text) { EXPECT_FALSE(tryWord(Text)) << Text; }
+};
+
+TEST_F(SpelledWordsTest, HeuristicBoundaries) {
+ word("// [[^foo]] ");
+ word("// [[f^oo]] ");
+ word("// [[foo^]] ");
+ word("// [[foo^]]+bar ");
+ noWord("//^ foo ");
+ noWord("// foo ^");
+}
+
+TEST_F(SpelledWordsTest, LikelyIdentifier) {
+ EXPECT_FALSE(word("// ^foo ").LikelyIdentifier);
+ EXPECT_TRUE(word("// [[^foo_bar]] ").LikelyIdentifier);
+ EXPECT_TRUE(word("// [[^fooBar]] ").LikelyIdentifier);
+ EXPECT_FALSE(word("// H^TTP ").LikelyIdentifier);
+ EXPECT_TRUE(word("// \\p [[^foo]] ").LikelyIdentifier);
+ EXPECT_TRUE(word("// @param[in] [[^foo]] ").LikelyIdentifier);
+ EXPECT_TRUE(word("// `[[f^oo]]` ").LikelyIdentifier);
+ EXPECT_TRUE(word("// bar::[[f^oo]] ").LikelyIdentifier);
+ EXPECT_TRUE(word("// [[f^oo]]::bar ").LikelyIdentifier);
+}
+
+TEST_F(SpelledWordsTest, Comment) {
+ auto W = word("// [[^foo]]");
+ EXPECT_FALSE(W.PartOfSpelledToken);
+ EXPECT_FALSE(W.SpelledToken);
+ EXPECT_FALSE(W.ExpandedToken);
+}
+
+TEST_F(SpelledWordsTest, PartOfString) {
+ auto W = word(R"( auto str = "foo [[^bar]] baz"; )");
+ ASSERT_TRUE(W.PartOfSpelledToken);
+ EXPECT_EQ(W.PartOfSpelledToken->kind(), tok::string_literal);
+ EXPECT_FALSE(W.SpelledToken);
+ EXPECT_FALSE(W.ExpandedToken);
+}
+
+TEST_F(SpelledWordsTest, DisabledSection) {
+ auto W = word(R"cpp(
+ #if 0
+ foo [[^bar]] baz
+ #endif
+ )cpp");
+ ASSERT_TRUE(W.SpelledToken);
+ EXPECT_EQ(W.SpelledToken->kind(), tok::identifier);
+ EXPECT_EQ(W.SpelledToken, W.PartOfSpelledToken);
+ EXPECT_FALSE(W.ExpandedToken);
+}
+
+TEST_F(SpelledWordsTest, Macros) {
+ auto W = word(R"cpp(
+ #define ID(X) X
+ ID(int [[^i]]);
+ )cpp");
+ ASSERT_TRUE(W.SpelledToken);
+ EXPECT_EQ(W.SpelledToken->kind(), tok::identifier);
+ EXPECT_EQ(W.SpelledToken, W.PartOfSpelledToken);
+ ASSERT_TRUE(W.ExpandedToken);
+ EXPECT_EQ(W.ExpandedToken->kind(), tok::identifier);
+
+ W = word(R"cpp(
+ #define OBJECT Expansion;
+ int [[^OBJECT]];
+ )cpp");
+ EXPECT_TRUE(W.SpelledToken);
+ EXPECT_FALSE(W.ExpandedToken) << "Expanded token is spelled differently";
+}
+
TEST(SourceCodeTests, VisibleNamespaces) {
std::vector<std::pair<const char *, std::vector<std::string>>> Cases = {
{
diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
index ce7f76ccf4f4..027939e15f77 100644
--- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -685,10 +685,15 @@ TEST(LocateSymbol, Textual) {
auto AST = TU.build();
auto Index = TU.index();
- auto Results = locateSymbolNamedTextuallyAt(
- AST, Index.get(),
+ auto Word = SpelledWord::touching(
cantFail(sourceLocationInMainFile(AST.getSourceManager(), T.point())),
- testPath(TU.Filename));
+ AST.getTokens(), AST.getLangOpts());
+ if (!Word) {
+ ADD_FAILURE() << "No word touching point!" << Test;
+ continue;
+ }
+ auto Results =
+ locateSymbolTextually(*Word, AST, Index.get(), testPath(TU.Filename));
if (!WantDecl) {
EXPECT_THAT(Results, IsEmpty()) << Test;
@@ -788,10 +793,12 @@ TEST(LocateSymbol, TextualAmbiguous) {
auto TU = TestTU::withCode(T.code());
auto AST = TU.build();
auto Index = TU.index();
- auto Results = locateSymbolNamedTextuallyAt(
- AST, Index.get(),
+ auto Word = SpelledWord::touching(
cantFail(sourceLocationInMainFile(AST.getSourceManager(), T.point())),
- testPath(TU.Filename));
+ AST.getTokens(), AST.getLangOpts());
+ ASSERT_TRUE(Word);
+ auto Results =
+ locateSymbolTextually(*Word, AST, Index.get(), testPath(TU.Filename));
EXPECT_THAT(Results,
UnorderedElementsAre(Sym("uniqueMethodName", T.range("FooLoc")),
Sym("uniqueMethodName", T.range("BarLoc"))));
@@ -985,6 +992,101 @@ TEST(LocateSymbol, WithPreamble) {
ElementsAre(Sym("foo", FooWithoutHeader.range())));
}
+TEST(LocateSymbol, NearbyTokenSmoke) {
+ auto T = Annotations(R"cpp(
+ // prints e^rr and crashes
+ void die(const char* [[err]]);
+ )cpp");
+ auto AST = TestTU::withCode(T.code()).build();
+ // We don't pass an index, so can't hit index-based fallback.
+ EXPECT_THAT(locateSymbolAt(AST, T.point()),
+ ElementsAre(Sym("err", T.range())));
+}
+
+TEST(LocateSymbol, NearbyIdentifier) {
+ const char *Tests[] = {
+ R"cpp(
+ // regular identifiers (won't trigger)
+ int hello;
+ int y = he^llo;
+ )cpp",
+ R"cpp(
+ // disabled preprocessor sections
+ int [[hello]];
+ #if 0
+ int y = ^hello;
+ #endif
+ )cpp",
+ R"cpp(
+ // comments
+ // he^llo, world
+ int [[hello]];
+ )cpp",
+ R"cpp(
+ // not triggered by string literals
+ int hello;
+ const char* greeting = "h^ello, world";
+ )cpp",
+
+ R"cpp(
+ // can refer to macro invocations
+ #define INT int
+ [[INT]] x;
+ // I^NT
+ )cpp",
+
+ R"cpp(
+ // can refer to macro invocations (even if they expand to nothing)
+ #define EMPTY
+ [[EMPTY]] int x;
+ // E^MPTY
+ )cpp",
+
+ R"cpp(
+ // prefer nearest occurrence, backwards is worse than forwards
+ int hello;
+ int x = hello;
+ // h^ello
+ int y = [[hello]];
+ int z = hello;
+ )cpp",
+
+ R"cpp(
+ // short identifiers find near results
+ int [[hi]];
+ // h^i
+ )cpp",
+ R"cpp(
+ // short identifiers don't find far results
+ int hi;
+
+
+
+ // h^i
+ )cpp",
+ };
+ for (const char *Test : Tests) {
+ Annotations T(Test);
+ auto AST = TestTU::withCode(T.code()).build();
+ const auto &SM = AST.getSourceManager();
+ llvm::Optional<Range> Nearby;
+ auto Word =
+ SpelledWord::touching(cantFail(sourceLocationInMainFile(SM, T.point())),
+ AST.getTokens(), AST.getLangOpts());
+ if (!Word) {
+ ADD_FAILURE() << "No word at point! " << Test;
+ continue;
+ }
+ if (const auto *Tok = findNearbyIdentifier(*Word, AST.getTokens()))
+ Nearby = halfOpenToRange(SM, CharSourceRange::getCharRange(
+ Tok->location(), Tok->endLocation()));
+ if (T.ranges().empty())
+ EXPECT_THAT(Nearby, Eq(llvm::None)) << Test;
+ else
+ EXPECT_EQ(Nearby, T.range()) << Test;
+ }
+}
+
TEST(FindReferences, WithinAST) {
const char *Tests[] = {
R"cpp(// Local variable
More information about the cfe-commits
mailing list