[PATCH] D113995: [clangd] Dex Trigrams: Improve query trigram generation
Kirill Bobyrev via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Tue Nov 16 06:52:03 PST 2021
kbobyrev updated this revision to Diff 387615.
kbobyrev added a comment.
Add a small comment.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D113995/new/
https://reviews.llvm.org/D113995
Files:
clang-tools-extra/clangd/index/dex/Trigram.cpp
clang-tools-extra/clangd/unittests/DexTests.cpp
Index: clang-tools-extra/clangd/unittests/DexTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/DexTests.cpp
+++ clang-tools-extra/clangd/unittests/DexTests.cpp
@@ -404,6 +404,9 @@
EXPECT_THAT(identifierTrigramTokens("IsOK"),
trigramsAre({"i", "is", "io", "iso", "iok", "sok"}));
+ EXPECT_THAT(identifierTrigramTokens("_pb"), trigramsAre({"_", "_p"}));
+ EXPECT_THAT(identifierTrigramTokens("__pb"), trigramsAre({"_", "__", "_p"}));
+
EXPECT_THAT(
identifierTrigramTokens("abc_defGhij__klm"),
trigramsAre({"a", "ab", "ad", "abc", "abd", "ade", "adg", "bcd",
@@ -422,6 +425,14 @@
EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"}));
EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({}));
+ EXPECT_THAT(generateQueryTrigrams("m_"), trigramsAre({"m_"}));
+
+ EXPECT_THAT(generateQueryTrigrams("p_b"), trigramsAre({"pb"}));
+ EXPECT_THAT(generateQueryTrigrams("pb_"), trigramsAre({"pb"}));
+ EXPECT_THAT(generateQueryTrigrams("_p"), trigramsAre({"_p"}));
+ EXPECT_THAT(generateQueryTrigrams("_pb_"), trigramsAre({"_p"}));
+ EXPECT_THAT(generateQueryTrigrams("__pb"), trigramsAre({"_p"}));
+
EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"}));
EXPECT_THAT(generateQueryTrigrams("clangd"),
@@ -545,6 +556,18 @@
Req.Query = "ttf";
EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre("OneTwoThreeFour"));
EXPECT_FALSE(Incomplete) << "3-char string is not a short query";
+
+ I = Dex::build(generateSymbols({"tok::kw_builtin_va_arg", "bar::whatever"}),
+ RefSlab(), RelationSlab());
+
+ Req.Query = "kw_";
+ EXPECT_THAT(match(*I, Req, &Incomplete),
+ ElementsAre("tok::kw_builtin_va_arg"));
+ EXPECT_FALSE(Incomplete) << "kw_ is enough to match the whole symbol";
+ Req.Scopes = {"tok::"};
+ EXPECT_THAT(match(*I, Req, &Incomplete),
+ ElementsAre("tok::kw_builtin_va_arg"));
+ EXPECT_FALSE(Incomplete) << "kw_ is enough to match the whole symbol";
}
TEST(DexTest, MatchQualifiedNamesWithoutSpecificScope) {
Index: clang-tools-extra/clangd/index/dex/Trigram.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/Trigram.cpp
+++ clang-tools-extra/clangd/index/dex/Trigram.cpp
@@ -101,17 +101,43 @@
std::vector<Token> generateQueryTrigrams(llvm::StringRef Query) {
if (Query.empty())
return {};
- std::string LowercaseQuery = Query.lower();
- if (Query.size() < 3) // short-query trigrams only
- return {Token(Token::Kind::Trigram, LowercaseQuery)};
// Apply fuzzy matching text segmentation.
std::vector<CharRole> Roles(Query.size());
calculateRoles(Query, llvm::makeMutableArrayRef(Roles.data(), Query.size()));
+ std::string LowercaseQuery = Query.lower();
+
+ if (LowercaseQuery.size() < 3) // short-query trigrams only.
+ return {Token(Token::Kind::Trigram, LowercaseQuery)};
+
+ unsigned ValidSymbols =
+ llvm::count_if(Roles, [](CharRole R) { return R == Head || R == Tail; });
+ // If the query does not have any alphanumeric symbols, don't restrict the
+ // result to the names.
+ if (ValidSymbols == 0)
+ return {};
+ // For queries with very few letters, emulate what generateIdentifierTrigrams
+ // outputs for the beginning of the Identifier.
+ if (ValidSymbols < 3) {
+ std::string Letters =
+ Roles.front() == Separator ? std::string(1, Query.front()) : "";
+ for (unsigned I = 0; I < LowercaseQuery.size(); ++I) {
+ if (Roles[I] == Head || Roles[I] == Tail) {
+ Letters += LowercaseQuery[I];
+ // Similar to the identifier trigram generation, stop here for the
+ // queries starting with the separator, i.e. "_va" will only output
+ // "_v" here, identifier trigram generator will output "_" and "_v"
+ if (Roles.front() == Separator)
+ break;
+ }
+ }
+ return {Token(Token::Kind::Trigram, Letters)};
+ }
+
llvm::DenseSet<Token> UniqueTrigrams;
std::string Chars;
- for (unsigned I = 0; I < Query.size(); ++I) {
+ for (unsigned I = 0; I < LowercaseQuery.size(); ++I) {
if (Roles[I] != Head && Roles[I] != Tail)
continue; // Skip delimiters.
Chars.push_back(LowercaseQuery[I]);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D113995.387615.patch
Type: text/x-patch
Size: 4306 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20211116/c37802ca/attachment-0001.bin>
More information about the cfe-commits
mailing list