[PATCH] D59300: [clangd] Tune the fuzzy-matching algorithm
Ilya Biryukov via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 15 04:13:02 PDT 2019
ilya-biryukov updated this revision to Diff 190803.
ilya-biryukov marked 3 inline comments as done.
ilya-biryukov added a comment.
Address comments:
- Shorten the comment so that it fits on a single line.
- Add a comment about single-case patterns.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D59300/new/
https://reviews.llvm.org/D59300
Files:
clang-tools-extra/clangd/FuzzyMatch.cpp
clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp
Index: clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp
+++ clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp
@@ -9,6 +9,7 @@
#include "FuzzyMatch.h"
#include "llvm/ADT/StringExtras.h"
+#include "gmock/gmock-matchers.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@@ -247,6 +248,8 @@
EXPECT_THAT("foo", ranks("[foo]", "[Foo]"));
EXPECT_THAT("onMes",
ranks("[onMes]sage", "[onmes]sage", "[on]This[M]ega[Es]capes"));
+ EXPECT_THAT("onmes",
+ ranks("[onmes]sage", "[onMes]sage", "[on]This[M]ega[Es]capes"));
EXPECT_THAT("CC", ranks("[C]amel[C]ase", "[c]amel[C]ase"));
EXPECT_THAT("cC", ranks("[c]amel[C]ase", "[C]amel[C]ase"));
EXPECT_THAT("p", ranks("[p]", "[p]arse", "[p]osix", "[p]afdsa", "[p]ath"));
@@ -270,12 +273,18 @@
// Verify some bounds so we know scores fall in the right range.
// Testing exact scores is fragile, so we prefer Ranking tests.
TEST(FuzzyMatch, Scoring) {
- EXPECT_THAT("abs", matches("[a]w[B]xYz[S]", 0.f));
+ EXPECT_THAT("abs", matches("[a]w[B]xYz[S]", 7.f / 12.f));
EXPECT_THAT("abs", matches("[abs]l", 1.f));
EXPECT_THAT("abs", matches("[abs]", 2.f));
EXPECT_THAT("Abs", matches("[abs]", 2.f));
}
+TEST(FuzzyMatch, InitialismAndSegment) {
+ // We want these scores to be roughly the same.
+ EXPECT_THAT("up", matches("[u]nique_[p]tr", 3.f / 4.f));
+ EXPECT_THAT("up", matches("[up]per_bound", 1.f));
+}
+
// Returns pretty-printed segmentation of Text.
// e.g. std::basic_string --> +-- +---- +-----
std::string segment(llvm::StringRef Text) {
Index: clang-tools-extra/clangd/FuzzyMatch.cpp
===================================================================
--- clang-tools-extra/clangd/FuzzyMatch.cpp
+++ clang-tools-extra/clangd/FuzzyMatch.cpp
@@ -71,7 +71,7 @@
// Score field is 15 bits wide, min value is -2^14, we use half of that.
static constexpr int AwfulScore = -(1 << 13);
static bool isAwful(int S) { return S < AwfulScore / 2; }
-static constexpr int PerfectBonus = 3; // Perfect per-pattern-char score.
+static constexpr int PerfectBonus = 4; // Perfect per-pattern-char score.
FuzzyMatcher::FuzzyMatcher(llvm::StringRef Pattern)
: PatN(std::min<int>(MaxPat, Pattern.size())),
@@ -267,24 +267,31 @@
}
int FuzzyMatcher::skipPenalty(int W, Action Last) const {
- int S = 0;
+ if (W == 0) // Skipping the first character.
+ return 3;
if (WordRole[W] == Head) // Skipping a segment.
- S += 1;
- if (Last == Match) // Non-consecutive match.
- S += 2; // We'd rather skip a segment than split our match.
- return S;
+ return 1; // We want to keep this lower than a consecutive match bonus.
+ // Instead of penalizing non-consecutive matches, we give a bonus to a
+ // consecutive match in matchBonus. This produces a better score distribution
+ // than penalties in case of small patterns, e.g. 'up' for 'unique_ptr'.
+ return 0;
}
int FuzzyMatcher::matchBonus(int P, int W, Action Last) const {
assert(LowPat[P] == LowWord[W]);
int S = 1;
- // Bonus: pattern so far is a (case-insensitive) prefix of the word.
- if (P == W) // We can't skip pattern characters, so we must have matched all.
- ++S;
+ bool IsPatSingleCase =
+ (PatTypeSet == 1 << Lower) || (PatTypeSet == 1 << Upper);
// Bonus: case matches, or a Head in the pattern aligns with one in the word.
- if ((Pat[P] == Word[W] && ((PatTypeSet & 1 << Upper) || P == W)) ||
- (PatRole[P] == Head && WordRole[W] == Head))
+ // Single-case patterns lack segmentation signals and we assume any character
+ // can be a head of a segment.
+ if (Pat[P] == Word[W] ||
+ (WordRole[W] == Head && (IsPatSingleCase || PatRole[P] == Head)))
++S;
+ // Bonus: a consecutive match. First character match also gets a bonus to
+ // ensure prefix final match score normalizes to 1.0.
+ if (W == 0 || Last == Match)
+ S += 2;
// Penalty: matching inside a segment (and previous char wasn't matched).
if (WordRole[W] == Tail && P && Last == Miss)
S -= 3;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D59300.190803.patch
Type: text/x-patch
Size: 4155 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20190315/a63cfa41/attachment-0001.bin>
More information about the cfe-commits
mailing list