[PATCH] D59300: [clangd] Tune the fuzzy-matching algorithm

Ilya Biryukov via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 15 04:13:02 PDT 2019


ilya-biryukov updated this revision to Diff 190803.
ilya-biryukov marked 3 inline comments as done.
ilya-biryukov added a comment.

Address comments:

- Shortened the comment to fit on a single line.
- Added a comment about single-case patterns.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59300/new/

https://reviews.llvm.org/D59300

Files:
  clang-tools-extra/clangd/FuzzyMatch.cpp
  clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp


Index: clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp
+++ clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp
@@ -9,6 +9,7 @@
 #include "FuzzyMatch.h"
 
 #include "llvm/ADT/StringExtras.h"
+#include "gmock/gmock-matchers.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
@@ -247,6 +248,8 @@
   EXPECT_THAT("foo", ranks("[foo]", "[Foo]"));
   EXPECT_THAT("onMes",
               ranks("[onMes]sage", "[onmes]sage", "[on]This[M]ega[Es]capes"));
+  EXPECT_THAT("onmes",
+              ranks("[onmes]sage", "[onMes]sage", "[on]This[M]ega[Es]capes"));
   EXPECT_THAT("CC", ranks("[C]amel[C]ase", "[c]amel[C]ase"));
   EXPECT_THAT("cC", ranks("[c]amel[C]ase", "[C]amel[C]ase"));
   EXPECT_THAT("p", ranks("[p]", "[p]arse", "[p]osix", "[p]afdsa", "[p]ath"));
@@ -270,12 +273,18 @@
 // Verify some bounds so we know scores fall in the right range.
 // Testing exact scores is fragile, so we prefer Ranking tests.
 TEST(FuzzyMatch, Scoring) {
-  EXPECT_THAT("abs", matches("[a]w[B]xYz[S]", 0.f));
+  EXPECT_THAT("abs", matches("[a]w[B]xYz[S]", 7.f / 12.f));
   EXPECT_THAT("abs", matches("[abs]l", 1.f));
   EXPECT_THAT("abs", matches("[abs]", 2.f));
   EXPECT_THAT("Abs", matches("[abs]", 2.f));
 }
 
+TEST(FuzzyMatch, InitialismAndSegment) {
+  // We want these scores to be roughly the same.
+  EXPECT_THAT("up", matches("[u]nique_[p]tr", 3.f / 4.f));
+  EXPECT_THAT("up", matches("[up]per_bound", 1.f));
+}
+
 // Returns pretty-printed segmentation of Text.
 // e.g. std::basic_string --> +--  +---- +-----
 std::string segment(llvm::StringRef Text) {
Index: clang-tools-extra/clangd/FuzzyMatch.cpp
===================================================================
--- clang-tools-extra/clangd/FuzzyMatch.cpp
+++ clang-tools-extra/clangd/FuzzyMatch.cpp
@@ -71,7 +71,7 @@
 // Score field is 15 bits wide, min value is -2^14, we use half of that.
 static constexpr int AwfulScore = -(1 << 13);
 static bool isAwful(int S) { return S < AwfulScore / 2; }
-static constexpr int PerfectBonus = 3; // Perfect per-pattern-char score.
+static constexpr int PerfectBonus = 4; // Perfect per-pattern-char score.
 
 FuzzyMatcher::FuzzyMatcher(llvm::StringRef Pattern)
     : PatN(std::min<int>(MaxPat, Pattern.size())),
@@ -267,24 +267,31 @@
 }
 
 int FuzzyMatcher::skipPenalty(int W, Action Last) const {
-  int S = 0;
+  if (W == 0) // Skipping the first character.
+    return 3;
   if (WordRole[W] == Head) // Skipping a segment.
-    S += 1;
-  if (Last == Match) // Non-consecutive match.
-    S += 2;          // We'd rather skip a segment than split our match.
-  return S;
+    return 1; // We want to keep this lower than a consecutive match bonus.
+  // Instead of penalizing non-consecutive matches, we give a bonus to a
+  // consecutive match in matchBonus. This produces a better score distribution
+  // than penalties in case of small patterns, e.g. 'up' for 'unique_ptr'.
+  return 0;
 }
 
 int FuzzyMatcher::matchBonus(int P, int W, Action Last) const {
   assert(LowPat[P] == LowWord[W]);
   int S = 1;
-  // Bonus: pattern so far is a (case-insensitive) prefix of the word.
-  if (P == W) // We can't skip pattern characters, so we must have matched all.
-    ++S;
+  bool IsPatSingleCase =
+      (PatTypeSet == 1 << Lower) || (PatTypeSet == 1 << Upper);
   // Bonus: case matches, or a Head in the pattern aligns with one in the word.
-  if ((Pat[P] == Word[W] && ((PatTypeSet & 1 << Upper) || P == W)) ||
-      (PatRole[P] == Head && WordRole[W] == Head))
+  // Single-case patterns lack segmentation signals and we assume any character
+  // can be a head of a segment.
+  if (Pat[P] == Word[W] ||
+      (WordRole[W] == Head && (IsPatSingleCase || PatRole[P] == Head)))
     ++S;
+  // Bonus: a consecutive match. First character match also gets a bonus to
+  // ensure the final score of a prefix match normalizes to 1.0.
+  if (W == 0 || Last == Match)
+    S += 2;
   // Penalty: matching inside a segment (and previous char wasn't matched).
   if (WordRole[W] == Tail && P && Last == Miss)
     S -= 3;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D59300.190803.patch
Type: text/x-patch
Size: 4155 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20190315/a63cfa41/attachment-0001.bin>


More information about the cfe-commits mailing list