[clang-tools-extra] r343777 - [clangd] Remove one-segment-skipping from Dex trigrams.

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 4 07:08:11 PDT 2018


Author: sammccall
Date: Thu Oct  4 07:08:11 2018
New Revision: 343777

URL: http://llvm.org/viewvc/llvm-project?rev=343777&view=rev
Log:
[clangd] Remove one-segment-skipping from Dex trigrams.

Summary:
Currently, queries like "ab" can match identifiers like a_yellow_bee, because
trigram generation is allowed to skip over one whole segment.
The value of allowing a skip of exactly one segment, but no more, seems dubious.
The skipping costs ~3% of overall RAM (~9% of posting-list RAM) and some quality.

Reviewers: ilya-biryukov, ioeric

Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits

Differential Revision: https://reviews.llvm.org/D52885

Modified:
    clang-tools-extra/trunk/clangd/index/dex/Trigram.cpp
    clang-tools-extra/trunk/clangd/index/dex/Trigram.h
    clang-tools-extra/trunk/unittests/clangd/DexTests.cpp

Modified: clang-tools-extra/trunk/clangd/index/dex/Trigram.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/Trigram.cpp?rev=343777&r1=343776&r2=343777&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/Trigram.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/dex/Trigram.cpp Thu Oct  4 07:08:11 2018
@@ -36,17 +36,15 @@ std::vector<Token> generateIdentifierTri
   //
   // * Next Tail - next character from the same segment
   // * Next Head - front character of the next segment
-  // * Skip-1-Next Head - front character of the skip-1-next segment
   //
   // Next stores tuples of three indices in the presented order, if a variant is
   // not available then 0 is stored.
   std::vector<std::array<unsigned, 3>> Next(LowercaseIdentifier.size());
-  unsigned NextTail = 0, NextHead = 0, NextNextHead = 0;
+  unsigned NextTail = 0, NextHead = 0;
   for (int I = LowercaseIdentifier.size() - 1; I >= 0; --I) {
-    Next[I] = {{NextTail, NextHead, NextNextHead}};
+    Next[I] = {{NextTail, NextHead}};
     NextTail = Roles[I] == Tail ? I : 0;
     if (Roles[I] == Head) {
-      NextNextHead = NextHead;
       NextHead = I;
     }
   }

Modified: clang-tools-extra/trunk/clangd/index/dex/Trigram.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/Trigram.h?rev=343777&r1=343776&r2=343777&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/Trigram.h (original)
+++ clang-tools-extra/trunk/clangd/index/dex/Trigram.h Thu Oct  4 07:08:11 2018
@@ -37,7 +37,7 @@ namespace dex {
 ///
 /// The symbol's name is broken into segments, e.g. "FooBar" has two segments.
 /// Trigrams can start at any character in the input. Then we can choose to move
-/// to the next character, move to the start of the next segment, or stop.
+/// to the next character or to the start of the next segment.
 ///
 /// Short trigrams (length 1-2) are used for short queries. These are:
 ///  - prefixes of the identifier, of length 1 and 2

Modified: clang-tools-extra/trunk/unittests/clangd/DexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/DexTests.cpp?rev=343777&r1=343776&r2=343777&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/DexTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/DexTests.cpp Thu Oct  4 07:08:11 2018
@@ -381,8 +381,7 @@ TEST(DexTrigrams, IdentifierTrigrams) {
                            "cde", "def"}));
 
   EXPECT_THAT(generateIdentifierTrigrams("a_b_c_d_e_"),
-              trigramsAre({"a", "a_", "ab", "abc", "abd", "acd", "ace", "bcd",
-                           "bce", "bde", "cde"}));
+              trigramsAre({"a", "a_", "ab", "abc", "bcd", "cde"}));
 
   EXPECT_THAT(generateIdentifierTrigrams("unique_ptr"),
               trigramsAre({"u", "un", "up", "uni", "unp", "upt", "niq", "nip",
@@ -396,14 +395,11 @@ TEST(DexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("IsOK"),
               trigramsAre({"i", "is", "io", "iso", "iok", "sok"}));
 
-  auto X = generateIdentifierTrigrams("abc_defGhij__klm");
   EXPECT_THAT(
       generateIdentifierTrigrams("abc_defGhij__klm"),
-      trigramsAre({"a",   "ab",  "ad",  "abc", "abd", "abg", "ade", "adg",
-                   "adk", "agh", "agk", "bcd", "bcg", "bde", "bdg", "bdk",
-                   "bgh", "bgk", "cde", "cdg", "cdk", "cgh", "cgk", "def",
-                   "deg", "dek", "dgh", "dgk", "dkl", "efg", "efk", "egh",
-                   "egk", "ekl", "fgh", "fgk", "fkl", "ghi", "ghk", "gkl",
+      trigramsAre({"a",   "ab",  "ad",  "abc", "abd", "ade", "adg", "bcd",
+                   "bde", "bdg", "cde", "cdg", "def", "deg", "dgh", "dgk",
+                   "efg", "egh", "egk", "fgh", "fgk", "ghi", "ghk", "gkl",
                    "hij", "hik", "hkl", "ijk", "ikl", "jkl", "klm"}));
 }
 




More information about the cfe-commits mailing list