[clang-tools-extra] r331457 - [clangd] Incorporate #occurrences in scoring code complete results.

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Thu May 3 07:53:03 PDT 2018


Author: sammccall
Date: Thu May  3 07:53:02 2018
New Revision: 331457

URL: http://llvm.org/viewvc/llvm-project?rev=331457&view=rev
Log:
[clangd] Incorporate #occurrences in scoring code complete results.

Summary: needs tests

Reviewers: ilya-biryukov

Subscribers: klimek, ioeric, MaskRay, jkorous, cfe-commits

Differential Revision: https://reviews.llvm.org/D46183

Modified:
    clang-tools-extra/trunk/clangd/CodeComplete.cpp
    clang-tools-extra/trunk/clangd/index/Index.cpp
    clang-tools-extra/trunk/clangd/index/Index.h
    clang-tools-extra/trunk/clangd/index/MemIndex.cpp
    clang-tools-extra/trunk/unittests/clangd/CodeCompleteTests.cpp

Modified: clang-tools-extra/trunk/clangd/CodeComplete.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CodeComplete.cpp?rev=331457&r1=331456&r2=331457&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/CodeComplete.cpp (original)
+++ clang-tools-extra/trunk/clangd/CodeComplete.cpp Thu May  3 07:53:02 2018
@@ -229,24 +229,27 @@ struct CompletionCandidate {
 
   // Computes the "symbol quality" score for this completion. Higher is better.
   float score() const {
-    // For now we just use the Sema priority, mapping it onto a 0-1 interval.
-    if (!SemaResult) // FIXME(sammccall): better scoring for index results.
-      return 0.3f;   // fixed mediocre score for index-only results.
+    float Score = 1;
+    if (IndexResult)
+      Score *= quality(*IndexResult);
+    if (SemaResult) {
+      // For now we just use the Sema priority, mapping it onto a 0-2 interval.
+      // That makes 1 neutral-ish, so we don't reward/penalize non-Sema results.
+      // Priority 80 is a really bad score.
+      Score *= 2 - std::min<float>(80, SemaResult->Priority) / 40;
 
-    // Priority 80 is a really bad score.
-    float Score = 1 - std::min<float>(80, SemaResult->Priority) / 80;
-
-    switch (static_cast<CXAvailabilityKind>(SemaResult->Availability)) {
-    case CXAvailability_Available:
-      // No penalty.
-      break;
-    case CXAvailability_Deprecated:
-      Score *= 0.1f;
-      break;
-    case CXAvailability_NotAccessible:
-    case CXAvailability_NotAvailable:
-      Score = 0;
-      break;
+      switch (static_cast<CXAvailabilityKind>(SemaResult->Availability)) {
+      case CXAvailability_Available:
+        // No penalty.
+        break;
+      case CXAvailability_Deprecated:
+        Score *= 0.1f;
+        break;
+      case CXAvailability_NotAccessible:
+      case CXAvailability_NotAvailable:
+        Score = 0;
+        break;
+      }
     }
     return Score;
   }

Modified: clang-tools-extra/trunk/clangd/index/Index.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.cpp?rev=331457&r1=331456&r2=331457&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Index.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Index.cpp Thu May  3 07:53:02 2018
@@ -48,6 +48,14 @@ raw_ostream &operator<<(raw_ostream &OS,
   return OS << S.Scope << S.Name;
 }
 
+double quality(const Symbol &S) {
+  // This avoids a sharp gradient for tail symbols, and also neatly avoids the
+  // question of whether 0 references means a bad symbol or missing data.
+  if (S.References < 3)
+    return 1;
+  return std::log(S.References);
+}
+
 SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &ID) const {
   auto It = std::lower_bound(Symbols.begin(), Symbols.end(), ID,
                              [](const Symbol &S, const SymbolID &I) {

Modified: clang-tools-extra/trunk/clangd/index/Index.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.h?rev=331457&r1=331456&r2=331457&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Index.h (original)
+++ clang-tools-extra/trunk/clangd/index/Index.h Thu May  3 07:53:02 2018
@@ -199,6 +199,12 @@ struct Symbol {
 };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
 
+// Computes query-independent quality score for a Symbol.
+// This currently falls in the range [1, ln(#indexed documents)].
+// FIXME: this should probably be split into symbol -> signals
+//        and signals -> score, so it can be reused for Sema completions.
+double quality(const Symbol &S);
+
 // An immutable symbol container that stores a set of symbols.
 // The container will maintain the lifetime of the symbols.
 class SymbolSlab {

Modified: clang-tools-extra/trunk/clangd/index/MemIndex.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/MemIndex.cpp?rev=331457&r1=331456&r2=331457&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/MemIndex.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/MemIndex.cpp Thu May  3 07:53:02 2018
@@ -47,7 +47,7 @@ bool MemIndex::fuzzyFind(
         continue;
 
       if (auto Score = Filter.match(Sym->Name)) {
-        Top.emplace(-*Score, Sym);
+        Top.emplace(-*Score * quality(*Sym), Sym);
         if (Top.size() > Req.MaxCandidateCount) {
           More = true;
           Top.pop();

Modified: clang-tools-extra/trunk/unittests/clangd/CodeCompleteTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/CodeCompleteTests.cpp?rev=331457&r1=331456&r2=331457&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/CodeCompleteTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/CodeCompleteTests.cpp Thu May  3 07:53:02 2018
@@ -144,6 +144,13 @@ Symbol cls(StringRef Name) {
 Symbol var(StringRef Name) {
   return sym(Name, index::SymbolKind::Variable, "@\\0");
 }
+Symbol ns(StringRef Name) {
+  return sym(Name, index::SymbolKind::Namespace, "@N@\\0");
+}
+Symbol withReferences(int N, Symbol S) {
+  S.References = N;
+  return S;
+}
 
 TEST(CompletionTest, Limit) {
   clangd::CodeCompleteOptions Opts;
@@ -443,6 +450,14 @@ TEST(CompletionTest, ScopedWithFilter) {
               UnorderedElementsAre(AllOf(Named("XYZ"), Filter("XYZ"))));
 }
 
+TEST(CompletionTest, ReferencesAffectRanking) {
+  auto Results = completions("int main() { abs^ }", {ns("absl"), func("abs")});
+  EXPECT_THAT(Results.items, HasSubsequence(Named("abs"), Named("absl")));
+  Results = completions("int main() { abs^ }",
+                        {withReferences(10000, ns("absl")), func("abs")});
+  EXPECT_THAT(Results.items, HasSubsequence(Named("absl"), Named("abs")));
+}
+
 TEST(CompletionTest, GlobalQualified) {
   auto Results = completions(
       R"cpp(




More information about the cfe-commits mailing list