[PATCH] D46183: [clangd] Incorporate #occurrences in scoring code complete results.

Sam McCall via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Fri Apr 27 05:33:34 PDT 2018


sammccall created this revision.
sammccall added a reviewer: ilya-biryukov.
Herald added subscribers: cfe-commits, jkorous, MaskRay, ioeric, klimek.

needs tests


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D46183

Files:
  clangd/CodeComplete.cpp
  clangd/index/Index.cpp
  clangd/index/Index.h
  clangd/index/MemIndex.cpp


Index: clangd/index/MemIndex.cpp
===================================================================
--- clangd/index/MemIndex.cpp
+++ clangd/index/MemIndex.cpp
@@ -47,7 +47,7 @@
         continue;
 
       if (auto Score = Filter.match(Sym->Name)) {
-        Top.emplace(-*Score, Sym);
+        Top.emplace(-*Score * quality(*Sym), Sym);
         if (Top.size() > Req.MaxCandidateCount) {
           More = true;
           Top.pop();
Index: clangd/index/Index.h
===================================================================
--- clangd/index/Index.h
+++ clangd/index/Index.h
@@ -199,6 +199,12 @@
 };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
 
+// Computes a query-independent quality score for a Symbol.
+// This currently falls in the range [1, ln(#indexed documents)].
+// FIXME: this should probably be split into symbol -> signals
+//        and signals -> score, so it can be reused for Sema completions.
+double quality(const Symbol &S);
+
 // An immutable symbol container that stores a set of symbols.
 // The container will maintain the lifetime of the symbols.
 class SymbolSlab {
Index: clangd/index/Index.cpp
===================================================================
--- clangd/index/Index.cpp
+++ clangd/index/Index.cpp
@@ -48,6 +48,14 @@
   return OS << S.Scope << S.Name;
 }
 
+double quality(const Symbol &S) {
+  // Below 3 references the score is a flat 1. This avoids a sharp gradient for
+  // tail symbols, and the question of whether 0 means a bad symbol or no data.
+  if (S.References < 3)
+    return 1;
+  return std::log(S.References);
+}
+
 SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &ID) const {
   auto It = std::lower_bound(Symbols.begin(), Symbols.end(), ID,
                              [](const Symbol &S, const SymbolID &I) {
Index: clangd/CodeComplete.cpp
===================================================================
--- clangd/CodeComplete.cpp
+++ clangd/CodeComplete.cpp
@@ -229,24 +229,26 @@
 
   // Computes the "symbol quality" score for this completion. Higher is better.
   float score() const {
-    // For now we just use the Sema priority, mapping it onto a 0-1 interval.
-    if (!SemaResult) // FIXME(sammccall): better scoring for index results.
-      return 0.3f;   // fixed mediocre score for index-only results.
-
-    // Priority 80 is a really bad score.
-    float Score = 1 - std::min<float>(80, SemaResult->Priority) / 80;
+    float Score = 1;
+    if (IndexResult)
+      Score *= quality(*IndexResult);
+    if (SemaResult) {
+      // For now we just use the Sema priority, mapping it linearly onto [0, 2]:
+      // priority 0 gives 2; priority 80 (a really bad score) or worse gives 0.
+      Score *= 2 - std::min<float>(80, SemaResult->Priority) / 40;
 
-    switch (static_cast<CXAvailabilityKind>(SemaResult->Availability)) {
-    case CXAvailability_Available:
-      // No penalty.
-      break;
-    case CXAvailability_Deprecated:
-      Score *= 0.1f;
-      break;
-    case CXAvailability_NotAccessible:
-    case CXAvailability_NotAvailable:
-      Score = 0;
-      break;
+      switch (static_cast<CXAvailabilityKind>(SemaResult->Availability)) {
+      case CXAvailability_Available:
+        // No penalty.
+        break;
+      case CXAvailability_Deprecated:
+        Score *= 0.1f;
+        break;
+      case CXAvailability_NotAccessible:
+      case CXAvailability_NotAvailable:
+        Score = 0; // Also discards any index quality factor applied above.
+        break;
+      }
     }
     return Score;
   }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D46183.144317.patch
Type: text/x-patch
Size: 3468 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20180427/5aae566f/attachment.bin>


More information about the cfe-commits mailing list