[PATCH] D46183: [clangd] Incorporate #occurrences in scoring code complete results.
Sam McCall via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 27 05:33:34 PDT 2018
sammccall created this revision.
sammccall added a reviewer: ilya-biryukov.
Herald added subscribers: cfe-commits, jkorous, MaskRay, ioeric, klimek.
This revision still needs tests.
Repository:
rCTE Clang Tools Extra
https://reviews.llvm.org/D46183
Files:
clangd/CodeComplete.cpp
clangd/index/Index.cpp
clangd/index/Index.h
clangd/index/MemIndex.cpp
Index: clangd/index/MemIndex.cpp
===================================================================
--- clangd/index/MemIndex.cpp
+++ clangd/index/MemIndex.cpp
@@ -47,7 +47,7 @@
continue;
if (auto Score = Filter.match(Sym->Name)) {
- Top.emplace(-*Score, Sym);
+ Top.emplace(-*Score * quality(*Sym), Sym);
if (Top.size() > Req.MaxCandidateCount) {
More = true;
Top.pop();
Index: clangd/index/Index.h
===================================================================
--- clangd/index/Index.h
+++ clangd/index/Index.h
@@ -199,6 +199,12 @@
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
+// Computes query-independent quality score for a Symbol.
+// This currently falls in the range [1, ln(#indexed documents)].
+// FIXME: this should probably be split into symbol -> signals
+// and signals -> score, so it can be reused for Sema completions.
+double quality(const Symbol &S);
+
// An immutable symbol container that stores a set of symbols.
// The container will maintain the lifetime of the symbols.
class SymbolSlab {
Index: clangd/index/Index.cpp
===================================================================
--- clangd/index/Index.cpp
+++ clangd/index/Index.cpp
@@ -48,6 +48,14 @@
return OS << S.Scope << S.Name;
}
+double quality(const Symbol &S) {
+ // This avoids a sharp gradient for tail symbols, and also neatly avoids the
+ // question of whether 0 references means a bad symbol or missing data.
+ if (S.References < 3)
+ return 1;
+ return std::log(S.References);
+}
+
SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &ID) const {
auto It = std::lower_bound(Symbols.begin(), Symbols.end(), ID,
[](const Symbol &S, const SymbolID &I) {
Index: clangd/CodeComplete.cpp
===================================================================
--- clangd/CodeComplete.cpp
+++ clangd/CodeComplete.cpp
@@ -229,24 +229,26 @@
// Computes the "symbol quality" score for this completion. Higher is better.
float score() const {
- // For now we just use the Sema priority, mapping it onto a 0-1 interval.
- if (!SemaResult) // FIXME(sammccall): better scoring for index results.
- return 0.3f; // fixed mediocre score for index-only results.
-
- // Priority 80 is a really bad score.
- float Score = 1 - std::min<float>(80, SemaResult->Priority) / 80;
+ float Score = 1;
+ if (IndexResult)
+ Score *= quality(*IndexResult);
+ if (SemaResult) {
+ // For now we just use the Sema priority, mapping it onto a 0-2 interval.
+ // Priority 80 is a really bad score.
+ Score *= 2 - std::min<float>(80, SemaResult->Priority) / 40;
- switch (static_cast<CXAvailabilityKind>(SemaResult->Availability)) {
- case CXAvailability_Available:
- // No penalty.
- break;
- case CXAvailability_Deprecated:
- Score *= 0.1f;
- break;
- case CXAvailability_NotAccessible:
- case CXAvailability_NotAvailable:
- Score = 0;
- break;
+ switch (static_cast<CXAvailabilityKind>(SemaResult->Availability)) {
+ case CXAvailability_Available:
+ // No penalty.
+ break;
+ case CXAvailability_Deprecated:
+ Score *= 0.1f;
+ break;
+ case CXAvailability_NotAccessible:
+ case CXAvailability_NotAvailable:
+ Score = 0;
+ break;
+ }
}
return Score;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D46183.144317.patch
Type: text/x-patch
Size: 3468 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20180427/5aae566f/attachment.bin>
More information about the cfe-commits mailing list