[clang-tools-extra] 158af0d - [clangd] Refactor code completion signal's utility properties.

Utkarsh Saxena via cfe-commits cfe-commits at lists.llvm.org
Wed Sep 23 07:15:39 PDT 2020


Author: Utkarsh Saxena
Date: 2020-09-23T16:12:18+02:00
New Revision: 158af0d3d165c0382a6a291e81ffecf0b18ffe77

URL: https://github.com/llvm/llvm-project/commit/158af0d3d165c0382a6a291e81ffecf0b18ffe77
DIFF: https://github.com/llvm/llvm-project/commit/158af0d3d165c0382a6a291e81ffecf0b18ffe77.diff

LOG: [clangd] Refactor code completion signal's utility properties.

Current implementation of heuristic-based scoring function also contains
computation of derived signals (e.g. whether name contains a word from
context, computing file distances, scope distances.)
This is an attempt to separate out the logic for computation of derived
signals from the scoring function.
This will allow us to have a clean API for scoring functions that will
take only concrete code completion signals as input.

Differential Revision: https://reviews.llvm.org/D88146

Added: 
    

Modified: 
    clang-tools-extra/clangd/Quality.cpp
    clang-tools-extra/clangd/Quality.h

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/Quality.cpp b/clang-tools-extra/clangd/Quality.cpp
index 956253ecf183..bf0c0957084c 100644
--- a/clang-tools-extra/clangd/Quality.cpp
+++ b/clang-tools-extra/clangd/Quality.cpp
@@ -320,35 +320,51 @@ void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
   NeedsFixIts = !SemaCCResult.FixIts.empty();
 }
 
-static std::pair<float, unsigned> uriProximity(llvm::StringRef SymbolURI,
-                                               URIDistance *D) {
-  if (!D || SymbolURI.empty())
-    return {0.f, 0u};
-  unsigned Distance = D->distance(SymbolURI);
+static float fileProximityScore(unsigned FileDistance) {
+  // Range: [0, 1]
+  // FileDistance = [0, 1, 2, 3, 4, .., FileDistance::Unreachable]
+  // Score = [1, 0.82, 0.67, 0.55, 0.45, .., 0]
+  if (FileDistance == FileDistance::Unreachable)
+    return 0;
   // Assume approximately default options are used for sensible scoring.
-  return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance};
+  return std::exp(FileDistance * -0.4f / FileDistanceOptions().UpCost);
 }
 
-static float scopeBoost(ScopeDistance &Distance,
-                        llvm::Optional<llvm::StringRef> SymbolScope) {
-  if (!SymbolScope)
-    return 1;
-  auto D = Distance.distance(*SymbolScope);
-  if (D == FileDistance::Unreachable)
+static float scopeProximityScore(unsigned ScopeDistance) {
+  // Range: [0.6, 2].
+  // ScopeDistance = [0, 1, 2, 3, 4, 5, 6, 7, .., FileDistance::Unreachable]
+  // Score = [2.0, 1.55, 1.2, 0.93, 0.72, 0.65, 0.65, 0.65, .., 0.6]
+  if (ScopeDistance == FileDistance::Unreachable)
     return 0.6f;
-  return std::max(0.65, 2.0 * std::pow(0.6, D / 2.0));
+  return std::max(0.65, 2.0 * std::pow(0.6, ScopeDistance / 2.0));
 }
 
 static llvm::Optional<llvm::StringRef>
 wordMatching(llvm::StringRef Name, const llvm::StringSet<> *ContextWords) {
   if (ContextWords)
-    for (const auto& Word : ContextWords->keys())
+    for (const auto &Word : ContextWords->keys())
       if (Name.contains_lower(Word))
         return Word;
   return llvm::None;
 }
 
+SymbolRelevanceSignals::DerivedSignals
+SymbolRelevanceSignals::calculateDerivedSignals() const {
+  DerivedSignals Derived;
+  Derived.NameMatchesContext = wordMatching(Name, ContextWords).hasValue();
+  Derived.FileProximityDistance = !FileProximityMatch || SymbolURI.empty()
+                                      ? FileDistance::Unreachable
+                                      : FileProximityMatch->distance(SymbolURI);
+  if (ScopeProximityMatch) {
+    // For global symbol, the distance is 0.
+    Derived.ScopeProximityDistance =
+        SymbolScope ? ScopeProximityMatch->distance(*SymbolScope) : 0;
+  }
+  return Derived;
+}
+
 float SymbolRelevanceSignals::evaluate() const {
+  DerivedSignals Derived = calculateDerivedSignals();
   float Score = 1;
 
   if (Forbidden)
@@ -358,7 +374,7 @@ float SymbolRelevanceSignals::evaluate() const {
 
   // File proximity scores are [0,1] and we translate them into a multiplier in
   // the range from 1 to 3.
-  Score *= 1 + 2 * std::max(uriProximity(SymbolURI, FileProximityMatch).first,
+  Score *= 1 + 2 * std::max(fileProximityScore(Derived.FileProximityDistance),
                             SemaFileProximityScore);
 
   if (ScopeProximityMatch)
@@ -366,10 +382,11 @@ float SymbolRelevanceSignals::evaluate() const {
     // can be tricky (e.g. class/function scope). Set to the max boost as we
     // don't load top-level symbols from the preamble and sema results are
     // always in the accessible scope.
-    Score *=
-        SemaSaysInScope ? 2.0 : scopeBoost(*ScopeProximityMatch, SymbolScope);
+    Score *= SemaSaysInScope
+                 ? 2.0
+                 : scopeProximityScore(Derived.ScopeProximityDistance);
 
-  if (wordMatching(Name, ContextWords))
+  if (Derived.NameMatchesContext)
     Score *= 1.5;
 
   // Symbols like local variables may only be referenced within their scope.
@@ -445,17 +462,18 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
   OS << llvm::formatv("\tSymbol scope: {0}\n",
                       S.SymbolScope ? *S.SymbolScope : "<None>");
 
+  SymbolRelevanceSignals::DerivedSignals Derived = S.calculateDerivedSignals();
   if (S.FileProximityMatch) {
-    auto Score = uriProximity(S.SymbolURI, S.FileProximityMatch);
-    OS << llvm::formatv("\tIndex URI proximity: {0} (distance={1})\n",
-                        Score.first, Score.second);
+    unsigned Score = fileProximityScore(Derived.FileProximityDistance);
+    OS << llvm::formatv("\tIndex URI proximity: {0} (distance={1})\n", Score,
+                        Derived.FileProximityDistance);
   }
   OS << llvm::formatv("\tSema file proximity: {0}\n", S.SemaFileProximityScore);
 
   OS << llvm::formatv("\tSema says in scope: {0}\n", S.SemaSaysInScope);
   if (S.ScopeProximityMatch)
     OS << llvm::formatv("\tIndex scope boost: {0}\n",
-                        scopeBoost(*S.ScopeProximityMatch, S.SymbolScope));
+                        scopeProximityScore(Derived.ScopeProximityDistance));
 
   OS << llvm::formatv(
       "\tType matched preferred: {0} (Context type: {1}, Symbol type: {2}\n",

diff  --git a/clang-tools-extra/clangd/Quality.h b/clang-tools-extra/clangd/Quality.h
index 54aa7cdb6488..04c6ce211ca9 100644
--- a/clang-tools-extra/clangd/Quality.h
+++ b/clang-tools-extra/clangd/Quality.h
@@ -136,6 +136,19 @@ struct SymbolRelevanceSignals {
   // Whether the item matches the type expected in the completion context.
   bool TypeMatchesPreferred = false;
 
+  /// Set of derived signals computed by calculateDerivedSignals(). Must not be
+  /// set explicitly.
+  struct DerivedSignals {
+    /// Whether Name contains some word from context.
+    bool NameMatchesContext = false;
+    /// Min distance between SymbolURI and all the headers included by the TU.
+    unsigned FileProximityDistance = FileDistance::Unreachable;
+    /// Min distance between SymbolScope and all the available scopes.
+    unsigned ScopeProximityDistance = FileDistance::Unreachable;
+  };
+
+  DerivedSignals calculateDerivedSignals() const;
+
   void merge(const CodeCompletionResult &SemaResult);
   void merge(const Symbol &IndexResult);
 


        


More information about the cfe-commits mailing list