[clang-tools-extra] 7df80a1 - [clangd] Add support for multiple DecisionForest model experiments.

Utkarsh Saxena via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 29 11:51:03 PDT 2020


Author: Utkarsh Saxena
Date: 2020-10-29T19:49:40+01:00
New Revision: 7df80a1204f0dc91fb8bba7635e2354adf08989f

URL: https://github.com/llvm/llvm-project/commit/7df80a1204f0dc91fb8bba7635e2354adf08989f
DIFF: https://github.com/llvm/llvm-project/commit/7df80a1204f0dc91fb8bba7635e2354adf08989f.diff

LOG: [clangd] Add support for multiple DecisionForest model experiments.

With every incremental change to the model, one needs to check in a new
model upstream, which also significantly increases the size of the git
repo with every new model.
Testing and comparing the new model against the previous one is also not
possible, as we run only a single model at any point.

One solution is to have a "staging" decision forest that can be
injected into clangd without pushing it upstream, and to compare the
performance of the staging model with the live model. Once a couple of
enhancements have been made to the staging model, we can replace the
live model upstream with it. This reduces upstream churn and also
allows us to compare experimental models against the current baseline.

This is done by adding a callback to CodeCompleteOptions that is called
only when the decision forest ranking model is in use. This allows us
to inject a different completion model internally.
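
For illustration, a minimal sketch of how an internal client might plug
a staging model in through this callback. EvaluateStagingModel is a
hypothetical, internal-only evaluation function; the callback member,
signal types, and scoring formula below come from this patch and assume
CodeComplete.h, Quality.h, and <cmath> are included:

    clangd::CodeCompleteOptions Opts;
    Opts.RankingModel = clangd::CodeCompleteOptions::DecisionForest;
    Opts.DecisionForestScorer =
        [](const clangd::SymbolQualitySignals &Quality,
           const clangd::SymbolRelevanceSignals &Relevance, float Base) {
          clangd::DecisionForestScores Scores;
          // Exponentiate the staging model's prediction so it acts as a
          // multiplicative boost, mirroring the live model's scoring.
          Scores.ExcludingName =
              std::pow(Base, EvaluateStagingModel(Quality, Relevance));
          // Keep the same FixIts penalty and NameMatch multiplier as the
          // live model so the two stay comparable in A/B experiments.
          if (Relevance.NeedsFixIts)
            Scores.ExcludingName *= 0.5;
          Scores.Total = Relevance.NameMatch * Scores.ExcludingName;
          return Scores;
        };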

Differential Revision: https://reviews.llvm.org/D90014

Added: 
    

Modified: 
    clang-tools-extra/clangd/CodeComplete.cpp
    clang-tools-extra/clangd/CodeComplete.h
    clang-tools-extra/clangd/Quality.cpp
    clang-tools-extra/clangd/Quality.h
    clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp
index 1d2770823190..8396de635135 100644
--- a/clang-tools-extra/clangd/CodeComplete.cpp
+++ b/clang-tools-extra/clangd/CodeComplete.cpp
@@ -1644,19 +1644,10 @@ class CodeCompleteFlow {
       return Scores;
 
     case RM::DecisionForest:
-      Scores.Quality = 0;
-      Scores.Relevance = 0;
-      // Exponentiating DecisionForest prediction makes the score of each tree a
-      // multiplciative boost (like NameMatch). This allows us to weigh the
-      // prediciton score and NameMatch appropriately.
-      Scores.ExcludingName = pow(Opts.DecisionForestBase,
-                                 evaluateDecisionForest(Quality, Relevance));
-      // NeedsFixIts is not part of the DecisionForest as generating training
-      // data that needs fixits is not-feasible.
-      if (Relevance.NeedsFixIts)
-        Scores.ExcludingName *= 0.5;
-      // NameMatch should be a multiplier on total score to support rescoring.
-      Scores.Total = Relevance.NameMatch * Scores.ExcludingName;
+      DecisionForestScores DFScores = Opts.DecisionForestScorer(
+          Quality, Relevance, Opts.DecisionForestBase);
+      Scores.ExcludingName = DFScores.ExcludingName;
+      Scores.Total = DFScores.Total;
       return Scores;
     }
     llvm_unreachable("Unhandled CodeCompletion ranking model.");

diff  --git a/clang-tools-extra/clangd/CodeComplete.h b/clang-tools-extra/clangd/CodeComplete.h
index 82a2656f172e..ce8a2097a6d9 100644
--- a/clang-tools-extra/clangd/CodeComplete.h
+++ b/clang-tools-extra/clangd/CodeComplete.h
@@ -154,14 +154,21 @@ struct CodeCompleteOptions {
     DecisionForest,
   } RankingModel = Heuristics;
 
+  /// Callback used to score a CompletionCandidate if DecisionForest ranking
+  /// model is enabled.
+  /// This allows us to inject experimental models and compare them with
+  /// the baseline model using A/B testing.
+  std::function<DecisionForestScores(
+      const SymbolQualitySignals &, const SymbolRelevanceSignals &, float Base)>
+      DecisionForestScorer = &evaluateDecisionForest;
   /// Weight for combining NameMatch and Prediction of DecisionForest.
   /// CompletionScore is NameMatch * pow(Base, Prediction).
   /// The optimal value of Base largely depends on the semantics of the model
   /// and prediction score (e.g. algorithm used during training, number of
   /// trees, etc.). Usually if the range of Prediciton is [-20, 20] then a Base
   /// in [1.2, 1.7] works fine.
-  /// Semantics: E.g. the completion score reduces by 50% if the Prediciton
-  /// score is reduced by 2.6 points for Base = 1.3.
+  /// Semantics: E.g. for Base = 1.3, if the Prediction score decreases by 2.6
+  /// points, the completion score decreases by 50%, i.e. by a factor of 1.3^(-2.6).
   float DecisionForestBase = 1.3f;
 };
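
To make the Base semantics above concrete, here is a small standalone
sketch of the CompletionScore formula from the comment; the prediction
values are illustrative only, not taken from the model:

    #include <cmath>
    #include <cstdio>

    int main() {
      float NameMatch = 1.0f, Base = 1.3f;
      float PredictionA = 10.0f;
      float PredictionB = PredictionA - 2.6f; // prediction drops by 2.6 points
      // CompletionScore = NameMatch * pow(Base, Prediction)
      float ScoreA = NameMatch * std::pow(Base, PredictionA);
      float ScoreB = NameMatch * std::pow(Base, PredictionB);
      // ScoreB / ScoreA == pow(1.3f, -2.6f) ~= 0.5, i.e. roughly a 50% drop.
      std::printf("%.3f %.3f ratio=%.3f\n", ScoreA, ScoreB, ScoreB / ScoreA);
      return 0;
    }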
 

diff  --git a/clang-tools-extra/clangd/Quality.cpp b/clang-tools-extra/clangd/Quality.cpp
index 55b97eef123f..96784c077d05 100644
--- a/clang-tools-extra/clangd/Quality.cpp
+++ b/clang-tools-extra/clangd/Quality.cpp
@@ -487,8 +487,9 @@ float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) {
   return SymbolQuality * SymbolRelevance;
 }
 
-float evaluateDecisionForest(const SymbolQualitySignals &Quality,
-                             const SymbolRelevanceSignals &Relevance) {
+DecisionForestScores
+evaluateDecisionForest(const SymbolQualitySignals &Quality,
+                       const SymbolRelevanceSignals &Relevance, float Base) {
   Example E;
   E.setIsDeprecated(Quality.Deprecated);
   E.setIsReservedName(Quality.ReservedName);
@@ -512,7 +513,19 @@ float evaluateDecisionForest(const SymbolQualitySignals &Quality,
   E.setHadSymbolType(Relevance.HadSymbolType);
   E.setTypeMatchesPreferred(Relevance.TypeMatchesPreferred);
   E.setFilterLength(Relevance.FilterLength);
-  return Evaluate(E);
+
+  DecisionForestScores Scores;
+  // Exponentiating the DecisionForest prediction makes the score of each tree
+  // a multiplicative boost (like NameMatch). This allows us to weigh the
+  // prediction score and NameMatch appropriately.
+  Scores.ExcludingName = pow(Base, Evaluate(E));
+  // NeedsFixIts is not part of the DecisionForest as generating training
+  // data that needs fixits is not feasible.
+  if (Relevance.NeedsFixIts)
+    Scores.ExcludingName *= 0.5;
+  // NameMatch should be a multiplier on total score to support rescoring.
+  Scores.Total = Relevance.NameMatch * Scores.ExcludingName;
+  return Scores;
 }
 
 // Produces an integer that sorts in the same order as F.

diff  --git a/clang-tools-extra/clangd/Quality.h b/clang-tools-extra/clangd/Quality.h
index afbc0f10208f..ab1174f2c286 100644
--- a/clang-tools-extra/clangd/Quality.h
+++ b/clang-tools-extra/clangd/Quality.h
@@ -165,8 +165,18 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &,
 /// Combine symbol quality and relevance into a single score.
 float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
 
-float evaluateDecisionForest(const SymbolQualitySignals &Quality,
-                             const SymbolRelevanceSignals &Relevance);
+/// Same semantics as CodeComplete::Score. Quality score and Relevance score
+/// have been removed since DecisionForest cannot assign individual scores to
+/// Quality and Relevance signals.
+struct DecisionForestScores {
+  float Total = 0.f;
+  float ExcludingName = 0.f;
+};
+
+DecisionForestScores
+evaluateDecisionForest(const SymbolQualitySignals &Quality,
+                       const SymbolRelevanceSignals &Relevance, float Base);
+
 /// TopN<T> is a lossy container that preserves only the "best" N elements.
 template <typename T, typename Compare = std::greater<T>> class TopN {
 public:

diff  --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
index 15576eddf76e..b2534bdf9c19 100644
--- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -194,6 +194,33 @@ TEST(DecisionForestRankingModel, ReferencesAffectRanking) {
       ElementsAre(Named("clangA"), Named("clangD")));
 }
 
+TEST(DecisionForestRankingModel, DecisionForestScorerCallbackTest) {
+  clangd::CodeCompleteOptions Opts;
+  constexpr float MagicNumber = 1234.5678;
+  Opts.RankingModel = CodeCompleteOptions::DecisionForest;
+  Opts.DecisionForestScorer = [](const SymbolQualitySignals &,
+                                 const SymbolRelevanceSignals &, float Base) {
+    DecisionForestScores Scores;
+    Scores.Total = MagicNumber;
+    Scores.ExcludingName = MagicNumber;
+    return Scores;
+  };
+  llvm::StringRef Code = "int func() { int xyz; xy^ }";
+  auto Results = completions(Code,
+                             /*IndexSymbols=*/{}, Opts);
+  ASSERT_EQ(Results.Completions.size(), 1u);
+  EXPECT_EQ(Results.Completions[0].Score.Total, MagicNumber);
+  EXPECT_EQ(Results.Completions[0].Score.ExcludingName, MagicNumber);
+
+  // Do not use DecisionForestScorer for heuristics model.
+  Opts.RankingModel = CodeCompleteOptions::Heuristics;
+  Results = completions(Code,
+                        /*IndexSymbols=*/{}, Opts);
+  ASSERT_EQ(Results.Completions.size(), 1u);
+  EXPECT_NE(Results.Completions[0].Score.Total, MagicNumber);
+  EXPECT_NE(Results.Completions[0].Score.ExcludingName, MagicNumber);
+}
+
 TEST(CompletionTest, Limit) {
   clangd::CodeCompleteOptions Opts;
   Opts.Limit = 2;

