[PATCH] D90014: [clangd] Add support for multiple DecisionForest model experiments.

Utkarsh Saxena via Phabricator via cfe-commits Thu, 29 Oct 2020 11:51:24 -0700

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG7df80a1204f0: [clangd] Add support for multiple 
DecisionForest model experiments. (authored by usaxena95).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90014/new/

https://reviews.llvm.org/D90014

Files:
  clang-tools-extra/clangd/CodeComplete.cpp
  clang-tools-extra/clangd/CodeComplete.h
  clang-tools-extra/clangd/Quality.cpp
  clang-tools-extra/clangd/Quality.h
  clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp

Index: clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -194,6 +194,33 @@
       ElementsAre(Named("clangA"), Named("clangD")));
 }
 
+TEST(DecisionForestRankingModel, DecisionForestScorerCallbackTest) {
+  clangd::CodeCompleteOptions Opts;
+  constexpr float MagicNumber = 1234.5678;
+  Opts.RankingModel = CodeCompleteOptions::DecisionForest;
+  Opts.DecisionForestScorer = [](const SymbolQualitySignals &,
+                                 const SymbolRelevanceSignals &, float Base) {
+    DecisionForestScores Scores;
+    Scores.Total = MagicNumber;
+    Scores.ExcludingName = MagicNumber;
+    return Scores;
+  };
+  llvm::StringRef Code = "int func() { int xyz; xy^ }";
+  auto Results = completions(Code,
+                             /*IndexSymbols=*/{}, Opts);
+  ASSERT_EQ(Results.Completions.size(), 1u);
+  EXPECT_EQ(Results.Completions[0].Score.Total, MagicNumber);
+  EXPECT_EQ(Results.Completions[0].Score.ExcludingName, MagicNumber);
+
+  // Do not use DecisionForestScorer for heuristics model.
+  Opts.RankingModel = CodeCompleteOptions::Heuristics;
+  Results = completions(Code,
+                        /*IndexSymbols=*/{}, Opts);
+  ASSERT_EQ(Results.Completions.size(), 1u);
+  EXPECT_NE(Results.Completions[0].Score.Total, MagicNumber);
+  EXPECT_NE(Results.Completions[0].Score.ExcludingName, MagicNumber);
+}
+
 TEST(CompletionTest, Limit) {
   clangd::CodeCompleteOptions Opts;
   Opts.Limit = 2;
Index: clang-tools-extra/clangd/Quality.h
===================================================================
--- clang-tools-extra/clangd/Quality.h
+++ clang-tools-extra/clangd/Quality.h
@@ -165,8 +165,18 @@
 /// Combine symbol quality and relevance into a single score.
 float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
 
-float evaluateDecisionForest(const SymbolQualitySignals &Quality,
-                             const SymbolRelevanceSignals &Relevance);
+/// Same semantics as CodeComplete::Score. Quality score and Relevance score
+/// have been removed since DecisionForest cannot assign individual scores to
+/// Quality and Relevance signals.
+struct DecisionForestScores {
+  float Total = 0.f;
+  float ExcludingName = 0.f;
+};
+
+DecisionForestScores
+evaluateDecisionForest(const SymbolQualitySignals &Quality,
+                       const SymbolRelevanceSignals &Relevance, float Base);
+
 /// TopN<T> is a lossy container that preserves only the "best" N elements.
 template <typename T, typename Compare = std::greater<T>> class TopN {
 public:
Index: clang-tools-extra/clangd/Quality.cpp
===================================================================
--- clang-tools-extra/clangd/Quality.cpp
+++ clang-tools-extra/clangd/Quality.cpp
@@ -487,8 +487,9 @@
   return SymbolQuality * SymbolRelevance;
 }
 
-float evaluateDecisionForest(const SymbolQualitySignals &Quality,
-                             const SymbolRelevanceSignals &Relevance) {
+DecisionForestScores
+evaluateDecisionForest(const SymbolQualitySignals &Quality,
+                       const SymbolRelevanceSignals &Relevance, float Base) {
   Example E;
   E.setIsDeprecated(Quality.Deprecated);
   E.setIsReservedName(Quality.ReservedName);
@@ -512,7 +513,19 @@
   E.setHadSymbolType(Relevance.HadSymbolType);
   E.setTypeMatchesPreferred(Relevance.TypeMatchesPreferred);
   E.setFilterLength(Relevance.FilterLength);
-  return Evaluate(E);
+
+  DecisionForestScores Scores;
+  // Exponentiating DecisionForest prediction makes the score of each tree a
+  // multiplciative boost (like NameMatch). This allows us to weigh the
+  // prediciton score and NameMatch appropriately.
+  Scores.ExcludingName = pow(Base, Evaluate(E));
+  // NeedsFixIts is not part of the DecisionForest as generating training
+  // data that needs fixits is not-feasible.
+  if (Relevance.NeedsFixIts)
+    Scores.ExcludingName *= 0.5;
+  // NameMatch should be a multiplier on total score to support rescoring.
+  Scores.Total = Relevance.NameMatch * Scores.ExcludingName;
+  return Scores;
 }
 
 // Produces an integer that sorts in the same order as F.
Index: clang-tools-extra/clangd/CodeComplete.h
===================================================================
--- clang-tools-extra/clangd/CodeComplete.h
+++ clang-tools-extra/clangd/CodeComplete.h
@@ -154,14 +154,21 @@
     DecisionForest,
   } RankingModel = Heuristics;
 
+  /// Callback used to score a CompletionCandidate if DecisionForest ranking
+  /// model is enabled.
+  /// This allows us to inject experimental models and compare them with
+  /// baseline model using A/B testing.
+  std::function<DecisionForestScores(
+      const SymbolQualitySignals &, const SymbolRelevanceSignals &, float Base)>
+      DecisionForestScorer = &evaluateDecisionForest;
   /// Weight for combining NameMatch and Prediction of DecisionForest.
   /// CompletionScore is NameMatch * pow(Base, Prediction).
   /// The optimal value of Base largely depends on the semantics of the model
   /// and prediction score (e.g. algorithm used during training, number of
   /// trees, etc.). Usually if the range of Prediciton is [-20, 20] then a Base
   /// in [1.2, 1.7] works fine.
-  /// Semantics: E.g. the completion score reduces by 50% if the Prediciton
-  /// score is reduced by 2.6 points for Base = 1.3.
+  /// Semantics: E.g. For Base = 1.3, if the Prediciton score reduces by 2.6
+  /// points then completion score reduces by 50% or 1.3^(-2.6).
   float DecisionForestBase = 1.3f;
 };
 
Index: clang-tools-extra/clangd/CodeComplete.cpp
===================================================================
--- clang-tools-extra/clangd/CodeComplete.cpp
+++ clang-tools-extra/clangd/CodeComplete.cpp
@@ -1644,19 +1644,10 @@
       return Scores;
 
     case RM::DecisionForest:
-      Scores.Quality = 0;
-      Scores.Relevance = 0;
-      // Exponentiating DecisionForest prediction makes the score of each tree a
-      // multiplciative boost (like NameMatch). This allows us to weigh the
-      // prediciton score and NameMatch appropriately.
-      Scores.ExcludingName = pow(Opts.DecisionForestBase,
-                                 evaluateDecisionForest(Quality, Relevance));
-      // NeedsFixIts is not part of the DecisionForest as generating training
-      // data that needs fixits is not-feasible.
-      if (Relevance.NeedsFixIts)
-        Scores.ExcludingName *= 0.5;
-      // NameMatch should be a multiplier on total score to support rescoring.
-      Scores.Total = Relevance.NameMatch * Scores.ExcludingName;
+      DecisionForestScores DFScores = Opts.DecisionForestScorer(
+          Quality, Relevance, Opts.DecisionForestBase);
+      Scores.ExcludingName = DFScores.ExcludingName;
+      Scores.Total = DFScores.Total;
       return Scores;
     }
     llvm_unreachable("Unhandled CodeCompletion ranking model.");

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D90014: [clangd] Add support for multiple DecisionForest model experiments.

Reply via email to