usaxena95 created this revision. Herald added subscribers: cfe-commits, kadircet, arphaman. Herald added a project: clang. usaxena95 requested review of this revision. Herald added subscribers: MaskRay, ilya-biryukov.
With every incremental change, one needs to check in a new model upstream. This also significantly increases the size of the git repo with every new model. Testing and comparing the old and new models is also not possible, as we run only a single model at any point. One solution is to have a "staging" decision forest which can be injected into clangd without pushing it upstream, so that the performance of the staging model can be compared with that of the live model. After a couple of enhancements have been made to the staging model, we can then replace the live model upstream with the staging model. This reduces upstream churn and also allows us to compare models against the current baseline model. This is done by having a callback in CodeCompleteOptions which is called only when we want to use a decision forest ranking model. This allows us to inject a different completion model internally. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D90014 Files: clang-tools-extra/clangd/CodeComplete.cpp clang-tools-extra/clangd/CodeComplete.h clang-tools-extra/clangd/Quality.cpp clang-tools-extra/clangd/Quality.h
Index: clang-tools-extra/clangd/Quality.h =================================================================== --- clang-tools-extra/clangd/Quality.h +++ clang-tools-extra/clangd/Quality.h @@ -165,8 +165,18 @@ /// Combine symbol quality and relevance into a single score. float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance); -float evaluateDecisionForest(const SymbolQualitySignals &Quality, - const SymbolRelevanceSignals &Relevance); +/// Same semantics as CodeComplete::Score. Quality score and Relevance score +/// have been removed since DecisionForest cannot assign individual scores to +/// Quality and Relevance signals. +struct DecisionForestScores { + float Total = 0.f; + float ExcludingName = 0.f; +}; + +DecisionForestScores +evaluateDecisionForest(const SymbolQualitySignals &Quality, + const SymbolRelevanceSignals &Relevance, float Base); + /// TopN<T> is a lossy container that preserves only the "best" N elements. template <typename T, typename Compare = std::greater<T>> class TopN { public: Index: clang-tools-extra/clangd/Quality.cpp =================================================================== --- clang-tools-extra/clangd/Quality.cpp +++ clang-tools-extra/clangd/Quality.cpp @@ -487,8 +487,9 @@ return SymbolQuality * SymbolRelevance; } -float evaluateDecisionForest(const SymbolQualitySignals &Quality, - const SymbolRelevanceSignals &Relevance) { +DecisionForestScores +evaluateDecisionForest(const SymbolQualitySignals &Quality, + const SymbolRelevanceSignals &Relevance, float Base) { Example E; E.setIsDeprecated(Quality.Deprecated); E.setIsReservedName(Quality.ReservedName); @@ -512,7 +513,19 @@ E.setHadSymbolType(Relevance.HadSymbolType); E.setTypeMatchesPreferred(Relevance.TypeMatchesPreferred); E.setFilterLength(Relevance.FilterLength); - return Evaluate(E); + + DecisionForestScores Scores; + // Exponentiating DecisionForest prediction makes the score of each tree a + // multiplciative boost (like NameMatch). 
This allows us to weigh the + // prediciton score and NameMatch appropriately. + Scores.ExcludingName = pow(Base, Evaluate(E)); + // NeedsFixIts is not part of the DecisionForest as generating training + // data that needs fixits is not-feasible. + if (Relevance.NeedsFixIts) + Scores.ExcludingName *= 0.5; + // NameMatch should be a multiplier on total score to support rescoring. + Scores.Total = Relevance.NameMatch * Scores.ExcludingName; + return Scores; } // Produces an integer that sorts in the same order as F. Index: clang-tools-extra/clangd/CodeComplete.h =================================================================== --- clang-tools-extra/clangd/CodeComplete.h +++ clang-tools-extra/clangd/CodeComplete.h @@ -154,14 +154,17 @@ DecisionForest, } RankingModel = Heuristics; + std::function<DecisionForestScores( + const SymbolQualitySignals &, const SymbolRelevanceSignals &, float Base)> + DecisionForestScorer = &evaluateDecisionForest; /// Weight for combining NameMatch and Prediction of DecisionForest. /// CompletionScore is NameMatch * pow(Base, Prediction). /// The optimal value of Base largely depends on the semantics of the model /// and prediction score (e.g. algorithm used during training, number of /// trees, etc.). Usually if the range of Prediciton is [-20, 20] then a Base /// in [1.2, 1.7] works fine. - /// Semantics: E.g. the completion score reduces by 50% if the Prediciton - /// score is reduced by 2.6 points for Base = 1.3. + /// Semantics: E.g. For Base = 1.3, if the Prediciton score reduces by 2.6 + /// points then completion score reduces by 50% or 1.3^(-2.6). 
float DecisionForestBase = 1.3f; }; Index: clang-tools-extra/clangd/CodeComplete.cpp =================================================================== --- clang-tools-extra/clangd/CodeComplete.cpp +++ clang-tools-extra/clangd/CodeComplete.cpp @@ -1644,19 +1644,10 @@ return Scores; case RM::DecisionForest: - Scores.Quality = 0; - Scores.Relevance = 0; - // Exponentiating DecisionForest prediction makes the score of each tree a - // multiplciative boost (like NameMatch). This allows us to weigh the - // prediciton score and NameMatch appropriately. - Scores.ExcludingName = pow(Opts.DecisionForestBase, - evaluateDecisionForest(Quality, Relevance)); - // NeedsFixIts is not part of the DecisionForest as generating training - // data that needs fixits is not-feasible. - if (Relevance.NeedsFixIts) - Scores.ExcludingName *= 0.5; - // NameMatch should be a multiplier on total score to support rescoring. - Scores.Total = Relevance.NameMatch * Scores.ExcludingName; + DecisionForestScores DFScores = Opts.DecisionForestScorer( + Quality, Relevance, Opts.DecisionForestBase); + Scores.ExcludingName = DFScores.ExcludingName; + Scores.Total = DFScores.Total; return Scores; } llvm_unreachable("Unhandled CodeCompletion ranking model.");
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits