Author: Sam McCall Date: 2022-07-22T10:35:06+02:00 New Revision: d9d554a3f4640e8f1ed5c0ae408740861715b897
URL: https://github.com/llvm/llvm-project/commit/d9d554a3f4640e8f1ed5c0ae408740861715b897 DIFF: https://github.com/llvm/llvm-project/commit/d9d554a3f4640e8f1ed5c0ae408740861715b897.diff LOG: [pseudo] Add ambiguity & unparseability metrics to -print-statistics These can be used to quantify parsing improvements from a change. Differential Revision: https://reviews.llvm.org/D130199 Added: Modified: clang-tools-extra/pseudo/test/glr.cpp clang-tools-extra/pseudo/tool/ClangPseudo.cpp Removed: ################################################################################ diff --git a/clang-tools-extra/pseudo/test/glr.cpp b/clang-tools-extra/pseudo/test/glr.cpp index 24b2ac05f6f1..221725c6f089 100644 --- a/clang-tools-extra/pseudo/test/glr.cpp +++ b/clang-tools-extra/pseudo/test/glr.cpp @@ -29,3 +29,6 @@ void foo() { // CHECK-NEXT: 1 type-name // CHECK-EMPTY: // CHECK-NEXT: 0 Opaque nodes: +// CHECK-EMPTY: +// CHECK-NEXT: Ambiguity: 0.40 misparses/token +// CHECK-NEXT: Unparsed: 0.00% diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp index 2a2d8eda4c20..294098a3a5c1 100644 --- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -24,6 +24,8 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Signals.h" +using clang::pseudo::ForestNode; +using clang::pseudo::Token; using clang::pseudo::TokenStream; using llvm::cl::desc; using llvm::cl::init; @@ -174,9 +176,8 @@ int main(int argc, char *argv[]) { llvm::outs() << "GSS bytes: " << GSS.bytes() << " nodes: " << GSS.nodesCreated() << "\n"; - for (auto &P : - {std::make_pair("Ambiguous", clang::pseudo::ForestNode::Ambiguous), - std::make_pair("Opaque", clang::pseudo::ForestNode::Opaque)}) { + for (auto &P : {std::make_pair("Ambiguous", ForestNode::Ambiguous), + std::make_pair("Opaque", ForestNode::Opaque)}) { clang::pseudo::NodeStats Stats( Root, [&](const auto &N) { return N.kind() == P.second; }); llvm::outs() << "\n" << Stats.Total << " " << P.first << " nodes:\n"; @@ -184,6 +185,39 @@ int main(int argc, char *argv[]) { llvm::outs() << llvm::formatv(" {0,3} {1}\n", S.second, Lang.G.symbolName(S.first)); } + + // Metrics for how imprecise parsing was. + // These are rough but aim to be: + // - linear: if we eliminate half the errors the metric should halve + // - length-independent + unsigned UnparsedTokens = 0; // Tokens covered by Opaque. (not unique) + unsigned Misparses = 0; // Sum of alternatives-1 + llvm::DenseSet<const ForestNode *> Visited; + auto DFS = [&](const ForestNode &N, Token::Index End, auto &DFS) -> void { + if (N.kind() == ForestNode::Opaque) { + UnparsedTokens += End - N.startTokenIndex(); + } else if (N.kind() == ForestNode::Ambiguous) { + Misparses += N.alternatives().size() - 1; + for (const auto *C : N.alternatives()) + if (Visited.insert(C).second) + DFS(*C, End, DFS); + } else if (N.kind() == ForestNode::Sequence) { + for (unsigned I = 0, E = N.children().size(); I < E; ++I) + if (Visited.insert(N.children()[I]).second) + DFS(*N.children()[I], + I + 1 == N.children().size() + ? End + : N.children()[I + 1]->startTokenIndex(), + DFS); + } + }; + unsigned Len = ParseableStream->tokens().size(); + DFS(Root, Len, DFS); + llvm::outs() << "\n"; + llvm::outs() << llvm::formatv("Ambiguity: {0} misparses/token\n", + double(Misparses) / Len); + llvm::outs() << llvm::formatv("Unparsed: {0}%\n", + 100.0 * UnparsedTokens / Len); } } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits