https://github.com/jeremy-rifkin updated https://github.com/llvm/llvm-project/pull/87898
>From 2ebb15e08b5e2d8a9fe6cfddbe0dd2a8942b2542 Mon Sep 17 00:00:00 2001 From: Jeremy <51220084+jeremy-rif...@users.noreply.github.com> Date: Sat, 6 Apr 2024 17:02:20 -0500 Subject: [PATCH 1/2] Add a --print-terminal-tokens option --- clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp | 2 +- .../pseudo/include/clang-pseudo/Forest.h | 11 ++++++-- clang-tools-extra/pseudo/lib/Forest.cpp | 26 +++++++++++++------ clang-tools-extra/pseudo/tool/ClangPseudo.cpp | 12 +++++++-- 4 files changed, 38 insertions(+), 13 deletions(-) diff --git a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp index 87b9d15480cc35..33b3da1ed6ea9f 100644 --- a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp +++ b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp @@ -46,7 +46,7 @@ class Fuzzer { glrParse(clang::pseudo::ParseParams{ParseableStream, Arena, GSS}, *Lang.G.findNonterminal("translation-unit"), Lang); if (Print) - llvm::outs() << Root.dumpRecursive(Lang.G); + llvm::outs() << Root.dumpRecursive(Lang.G, std::nullopt); } }; diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h index e9edb40e02b64e..642c489b3fba41 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h @@ -26,6 +26,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Allocator.h" #include <cstdint> +#include <functional> +#include <optional> namespace clang { namespace pseudo { @@ -112,8 +114,13 @@ class alignas(class ForestNode *) ForestNode { // Iteration over all nodes in the forest, including this. llvm::iterator_range<RecursiveIterator> descendants() const; - std::string dump(const Grammar &) const; - std::string dumpRecursive(const Grammar &, bool Abbreviated = false) const; + std::string + dump(const Grammar &, + std::optional<std::reference_wrapper<const TokenStream>>) const; + std::string + dumpRecursive(const Grammar &, + std::optional<std::reference_wrapper<const TokenStream>>, + bool Abbreviated = false) const; private: friend class ForestArena; diff --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp index e8e60e5ec475a4..adce731d6c1e1c 100644 --- a/clang-tools-extra/pseudo/lib/Forest.cpp +++ b/clang-tools-extra/pseudo/lib/Forest.cpp @@ -45,13 +45,21 @@ ForestNode::descendants() const { return {RecursiveIterator(this), RecursiveIterator()}; } -std::string ForestNode::dump(const Grammar &G) const { +std::string ForestNode::dump( + const Grammar &G, + std::optional<std::reference_wrapper<const TokenStream>> Code) const { switch (kind()) { case Ambiguous: return llvm::formatv("{0} := <ambiguous>", G.symbolName(symbol())); case Terminal: - return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()), - startTokenIndex()); + if (Code) { + return llvm::formatv("{0} := tok[{1}] ({2})", G.symbolName(symbol()), + startTokenIndex(), + Code->get().tokens()[startTokenIndex()]); + } else { + return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()), + startTokenIndex()); + } case Sequence: return G.dumpRule(rule()); case Opaque: @@ -60,8 +68,10 @@ std::string ForestNode::dump(const Grammar &G) const { llvm_unreachable("Unhandled node kind!"); } -std::string ForestNode::dumpRecursive(const Grammar &G, - bool Abbreviated) const { +std::string ForestNode::dumpRecursive( + const Grammar &G, + std::optional<std::reference_wrapper<const TokenStream>> Code, + bool Abbreviated) const { using llvm::formatv; Token::Index MaxToken = 0; // Count visits of nodes so we can mark those seen multiple times. @@ -95,7 +105,7 @@ std::string ForestNode::dumpRecursive(const Grammar &G, std::string Result; constexpr Token::Index KEnd = std::numeric_limits<Token::Index>::max(); std::function<void(const ForestNode *, Token::Index, std::optional<SymbolID>, - LineDecoration &LineDec)> + LineDecoration LineDec)> Dump = [&](const ForestNode *P, Token::Index End, std::optional<SymbolID> ElidedParent, LineDecoration LineDec) { bool SharedNode = VisitCounts.find(P)->getSecond() > 1; @@ -145,13 +155,13 @@ std::string ForestNode::dumpRecursive(const Grammar &G, // The first time, print as #1. Later, =#1. if (First) { - Result += formatv("{0} #{1}", P->dump(G), ID); + Result += formatv("{0} #{1}", P->dump(G, Code), ID); } else { Result += formatv("{0} =#{1}", G.symbolName(P->symbol()), ID); Children = {}; // Don't walk the children again. } } else { - Result.append(P->dump(G)); + Result.append(P->dump(G, Code)); } Result.push_back('\n'); diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp index 6a64760749cefe..4797dc01cdc13b 100644 --- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -51,6 +51,9 @@ static opt<bool> Disambiguate("disambiguate", desc("Choose best tree from parse forest")); static opt<bool> PrintStatistics("print-statistics", desc("Print GLR parser statistics")); static opt<bool> PrintForest("print-forest", desc("Print parse forest")); +static opt<bool> + PrintTerminalTokens("print-terminal-tokens", + desc("Print terminal tokens in parse forest")); static opt<bool> ForestAbbrev("forest-abbrev", desc("Abbreviate parse forest"), init(true)); static opt<std::string> HTMLForest("html-forest", @@ -161,9 +164,14 @@ int main(int argc, char *argv[]) { auto &Root = glrParse(clang::pseudo::ParseParams{*ParseableStream, Arena, GSS}, *StartSymID, Lang); + std::optional<std::reference_wrapper<const TokenStream>> Code; + if (PrintTerminalTokens) { + Code = *ParseableStream; + } // If we're disambiguating, we'll print at the end instead. if (PrintForest && !Disambiguate) - llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/ForestAbbrev); + llvm::outs() << Root.dumpRecursive(Lang.G, Code, + /*Abbreviated=*/ForestAbbrev); clang::pseudo::Disambiguation Disambig; if (Disambiguate) Disambig = clang::pseudo::disambiguate(&Root, {}); @@ -234,7 +242,7 @@ int main(int argc, char *argv[]) { ForestNode *DisambigRoot = &Root; removeAmbiguities(DisambigRoot, Disambig); llvm::outs() << "Disambiguated tree:\n"; - llvm::outs() << DisambigRoot->dumpRecursive(Lang.G, + llvm::outs() << DisambigRoot->dumpRecursive(Lang.G, Code, /*Abbreviated=*/ForestAbbrev); } } >From ed5e37ba210ea76c35d20f3d14cc985e987fa8fd Mon Sep 17 00:00:00 2001 From: Jeremy <51220084+jeremy-rif...@users.noreply.github.com> Date: Sat, 6 Apr 2024 19:41:45 -0500 Subject: [PATCH 2/2] Fix a LLVM_DEBUG --- clang-tools-extra/pseudo/lib/GLR.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp index ac43c02db521eb..e4b5be79d7e58d 100644 --- a/clang-tools-extra/pseudo/lib/GLR.cpp +++ b/clang-tools-extra/pseudo/lib/GLR.cpp @@ -527,7 +527,8 @@ class GLRReduce { SequenceNodes.size() == 1 ? SequenceNodes.front() : &Params.Forest.createAmbiguous(F.Symbol, SequenceNodes); - LLVM_DEBUG(llvm::dbgs() << " --> " << Parsed->dump(Lang.G) << "\n"); + LLVM_DEBUG(llvm::dbgs() + << " --> " << Parsed->dump(Lang.G, std::nullopt) << "\n"); // Bases for this family, deduplicate them, and group by the goTo State. sortAndUnique(FamilyBases); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits