hokein created this revision.
Herald added a subscriber: mgorny.
Herald added a project: All.
hokein requested review of this revision.
Herald added subscribers: cfe-commits, alextsao1999.
Herald added projects: clang, clang-tools-extra.

WARNING: this is an extremely hacky prototype.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D128812

Files:
  clang-tools-extra/pseudo/include/clang-pseudo/syntax/SyntaxTree.h
  clang-tools-extra/pseudo/lib/CMakeLists.txt
  clang-tools-extra/pseudo/lib/syntax/Build.cpp
  clang-tools-extra/pseudo/lib/syntax/CMakeLists.txt
  clang-tools-extra/pseudo/tool/CMakeLists.txt
  clang-tools-extra/pseudo/tool/ClangPseudo.cpp
  clang/include/clang/Tooling/Syntax/Nodes.h
  clang/lib/Tooling/Syntax/Nodes.cpp
  clang/lib/Tooling/Syntax/Synthesis.cpp

Index: clang/lib/Tooling/Syntax/Synthesis.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Synthesis.cpp
+++ clang/lib/Tooling/Syntax/Synthesis.cpp
@@ -62,6 +62,7 @@
 // Allocates the concrete syntax `Tree` according to its `NodeKind`.
 syntax::Tree *allocateTree(syntax::Arena &A, syntax::NodeKind Kind) {
   switch (Kind) {
+  case syntax::NodeKind::PLeaf:
   case syntax::NodeKind::OLeaf:
   case syntax::NodeKind::Leaf:
     assert(false);
Index: clang/lib/Tooling/Syntax/Nodes.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Nodes.cpp
+++ clang/lib/Tooling/Syntax/Nodes.cpp
@@ -12,6 +12,9 @@
 
 raw_ostream &syntax::operator<<(raw_ostream &OS, NodeKind K) {
   switch (K) {
+  case NodeKind::PLeaf:
+    OS << "PLeaf";
+    break;
 #define CONCRETE_NODE(Kind, Parent)                                            \
   case NodeKind::Kind:                                                         \
     return OS << #Kind;
Index: clang/include/clang/Tooling/Syntax/Nodes.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Nodes.h
+++ clang/include/clang/Tooling/Syntax/Nodes.h
@@ -31,6 +31,7 @@
 /// blocks of enumerator constants must correspond to the inheritance hierarchy
 /// of syntax::Node.
 enum class NodeKind : uint16_t {
+  PLeaf,
 #define CONCRETE_NODE(Kind, Base) Kind,
 #include "clang/Tooling/Syntax/Nodes.inc"
 };
Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp
===================================================================
--- clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -14,6 +14,7 @@
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRGraph.h"
 #include "clang-pseudo/grammar/LRTable.h"
+#include "clang-pseudo/syntax/SyntaxTree.h"
 #include "clang/Basic/LangOptions.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CommandLine.h"
@@ -117,8 +118,12 @@
       auto &Root =
           glrParse(*ParseableStream,
                    clang::pseudo::ParseParams{Lang, Arena, GSS}, *StartSymID);
-      if (PrintForest)
+      if (PrintForest) {
         llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
+        llvm::BumpPtrAllocator A;
+        llvm::outs() << clang::pseudo::dumpSyntaxTree(
+            clang::pseudo::buildSyntaxTree(A, Root, *ParseableStream));
+      }
 
       if (PrintStatistics) {
         llvm::outs() << "Forest bytes: " << Arena.bytes()
Index: clang-tools-extra/pseudo/tool/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/tool/CMakeLists.txt
+++ clang-tools-extra/pseudo/tool/CMakeLists.txt
@@ -13,6 +13,7 @@
   PRIVATE
   clangPseudo
   clangPseudoGrammar
+  clangPseudoSyntax
   clangPseudoCLI
   )
 
Index: clang-tools-extra/pseudo/lib/syntax/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/lib/syntax/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(LLVM_LINK_COMPONENTS Support)
+
+add_clang_library(clangPseudoSyntax
+  Build.cpp
+
+  LINK_LIBS
+  clangSyntaxTree
+  clangPseudo
+  clangPseudoGrammar
+  )
+
Index: clang-tools-extra/pseudo/lib/syntax/Build.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/lib/syntax/Build.cpp
@@ -0,0 +1,188 @@
+
+
+#include "clang-pseudo/Forest.h"
+#include "clang-pseudo/cxx/CXX.h"
+#include "clang-pseudo/grammar/Grammar.h"
+#include "clang-pseudo/syntax/SyntaxTree.h"
+#include "clang/Tooling/Syntax/Nodes.h"
+#include "clang/Tooling/Syntax/Tree.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace clang {
+namespace syntax {
+/// Builds a syntax tree from a pseudoparser forest, one leaf per token.
+class TreeBuilder {
+public:
+  explicit TreeBuilder(llvm::BumpPtrAllocator &Arena) : Arena(Arena) {}
+
+  /// Builds the tree for \p Node; its terminals index into \p Tokens.
+  syntax::Node *build(const pseudo::ForestNode &Node,
+                      const pseudo::TokenStream &Tokens) {
+    Leaves.reserve(Tokens.tokens().size());
+    for (const auto &T : Tokens.tokens()) {
+      Leaves.push_back(new (Arena) syntax::PLeaf(&T));
+      Leaves.back()->setRole(NodeRole::Unknown);
+      Leaves.back()->Original = Leaves.back()->CanModify = true;
+    }
+
+    std::vector<syntax::Node *> Roots = build(&Node, Tokens.tokens().size());
+    assert(!Roots.empty() && "forest produced no syntax nodes");
+    return Roots.front();
+  }
+
+  /// Builds each symbol of \p RHS and appends the results to \p Parent.
+  void buildForRHS(Tree *Parent, llvm::ArrayRef<const pseudo::ForestNode *> RHS,
+                   pseudo::Token::Index End) {
+    for (size_t I = 0; I < RHS.size(); ++I) {
+      for (auto *Child : build(RHS[I], endOf(RHS, I, End))) {
+        // FIXME: setup roles properly.
+        Child->setRole(NodeRole::Unknown);
+        Child->Original = Child->CanModify = true;
+        Parent->appendChildLowLevel(Child);
+      }
+    }
+  }
+
+  /// Returns one tree node for supported symbols, else the flattened children.
+  std::vector<Node *> build(const pseudo::ForestNode *Node,
+                            pseudo::Token::Index End) {
+    using cxx = pseudo::cxx::Symbol;
+    if (Node->kind() == pseudo::ForestNode::Terminal) {
+      assert(pseudo::isToken(Node->symbol()));
+      return {Leaves[Node->startTokenIndex()]};
+    }
+    // FIXME: disambiguate properly; for now the first alternative wins.
+    if (Node->kind() == pseudo::ForestNode::Ambiguous)
+      return build(Node->alternatives()[0], End);
+    // FIXME: handle opaque nodes!
+    const auto &Sequence = Node->elements();
+
+    switch (static_cast<cxx>(Node->symbol())) {
+    case cxx::translation_unit:
+      return wrap(new (Arena) syntax::TranslationUnit(), Sequence, End);
+    case cxx::simple_declaration:
+      return wrap(new (Arena) syntax::SimpleDeclaration(), Sequence, End);
+    case cxx::compound_statement:
+      return wrap(new (Arena) syntax::CompoundStatement(), Sequence, End);
+    case cxx::additive_expression:
+    case cxx::and_expression:
+    case cxx::assignment_expression:
+    case cxx::compare_expression:
+    case cxx::constraint_logical_and_expression:
+    case cxx::constraint_logical_or_expression:
+    case cxx::equality_expression:
+    case cxx::exclusive_or_expression:
+    case cxx::inclusive_or_expression:
+    case cxx::logical_and_expression:
+    case cxx::logical_or_expression:
+    case cxx::multiplicative_expression:
+    case cxx::pm_expression:
+    case cxx::relational_expression:
+    case cxx::shift_expression:
+      if (Sequence.size() > 1)
+        return wrap(new (Arena) syntax::BinaryOperatorExpression(), Sequence,
+                    End);
+      break;
+    default:
+      break;
+    }
+
+    // Fallback for sequences and unsupported symbols: flatten the children.
+    std::vector<syntax::Node *> Results;
+    for (size_t I = 0; I < Sequence.size(); ++I)
+      for (auto *E : build(Sequence[I], endOf(Sequence, I, End)))
+        Results.push_back(E);
+    return Results;
+  }
+
+  /// End bound of RHS[I]: the start of the next symbol, or \p End if last.
+  static pseudo::Token::Index
+  endOf(llvm::ArrayRef<const pseudo::ForestNode *> RHS, size_t I,
+        pseudo::Token::Index End) {
+    return I + 1 == RHS.size() ? End : RHS[I + 1]->startTokenIndex();
+  }
+
+  /// Fills \p Parent with the nodes built from \p RHS and returns it.
+  std::vector<Node *> wrap(Tree *Parent,
+                           llvm::ArrayRef<const pseudo::ForestNode *> RHS,
+                           pseudo::Token::Index End) {
+    buildForRHS(Parent, RHS, End);
+    return {Parent};
+  }
+
+  llvm::BumpPtrAllocator &Arena;
+  std::vector<syntax::Node *> Leaves;
+};
+} // namespace syntax
+namespace pseudo {
+
+// Writes L's token text to OS; 'eof' has empty text(), so print "<eof>".
+static void dumpLeaf(raw_ostream &OS, const syntax::PLeaf *L) {
+  assert(L);
+  const auto *Token = L->getToken();
+  assert(Token);
+  if (Token->Kind == tok::eof)
+    OS << "<eof>";
+  else
+    OS << Token->text();
+}
+
+// Prints N on one line, then recursively each child on following lines.
+// Every child line starts with one guide column per IndentMask bit
+// (set: "| ", clear: "  ") followed by "|-", or "`-" for the last child.
+static void dumpNode(raw_ostream &OS, const syntax::Node *N,
+                     llvm::BitVector IndentMask) {
+  assert(N);
+
+  // Line suffix shared by leaves and trees: role, then non-default flags.
+  auto FinishLine = [&OS, N] {
+    if (N->getRole() != syntax::NodeRole::Unknown)
+      OS << " " << N->getRole();
+    if (!N->isOriginal())
+      OS << " synthesized";
+    if (!N->canModify())
+      OS << " unmodifiable";
+    OS << "\n";
+  };
+
+  // Leaves print their token text in single quotes.
+  if (const auto *PL = dyn_cast<syntax::PLeaf>(N)) {
+    OS << "'";
+    dumpLeaf(OS, PL);
+    OS << "'";
+    FinishLine();
+    return;
+  }
+
+  // Trees print their kind, followed by one subtree per child.
+  const auto *Parent = cast<syntax::Tree>(N);
+  OS << Parent->getKind();
+  FinishLine();
+
+  for (const syntax::Node &Child : Parent->getChildren()) {
+    for (unsigned Col = 0; Col < IndentMask.size(); ++Col)
+      OS << (IndentMask[Col] ? "| " : "  ");
+
+    // The last child closes its guide column; earlier ones keep it open.
+    const bool IsLast = Child.getNextSibling() == nullptr;
+    OS << (IsLast ? "`-" : "|-");
+    IndentMask.push_back(!IsLast);
+    dumpNode(OS, &Child, IndentMask);
+    IndentMask.pop_back();
+  }
+}
+
+std::string dumpSyntaxTree(const syntax::Node *T) {
+  std::string Rendered;
+  llvm::raw_string_ostream Out(Rendered);
+  dumpNode(Out, T, llvm::BitVector()); // Start with no indentation columns.
+  return std::move(Out.str());
+}
+// Builds the syntax tree for the forest rooted at Node via a TreeBuilder.
+syntax::Node *buildSyntaxTree(llvm::BumpPtrAllocator &Arena,
+                              const ForestNode &Node,
+                              const TokenStream &Tokens) {
+  return syntax::TreeBuilder(Arena).build(Node, Tokens);
+}
+} // namespace pseudo
+} // namespace clang
Index: clang-tools-extra/pseudo/lib/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/lib/CMakeLists.txt
+++ clang-tools-extra/pseudo/lib/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_subdirectory(cli)
 add_subdirectory(cxx)
 add_subdirectory(grammar)
+add_subdirectory(syntax)
 
 set(LLVM_LINK_COMPONENTS Support)
 
Index: clang-tools-extra/pseudo/include/clang-pseudo/syntax/SyntaxTree.h
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/include/clang-pseudo/syntax/SyntaxTree.h
@@ -0,0 +1,37 @@
+#ifndef CLANG_PSEUDO_SYNTAX_SYNTAX_TREE_H
+#define CLANG_PSEUDO_SYNTAX_SYNTAX_TREE_H
+
+#include "clang-pseudo/Forest.h"
+#include "clang-pseudo/Token.h"
+#include "clang/Tooling/Syntax/Nodes.h"
+
+namespace clang {
+namespace syntax {
+
+/// A Leaf that refers to a pseudoparser token; the token is never null.
+class PLeaf : public clang::syntax::Leaf {
+public:
+  explicit PLeaf(const pseudo::Token *Tok) : Leaf(NodeKind::PLeaf), Tok(Tok) {
+    assert(Tok != nullptr);
+  }
+  static bool classof(const Node *N) { return N->getKind() == NodeKind::PLeaf; }
+  const pseudo::Token *getToken() const { return Tok; }
+private:
+  const pseudo::Token *Tok;
+};
+} // namespace syntax
+
+namespace pseudo {
+
+/// Renders the tree rooted at \p T as text, one node per line.
+std::string dumpSyntaxTree(const syntax::Node *T);
+
+/// Build a syntax tree from the forest rooted at \p Node.
+/// Nodes are allocated in \p Arena; leaves point at tokens in \p Tokens.
+syntax::Node *buildSyntaxTree(llvm::BumpPtrAllocator &Arena,
+                              const ForestNode &Node,
+                              const TokenStream &Tokens);
+} // namespace pseudo
+} // namespace clang
+
+#endif
\ No newline at end of file
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to