ilya-biryukov created this revision.
ilya-biryukov added a reviewer: sammccall.
Herald added a project: clang.
ilya-biryukov added a parent revision: D61637: [Syntax] Introduce syntax trees.

Most of the statements mirror the ones provided by clang AST.
Major differences are:

- expressions are wrapped into 'ExpressionStatement' instead of being a 
subclass of statement,
- semicolons are always consumed by the leaf statements (return, expression 
statement, etc.),
- some clang statements are not handled yet, we wrap those into an 
UnknownStatement class, which is not present in clang.

We also define 'Expression' and 'UnknownExpression' classes in order
to produce 'ExpressionStatement' where needed. The actual implementation
of expressions is not yet ready; it will follow later.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D63835

Files:
  clang/include/clang/Tooling/Syntax/Nodes.h
  clang/lib/Tooling/Syntax/BuildTree.cpp
  clang/lib/Tooling/Syntax/Nodes.cpp
  clang/unittests/Tooling/Syntax/TreeTest.cpp

Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -137,7 +137,7 @@
 | |-)
 | `-CompoundStatement
 |   |-1: {
-|   `-2: }
+|   `-3: }
 |-TopLevelDeclaration
 | |-void
 | |-foo
@@ -145,10 +145,317 @@
 | |-)
 | `-CompoundStatement
 |   |-1: {
-|   `-2: }
+|   `-3: }
 `-1: <eof>
 )txt"},
-  };
+      // if.
+      {
+          R"cpp(
+int main() {
+  if (true) {}
+  if (true) {} else if (false) {}
+}
+        )cpp",
+          R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-int
+| |-main
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: IfStatement
+|   | |-1: if
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | `-2: CompoundStatement
+|   |   |-1: {
+|   |   `-3: }
+|   |-2: IfStatement
+|   | |-1: if
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | |-2: CompoundStatement
+|   | | |-1: {
+|   | | `-3: }
+|   | |-3: else
+|   | `-4: IfStatement
+|   |   |-1: if
+|   |   |-(
+|   |   |-UnknownExpression
+|   |   | `-false
+|   |   |-)
+|   |   `-2: CompoundStatement
+|   |     |-1: {
+|   |     `-3: }
+|   `-3: }
+`-1: <eof>
+        )txt"},
+      // for.
+      {R"cpp(
+void test() {
+  for (;;)  {}
+}
+)cpp",
+       R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: ForStatement
+|   | |-1: for
+|   | |-(
+|   | |-;
+|   | |-;
+|   | |-)
+|   | `-2: CompoundStatement
+|   |   |-1: {
+|   |   `-3: }
+|   `-3: }
+`-1: <eof>
+        )txt"},
+      // declaration statement.
+      {"void test() { int a = 10; }",
+       R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: DeclarationStatement
+|   | |-int
+|   | |-a
+|   | |-=
+|   | |-10
+|   | `-;
+|   `-3: }
+`-1: <eof>
+)txt"},
+      {"void test() { ; }", R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: EmptyStatement
+|   | `-;
+|   `-3: }
+`-1: <eof>
+)txt"},
+      // switch, case and default.
+      {R"cpp(
+void test() {
+  switch (true) {
+    case 0:
+    default:;
+  }
+}
+)cpp",
+       R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: SwitchStatement
+|   | |-1: switch
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | `-2: CompoundStatement
+|   |   |-1: {
+|   |   |-2: CaseStatement
+|   |   | |-1: case
+|   |   | |-UnknownExpression
+|   |   | | `-0
+|   |   | |-:
+|   |   | `-2: DefaultStatement
+|   |   |   |-1: default
+|   |   |   |-:
+|   |   |   `-2: EmptyStatement
+|   |   |     `-;
+|   |   `-3: }
+|   `-3: }
+`-1: <eof>
+)txt"},
+      // while.
+      {R"cpp(
+void test() {
+  while (true) { continue; break; }
+}
+)cpp",
+       R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: WhileStatement
+|   | |-1: while
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | `-2: CompoundStatement
+|   |   |-1: {
+|   |   |-2: ContinueStatement
+|   |   | |-1: continue
+|   |   | `-;
+|   |   |-2: BreakStatement
+|   |   | |-1: break
+|   |   | `-;
+|   |   `-3: }
+|   `-3: }
+`-1: <eof>
+)txt"},
+      // return.
+      {R"cpp(
+int test() { return 1; }
+      )cpp",
+       R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-int
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: ReturnStatement
+|   | |-1: return
+|   | |-UnknownExpression
+|   | | `-1
+|   | `-;
+|   `-3: }
+`-1: <eof>
+       )txt"},
+      // Range-based for.
+      {R"cpp(
+void test() {
+  int a[3];
+  for (int x : a) ;
+}
+      )cpp",
+       R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: DeclarationStatement
+|   | |-int
+|   | |-a
+|   | |-[
+|   | |-3
+|   | |-]
+|   | `-;
+|   |-2: RangeBasedForStatement
+|   | |-1: for
+|   | |-(
+|   | |-int
+|   | |-x
+|   | |-:
+|   | |-UnknownExpression
+|   | | `-a
+|   | |-)
+|   | `-2: EmptyStatement
+|   |   `-;
+|   `-3: }
+`-1: <eof>
+       )txt"},
+      // Unhandled statements should end up as 'unknown statement'.
+      // This example uses a 'label statement', which does not yet have a syntax
+      // counterpart.
+      {"void main() { foo: return 100; }", R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-void
+| |-main
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: UnknownStatement
+|   | |-foo
+|   | |-:
+|   | `-ReturnStatement
+|   |   |-1: return
+|   |   |-UnknownExpression
+|   |   | `-100
+|   |   `-;
+|   `-3: }
+`-1: <eof>
+)txt"},
+      // expressions should be wrapped in 'ExpressionStatement' when they appear
+      // in a statement position.
+      {R"cpp(
+void test() {
+  test();
+  if (true) test(); else test();
+}
+    )cpp",
+       R"txt(
+*: TranslationUnitDeclaration
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-1: {
+|   |-2: ExpressionStatement
+|   | |-1: UnknownExpression
+|   | | |-test
+|   | | |-(
+|   | | `-)
+|   | `-;
+|   |-2: IfStatement
+|   | |-1: if
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | |-2: ExpressionStatement
+|   | | |-1: UnknownExpression
+|   | | | |-test
+|   | | | |-(
+|   | | | `-)
+|   | | `-;
+|   | |-3: else
+|   | `-4: ExpressionStatement
+|   |   |-1: UnknownExpression
+|   |   | |-test
+|   |   | |-(
+|   |   | `-)
+|   |   `-;
+|   `-3: }
+`-1: <eof>
+       )txt"}};
 
   for (const auto &T : Cases) {
     auto *Root = buildTree(T.first);
Index: clang/lib/Tooling/Syntax/Nodes.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Nodes.cpp
+++ clang/lib/Tooling/Syntax/Nodes.cpp
@@ -18,6 +18,36 @@
     return OS << "TranslationUnitDeclaration";
   case NodeKind::TopLevelDeclaration:
     return OS << "TopLevelDeclaration";
+  case NodeKind::UnknownExpression:
+    return OS << "UnknownExpression";
+  case NodeKind::UnknownStatement:
+    return OS << "UnknownStatement";
+  case NodeKind::DeclarationStatement:
+    return OS << "DeclarationStatement";
+  case NodeKind::EmptyStatement:
+    return OS << "EmptyStatement";
+  case NodeKind::SwitchStatement:
+    return OS << "SwitchStatement";
+  case NodeKind::CaseStatement:
+    return OS << "CaseStatement";
+  case NodeKind::DefaultStatement:
+    return OS << "DefaultStatement";
+  case NodeKind::IfStatement:
+    return OS << "IfStatement";
+  case NodeKind::ForStatement:
+    return OS << "ForStatement";
+  case NodeKind::WhileStatement:
+    return OS << "WhileStatement";
+  case NodeKind::ContinueStatement:
+    return OS << "ContinueStatement";
+  case NodeKind::BreakStatement:
+    return OS << "BreakStatement";
+  case NodeKind::ReturnStatement:
+    return OS << "ReturnStatement";
+  case NodeKind::RangeBasedForStatement:
+    return OS << "RangeBasedForStatement";
+  case NodeKind::ExpressionStatement:
+    return OS << "ExpressionStatement";
   case NodeKind::CompoundStatement:
     return OS << "CompoundStatement";
   }
@@ -28,10 +58,99 @@
   return llvm::cast_or_null<syntax::Leaf>(findChild(Roles::eof));
 }
 
+syntax::Leaf *syntax::SwitchStatement::switchKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(findChild(Roles::switchKeyword));
+}
+
+syntax::Statement *syntax::SwitchStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(findChild(Roles::body));
+}
+
+syntax::Leaf *syntax::CaseStatement::caseKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(findChild(Roles::caseKeyword));
+}
+
+syntax::Statement *syntax::CaseStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(findChild(Roles::body));
+}
+
+syntax::Leaf *syntax::DefaultStatement::defaultKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(findChild(Roles::defaultKeyword));
+}
+
+syntax::Statement *syntax::DefaultStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(findChild(Roles::body));
+}
+
+syntax::Leaf *syntax::IfStatement::ifKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(findChild(Roles::ifKeyword));
+}
+
+syntax::Statement *syntax::IfStatement::thenStatement() {
+  return llvm::cast_or_null<syntax::Statement>(findChild(Roles::thenStatement));
+}
+
+syntax::Leaf *syntax::IfStatement::elseKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(findChild(Roles::elseKeyword));
+}
+
+syntax::Statement *syntax::IfStatement::elseStatement() {
+  return llvm::cast_or_null<syntax::Statement>(findChild(Roles::elseStatement));
+}
+
+syntax::Leaf *syntax::ForStatement::forKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(findChild(Roles::forKeyword));
+}
+
+syntax::Statement *syntax::ForStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(findChild(Roles::body));
+}
+
+syntax::Leaf *syntax::WhileStatement::whileKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(findChild(Roles::whileKeyword));
+}
+
+syntax::Statement *syntax::WhileStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(findChild(Roles::body));
+}
+
+syntax::Leaf *syntax::ContinueStatement::continueKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(findChild(Roles::continueKeyword));
+}
+
+syntax::Leaf *syntax::BreakStatement::breakKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(findChild(Roles::breakKeyword));
+}
+
+syntax::Leaf *syntax::ReturnStatement::returnKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(findChild(Roles::returnKeyword));
+}
+
+syntax::Leaf *syntax::RangeBasedForStatement::forKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(findChild(Roles::forKeyword));
+}
+
+syntax::Statement *syntax::RangeBasedForStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(findChild(Roles::body));
+}
+
+syntax::Expression *syntax::ExpressionStatement::expression() {
+  return llvm::cast_or_null<syntax::Expression>(findChild(Roles::expression));
+}
+
 syntax::Leaf *syntax::CompoundStatement::lbrace() {
   return llvm::cast_or_null<syntax::Leaf>(findChild(Roles::lbrace));
 }
 
+std::vector<syntax::Statement *> syntax::CompoundStatement::statements() {
+  std::vector<syntax::Statement *> Children;
+  for (auto *C = firstChild(); C; C = C->nextSibling()) {
+    if (C->role() == Roles::statement)
+      Children.push_back(llvm::cast<syntax::Statement>(C));
+  }
+  return Children;
+}
+
 syntax::Leaf *syntax::CompoundStatement::rbrace() {
   return llvm::cast_or_null<syntax::Leaf>(findChild(Roles::rbrace));
 }
Index: clang/lib/Tooling/Syntax/BuildTree.cpp
===================================================================
--- clang/lib/Tooling/Syntax/BuildTree.cpp
+++ clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -27,6 +27,8 @@
 
 using namespace clang;
 
+static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; }
+
 /// A helper class for constructing the syntax tree while traversing a clang
 /// AST.
 ///
@@ -52,6 +54,10 @@
   /// Range.
   void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New);
 
+  /// Mark the \p Child node with a corresponding \p Role. All marked children
+  /// should be consumed by foldNode.
+  void markChild(Stmt *Child, NodeRole Role);
+
   /// Set role for a token starting at \p Loc.
   void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R);
 
@@ -84,8 +90,23 @@
   llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const {
     return getRange(D->getBeginLoc(), D->getEndLoc());
   }
+  llvm::ArrayRef<syntax::Token> getRange(const Expr *E) const {
+    return getRange(E->getBeginLoc(), E->getEndLoc());
+  }
+  /// Find the adjusted range for the statement, consuming the trailing
+  /// semicolon when needed.
   llvm::ArrayRef<syntax::Token> getRange(const Stmt *S) const {
-    return getRange(S->getBeginLoc(), S->getEndLoc());
+    auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc());
+    if (isa<CompoundStmt>(S))
+      return Tokens;
+
+    // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and
+    // all statements that end with those. Consume this semicolon here.
+    //
+    // (!) statements never consume 'eof', so looking at the next token is ok.
+    if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi)
+      return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1);
+    return Tokens;
   }
 
 private:
@@ -226,6 +247,8 @@
     using Roles = syntax::CompoundStatement::Roles;
 
     Builder.markChildToken(S->getLBracLoc(), tok::l_brace, Roles::lbrace);
+    for (auto *Child : S->body())
+      Builder.markChild(Child, Roles::statement);
     Builder.markChildToken(S->getRBracLoc(), tok::r_brace, Roles::rbrace);
 
     Builder.foldNode(Builder.getRange(S),
@@ -233,6 +256,159 @@
     return true;
   }
 
+  // Some statements are not yet handled by syntax trees.
+  bool WalkUpFromStmt(Stmt *S) {
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::UnknownStatement);
+    return true;
+  }
+
+  bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
+    // We override to traverse range initializer as VarDecl.
+    // RAV traverses it as a statement, we produce invalid node kinds in that
+    // case.
+    // FIXME: should do this in RAV instead?
+    if (S->getInit() && !TraverseStmt(S->getInit()))
+      return false;
+    if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable()))
+      return false;
+    if (S->getRangeInit() && !TraverseStmt(S->getRangeInit()))
+      return false;
+    if (S->getBody() && !TraverseStmt(S->getBody()))
+      return false;
+    return true;
+  }
+
+  // Some expressions are not yet handled by syntax trees.
+  bool WalkUpFromExpr(Expr *E) {
+    assert(!isImplicitExpr(E) && "should be handled by TraverseStmt");
+    Builder.foldNode(Builder.getRange(E),
+                     new (allocator()) syntax::UnknownExpression);
+    return true;
+  }
+
+  bool TraverseStmt(Stmt *S) {
+    if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) {
+      // (!) do not recurse into subexpressions.
+      // we do not have syntax trees for expressions yet, so we only want to see
+      // the first top-level expression.
+      return WalkUpFromExpr(E->IgnoreImplicit());
+    }
+    return RecursiveASTVisitor::TraverseStmt(S);
+  }
+
+  // The code below is very regular, it could even be generated with some
+  // preprocessor magic. We merely assign roles to the corresponding children
+  // and fold resulting nodes.
+
+  bool WalkUpFromDeclStmt(DeclStmt *S) {
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::DeclarationStatement);
+    return true;
+  }
+
+  bool WalkUpFromNullStmt(NullStmt *S) {
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::EmptyStatement);
+    return true;
+  }
+
+  bool WalkUpFromSwitchStmt(SwitchStmt *S) {
+    using Roles = syntax::SwitchStatement::Roles;
+    Builder.markChildToken(S->getSwitchLoc(), tok::kw_switch,
+                           Roles::switchKeyword);
+    Builder.markChild(S->getBody(), Roles::body);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::SwitchStatement);
+    return true;
+  }
+
+  bool WalkUpFromCaseStmt(CaseStmt *S) {
+    using Roles = syntax::CaseStatement::Roles;
+    Builder.markChildToken(S->getKeywordLoc(), tok::kw_case,
+                           Roles::caseKeyword);
+    Builder.markChild(S->getSubStmt(), Roles::body);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::CaseStatement);
+    return true;
+  }
+
+  bool WalkUpFromDefaultStmt(DefaultStmt *S) {
+    using Roles = syntax::DefaultStatement::Roles;
+    Builder.markChildToken(S->getKeywordLoc(), tok::kw_default,
+                           Roles::defaultKeyword);
+    Builder.markChild(S->getSubStmt(), Roles::body);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::DefaultStatement);
+    return true;
+  }
+
+  bool WalkUpFromIfStmt(IfStmt *S) {
+    using Roles = syntax::IfStatement::Roles;
+    Builder.markChildToken(S->getIfLoc(), tok::kw_if, Roles::ifKeyword);
+    Builder.markChild(S->getThen(), Roles::thenStatement);
+    Builder.markChildToken(S->getElseLoc(), tok::kw_else, Roles::elseKeyword);
+    Builder.markChild(S->getElse(), Roles::elseStatement);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::IfStatement);
+    return true;
+  }
+
+  bool WalkUpFromForStmt(ForStmt *S) {
+    using Roles = syntax::ForStatement::Roles;
+    Builder.markChildToken(S->getForLoc(), tok::kw_for, Roles::forKeyword);
+    Builder.markChild(S->getBody(), Roles::body);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::ForStatement);
+    return true;
+  }
+
+  bool WalkUpFromWhileStmt(WhileStmt *S) {
+    using Roles = syntax::WhileStatement::Roles;
+    Builder.markChildToken(S->getWhileLoc(), tok::kw_while,
+                           Roles::whileKeyword);
+    Builder.markChild(S->getBody(), Roles::body);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::WhileStatement);
+    return true;
+  }
+
+  bool WalkUpFromContinueStmt(ContinueStmt *S) {
+    using Roles = syntax::ContinueStatement::Roles;
+    Builder.markChildToken(S->getContinueLoc(), tok::kw_continue,
+                           Roles::continueKeyword);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::ContinueStatement);
+    return true;
+  }
+
+  bool WalkUpFromBreakStmt(BreakStmt *S) {
+    using Roles = syntax::BreakStatement::Roles;
+    Builder.markChildToken(S->getBreakLoc(), tok::kw_break,
+                           Roles::breakKeyword);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::BreakStatement);
+    return true;
+  }
+
+  bool WalkUpFromReturnStmt(ReturnStmt *S) {
+    using Roles = syntax::ReturnStatement::Roles;
+    Builder.markChildToken(S->getReturnLoc(), tok::kw_return,
+                           Roles::returnKeyword);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::ReturnStatement);
+    return true;
+  }
+
+  bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) {
+    using Roles = syntax::RangeBasedForStatement::Roles;
+    Builder.markChildToken(S->getForLoc(), tok::kw_for, Roles::forKeyword);
+    Builder.markChild(S->getBody(), Roles::body);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::RangeBasedForStatement);
+    return true;
+  }
+
 private:
   /// A small helper to save some typing.
   llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }
@@ -254,6 +430,22 @@
   Pending.assignRole(*findToken(Loc), Role);
 }
 
+void syntax::TreeBuilder::markChild(Stmt *Child, NodeRole Role) {
+  if (!Child)
+    return;
+
+  auto Range = getRange(Child);
+  // This is an expression in a statement position, consume the trailing
+  // semicolon and form an 'ExpressionStatement' node.
+  if (auto *E = dyn_cast<Expr>(Child)) {
+    Pending.assignRole(getRange(E),
+                       syntax::ExpressionStatement::Roles::expression);
+    // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon.
+    Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement);
+  }
+  Pending.assignRole(Range, Role);
+}
+
 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {
   auto Tokens = Arena.tokenBuffer().expandedTokens();
   auto &SM = Arena.sourceManager();
Index: clang/include/clang/Tooling/Syntax/Nodes.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Nodes.h
+++ clang/include/clang/Tooling/Syntax/Nodes.h
@@ -26,6 +26,21 @@
   Leaf,
   TranslationUnitDeclaration,
   TopLevelDeclaration,
+  UnknownExpression,
+  UnknownStatement,
+  DeclarationStatement,
+  EmptyStatement,
+  SwitchStatement,
+  CaseStatement,
+  DefaultStatement,
+  IfStatement,
+  ForStatement,
+  WhileStatement,
+  ContinueStatement,
+  BreakStatement,
+  ReturnStatement,
+  RangeBasedForStatement,
+  ExpressionStatement,
   CompoundStatement
 };
 /// For debugging purposes.
@@ -59,16 +74,238 @@
   }
 };
 
+/// A base class for all expressions. Note that expressions are not statements,
+/// even though they are in clang.
+class Expression : public Tree {
+public:
+  Expression(NodeKind K) : Tree(K) {}
+  static bool classof(const Node *N) {
+    return NodeKind::UnknownExpression <= N->kind() &&
+           N->kind() <= NodeKind::UnknownExpression;
+  }
+};
+
+/// An expression of an unknown kind, i.e. one not currently handled by the
+/// syntax tree.
+class UnknownExpression final : public Expression {
+public:
+  UnknownExpression() : Expression(NodeKind::UnknownExpression) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::UnknownExpression;
+  }
+};
+
 /// An abstract node for C++ statements, e.g. 'while', 'if', etc.
 class Statement : public Tree {
 public:
   Statement(NodeKind K) : Tree(K) {}
   static bool classof(const Node *N) {
-    return NodeKind::CompoundStatement <= N->kind() &&
+    return NodeKind::UnknownStatement <= N->kind() &&
            N->kind() <= NodeKind::CompoundStatement;
   }
 };
 
+/// A statement of an unknown kind, i.e. one not currently handled by the syntax
+/// tree.
+class UnknownStatement final : public Statement {
+public:
+  UnknownStatement() : Statement(NodeKind::UnknownStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::UnknownStatement;
+  }
+};
+
+/// E.g. 'int a, b = 10;'
+class DeclarationStatement final : public Statement {
+public:
+  DeclarationStatement() : Statement(NodeKind::DeclarationStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::DeclarationStatement;
+  }
+};
+
+/// The no-op statement, i.e. ';'.
+class EmptyStatement final : public Statement {
+public:
+  EmptyStatement() : Statement(NodeKind::EmptyStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::EmptyStatement;
+  }
+};
+
+/// switch (<cond>) <body>
+class SwitchStatement final : public Statement {
+public:
+  SwitchStatement() : Statement(NodeKind::SwitchStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::SwitchStatement;
+  }
+  syntax::Leaf *switchKeyword();
+  syntax::Statement *body();
+
+  struct Roles {
+    static constexpr NodeRole switchKeyword = 1;
+    static constexpr NodeRole body = 2;
+  };
+};
+
+/// case <value>: <body>
+class CaseStatement final : public Statement {
+public:
+  CaseStatement() : Statement(NodeKind::CaseStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::CaseStatement;
+  }
+  syntax::Leaf *caseKeyword();
+  syntax::Statement *body();
+
+  struct Roles {
+    static constexpr NodeRole caseKeyword = 1;
+    static constexpr NodeRole body = 2;
+  };
+};
+
+/// default: <body>
+class DefaultStatement final : public Statement {
+public:
+  DefaultStatement() : Statement(NodeKind::DefaultStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::DefaultStatement;
+  }
+  syntax::Leaf *defaultKeyword();
+  syntax::Statement *body();
+
+  struct Roles {
+    static constexpr NodeRole defaultKeyword = 1;
+    static constexpr NodeRole body = 2;
+  };
+};
+
+/// if (cond) <then-statement> else <else-statement>
+class IfStatement final : public Statement {
+public:
+  IfStatement() : Statement(NodeKind::IfStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::IfStatement;
+  }
+  syntax::Leaf *ifKeyword();
+  syntax::Statement *thenStatement();
+  syntax::Leaf *elseKeyword();
+  syntax::Statement *elseStatement();
+
+  struct Roles {
+    static constexpr NodeRole ifKeyword = 1;
+    static constexpr NodeRole thenStatement = 2;
+    static constexpr NodeRole elseKeyword = 3;
+    static constexpr NodeRole elseStatement = 4;
+  };
+};
+
+/// for (<init>; <cond>; <increment>) <body>
+class ForStatement final : public Statement {
+public:
+  ForStatement() : Statement(NodeKind::ForStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::ForStatement;
+  }
+  syntax::Leaf *forKeyword();
+  syntax::Statement *body();
+
+  struct Roles {
+    static constexpr NodeRole forKeyword = 1;
+    static constexpr NodeRole body = 2;
+  };
+};
+
+/// while (<cond>) <body>
+class WhileStatement final : public Statement {
+public:
+  WhileStatement() : Statement(NodeKind::WhileStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::WhileStatement;
+  }
+  syntax::Leaf *whileKeyword();
+  syntax::Statement *body();
+
+  struct Roles {
+    static constexpr NodeRole whileKeyword = 1;
+    static constexpr NodeRole body = 2;
+  };
+};
+
+/// continue;
+class ContinueStatement final : public Statement {
+public:
+  ContinueStatement() : Statement(NodeKind::ContinueStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::ContinueStatement;
+  }
+  syntax::Leaf *continueKeyword();
+
+  struct Roles {
+    static constexpr NodeRole continueKeyword = 1;
+  };
+};
+
+/// break;
+class BreakStatement final : public Statement {
+public:
+  BreakStatement() : Statement(NodeKind::BreakStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::BreakStatement;
+  }
+  syntax::Leaf *breakKeyword();
+
+  struct Roles {
+    static constexpr NodeRole breakKeyword = 1;
+  };
+};
+
+/// return <expr>;
+class ReturnStatement final : public Statement {
+public:
+  ReturnStatement() : Statement(NodeKind::ReturnStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::ReturnStatement;
+  }
+  syntax::Leaf *returnKeyword();
+
+  struct Roles {
+    static constexpr NodeRole returnKeyword = 1;
+  };
+};
+
+/// for (<decl> : <init>) <body>
+class RangeBasedForStatement final : public Statement {
+public:
+  RangeBasedForStatement() : Statement(NodeKind::RangeBasedForStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::RangeBasedForStatement;
+  }
+  syntax::Leaf *forKeyword();
+  syntax::Statement *body();
+
+  struct Roles {
+    static constexpr NodeRole forKeyword = 1;
+    static constexpr NodeRole body = 2;
+  };
+};
+
+/// Expression in a statement position, e.g. function calls inside compound
+/// statements or inside a loop body.
+class ExpressionStatement final : public Statement {
+public:
+  ExpressionStatement() : Statement(NodeKind::ExpressionStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::ExpressionStatement;
+  }
+  syntax::Expression *expression();
+
+  struct Roles {
+    static constexpr NodeRole expression = 1;
+  };
+};
+
 /// { statement1; statement2; … }
 class CompoundStatement final : public Statement {
 public:
@@ -77,11 +314,14 @@
     return N->kind() == NodeKind::CompoundStatement;
   }
   syntax::Leaf *lbrace();
+  /// FIXME: use custom iterator instead of 'vector'.
+  std::vector<syntax::Statement *> statements();
   syntax::Leaf *rbrace();
 
   struct Roles {
     static constexpr NodeRole lbrace = 1;
-    static constexpr NodeRole rbrace = 2;
+    static constexpr NodeRole statement = 2;
+    static constexpr NodeRole rbrace = 3;
   };
 };
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to