hokein created this revision.
hokein added a reviewer: sammccall.
Herald added a project: All.
hokein requested review of this revision.
Herald added a subscriber: alextsao1999.
Herald added a project: clang-tools-extra.

- emit an enum type to identify each grammar rule (in lhs$$rhs$rhs$ form);
- based on the enum type rule, implement a guard for function declarator, which 
will eliminate some false parses;


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D129222

Files:
  clang-tools-extra/pseudo/gen/Main.cpp
  clang-tools-extra/pseudo/include/clang-pseudo/Language.h
  clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
  clang-tools-extra/pseudo/lib/GLR.cpp
  clang-tools-extra/pseudo/lib/cxx/CXX.cpp
  clang-tools-extra/pseudo/lib/cxx/cxx.bnf
  clang-tools-extra/pseudo/test/cxx/declarator-function.cpp
  clang-tools-extra/pseudo/test/cxx/declarator-var.cpp
  clang-tools-extra/pseudo/unittests/GLRTest.cpp

Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -635,7 +635,7 @@
   )bnf");
   TestLang.Guards.try_emplace(
       extensionID("TestOnly"),
-      [&](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {
+      [&](RuleID RID, llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {
         assert(RHS.size() == 1 &&
                RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
         return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "test";
Index: clang-tools-extra/pseudo/test/cxx/declarator-var.cpp
===================================================================
--- clang-tools-extra/pseudo/test/cxx/declarator-var.cpp
+++ clang-tools-extra/pseudo/test/cxx/declarator-var.cpp
@@ -1,8 +1,6 @@
 // The standard grammar allows an function-body to use any declarator, including
 // a non-function declarator. This creates an ambiguity where a
 // simple-declaration is misparsed as a function-definition.
-// FIXME: eliminate this false parse.
-// XFAIL: *
 
 // RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s
 void (*s)(){};
Index: clang-tools-extra/pseudo/test/cxx/declarator-function.cpp
===================================================================
--- clang-tools-extra/pseudo/test/cxx/declarator-function.cpp
+++ clang-tools-extra/pseudo/test/cxx/declarator-function.cpp
@@ -1,11 +1,28 @@
 // The standard grammar allows an init-list with any declarator, including
 // a function declarator. This creates an ambiguity where a function-definition
 // is misparsed as a simple-declaration.
-// FIXME: eliminate this false parse.
-// XFAIL: *
 
 // RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s
 void s(){};
-// CHECK-NOT:      simple-declaration
-// CHECK:          function-definition := decl-specifier-seq declarator
-// function-body CHECK-NOT:      simple-declaration
+// CHECK:      ├─declaration-seq~function-definition := decl-specifier-seq declarator function-body                 [guard=FunctionDeclarator]
+// CHECK-NEXT: │ ├─decl-specifier-seq~VOID := tok[0]
+// CHECK-NEXT: │ ├─declarator~noptr-declarator := noptr-declarator parameters-and-qualifiers
+// CHECK-NEXT: │ │ ├─noptr-declarator~IDENTIFIER := tok[1]
+// CHECK-NEXT: │ │ └─parameters-and-qualifiers := ( )
+// CHECK-NEXT: │ │   ├─( := tok[2]
+// CHECK-NEXT: │ │   └─) := tok[3]
+// CHECK-NEXT: │ └─function-body~compound-statement := { }
+// CHECK-NEXT: │   ├─{ := tok[4]
+// CHECK-NEXT: │   └─} := tok[5]
+// CHECK-NEXT: └─declaration~; := tok[6]
+
+// Should not parse as a function-definition.
+int s2{};
+// CHECK-NEXT:  declaration~simple-declaration := decl-specifier-seq init-declarator-list ;
+// CHECK-NEXT:  ├─decl-specifier-seq~INT := tok[7]
+// CHECK-NEXT:  ├─init-declarator-list~init-declarator := declarator initializer [guard=NotFunctionDeclarator]
+// CHECK-NEXT:  │ ├─declarator~IDENTIFIER := tok[8]
+// CHECK-NEXT:  │ └─initializer~braced-init-list := { }
+// CHECK-NEXT:  │   ├─{ := tok[9]
+// CHECK-NEXT:  │   └─} := tok[10]
+// CHECK-NEXT:  └─; := tok[11]
Index: clang-tools-extra/pseudo/lib/cxx/cxx.bnf
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -402,7 +402,7 @@
 placeholder-type-specifier := type-constraint_opt DECLTYPE ( AUTO )
 init-declarator-list := init-declarator
 init-declarator-list := init-declarator-list , init-declarator
-init-declarator := declarator initializer_opt
+init-declarator := declarator initializer_opt [guard=NotFunctionDeclarator]
 init-declarator := declarator requires-clause
 declarator := ptr-declarator
 declarator := noptr-declarator parameters-and-qualifiers trailing-return-type
@@ -472,8 +472,8 @@
 expr-or-braced-init-list := expression
 expr-or-braced-init-list := braced-init-list
 # dcl.fct
-function-definition := decl-specifier-seq_opt declarator virt-specifier-seq_opt function-body
-function-definition := decl-specifier-seq_opt declarator requires-clause function-body
+function-definition := decl-specifier-seq_opt declarator virt-specifier-seq_opt function-body [guard=FunctionDeclarator]
+function-definition := decl-specifier-seq_opt declarator requires-clause function-body [guard=FunctionDeclarator]
 function-body := ctor-initializer_opt compound-statement
 function-body := function-try-block
 function-body := = DEFAULT ;
Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -21,24 +21,133 @@
 #include "CXXBNF.inc"
     ;
 
-bool guardOverride(llvm::ArrayRef<const ForestNode *> RHS,
+bool guardOverride(RuleID RID, llvm::ArrayRef<const ForestNode *> RHS,
                    const TokenStream &Tokens) {
   assert(RHS.size() == 1 &&
          RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
   return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "override";
 }
-bool guardFinal(llvm::ArrayRef<const ForestNode *> RHS,
+bool guardFinal(RuleID RID, llvm::ArrayRef<const ForestNode *> RHS,
                 const TokenStream &Tokens) {
   assert(RHS.size() == 1 &&
          RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
   return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "final";
 }
 
-llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
-  return {
-      {(ExtensionID)Extension::Override, guardOverride},
-      {(ExtensionID)Extension::Final, guardFinal},
+// Determine the declarator-id of the given declarator is a function.
+//
+// E.g.
+//   `a` in `int a`           => false, a is int
+//   `*p` in `int *p`         => false, p is a pointer to int
+//   `*p()` in `int *p()`     => true, p is a function
+//   `(*p)()` in `int (*p)()` => false, p is a pointer to a function
+//
+// FIXME: though the cost of this check is relatively cheap, we should
+// probably consider caching the results to avoid redundant computations.
+bool isFunctionDeclarator(const ForestNode *DeclaratorFN) {
+  assert(DeclaratorFN->symbol() == (SymbolID)(cxx::Symbol::declarator));
+  enum DeclaratorKind {
+    Unknown = 0,
+    Function,
+    Other, // non-function kinds, pointer etc
   };
+  DeclaratorKind Kind = Unknown;
+  // Declarators nest "inside out", we perform a DFS on the declarator:
+  //  1) descend into the inner-most node (which is a declarator-id)
+  //  2) walkup the stack, find the first rule that can determine the kind
+  auto DFS = [&Kind](const ForestNode *Declarator, auto DFS) {
+    // !! The declarator defined in the cxx.bnf should be unambiguous.
+    // FIXME: should we consider opaque node?
+    assert(Declarator->kind() == ForestNode::Sequence);
+    Rule R = (Rule)Declarator->rule();
+
+    // Reach to the inner-most node of the declarator, walkup now.
+    if (R == Rule::noptr_declarator$$declarator_id)
+      return;
+    // This is the main entry grammar rule for declarator.
+    if (R == Rule::declarator$$ptr_declarator) {
+      DFS(Declarator->elements()[0], DFS);
+      if (Kind == Unknown)
+        Kind = Other; // simple declarator (e.g. `a` in `int a`).
+      return;
+    }
+    // These rules don't contribute the declarator kind.
+    if (R == Rule::ptr_declarator$$noptr_declarator)
+      return DFS(Declarator->elements()[0], DFS);
+    if (R == Rule::noptr_declarator$$l_paren$ptr_declarator$r_paren)
+      return DFS(Declarator->elements()[1], DFS);
+
+    if (R == Rule::ptr_declarator$$ptr_operator$ptr_declarator) {
+      // a pointer declarator p: `int *p`
+      // vs. a function-declarator p: `int *p()`
+      DFS(Declarator->elements()[1], DFS);
+      if (Kind == Unknown)
+        Kind = Other; //  pointer declarator
+      return;
+    }
+    if (R ==
+            Rule::
+                noptr_declarator$$noptr_declarator$l_square$constant_expression$r_square ||
+        R == Rule::noptr_declarator$$noptr_declarator$l_square$r_square) {
+      // `int s[];` vs `int (*s())[];`
+      DFS(Declarator->elements()[0], DFS);
+      if (Kind == Unknown)
+        Kind = Other; // array declarator
+      return;
+    }
+    if (R ==
+            Rule::
+                declarator$$noptr_declarator$parameters_and_qualifiers$trailing_return_type ||
+        R == Rule::
+                 noptr_declarator$$noptr_declarator$parameters_and_qualifiers) {
+      DFS(Declarator->elements()[0], DFS);
+      if (Kind == Unknown)
+        Kind = Function;
+      return;
+    }
+  };
+  DFS(DeclaratorFN, DFS);
+  assert(Kind != Unknown && "The declarator kind is unknown!");
+  return Kind == Function;
+}
+
+bool guardNotFunctionDeclarator(RuleID RID,
+                                llvm::ArrayRef<const ForestNode *> RHS,
+                                const TokenStream &Tokens) {
+  if ((Rule)(RID) == Rule::init_declarator$$declarator$initializer)
+    return !isFunctionDeclarator(RHS[0]);
+  return true;
+}
+
+bool guardFunctionDeclarator(RuleID RID, llvm::ArrayRef<const ForestNode *> RHS,
+                             const TokenStream &Tokens) {
+  auto R = (Rule)(RID);
+  size_t DeclaratorIndex = 0;
+  switch (R) {
+  case Rule::function_definition$$decl_specifier_seq$declarator$function_body:
+  case Rule::
+      function_definition$$decl_specifier_seq$declarator$requires_clause$function_body:
+  case Rule::
+      function_definition$$decl_specifier_seq$declarator$virt_specifier_seq$function_body:
+    DeclaratorIndex = 1;
+    break;
+  case Rule::function_definition$$declarator$function_body:
+  case Rule::function_definition$$declarator$requires_clause$function_body:
+  case Rule::function_definition$$declarator$virt_specifier_seq$function_body:
+    DeclaratorIndex = 0;
+    break;
+  default:
+    llvm_unreachable("Unhandle function-definition rules.");
+  }
+  return isFunctionDeclarator(RHS[DeclaratorIndex]);
+}
+
+llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
+  return {{(ExtensionID)Extension::Override, guardOverride},
+          {(ExtensionID)Extension::Final, guardFinal},
+          {(ExtensionID)Extension::FunctionDeclarator, guardFunctionDeclarator},
+          {(ExtensionID)Extension::NotFunctionDeclarator,
+           guardNotFunctionDeclarator}};
 }
 
 Token::Index recoverBrackets(Token::Index Begin, const TokenStream &Tokens) {
Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -421,7 +421,7 @@
     if (!GuardID)
       return true;
     if (auto Guard = Lang.Guards.lookup(GuardID))
-      return Guard(RHS, Params.Code);
+      return Guard(RID, RHS, Params.Code);
     LLVM_DEBUG(llvm::dbgs()
                << llvm::formatv("missing guard implementation for rule {0}\n",
                                 Lang.G.dumpRule(RID)));
Index: clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
@@ -37,6 +37,12 @@
 #undef NONTERMINAL
 };
 
+enum class Rule : RuleID {
+#define RULE(X, Y) X = Y,
+#include "CXXSymbols.inc"
+#undef RULE
+};
+
 enum class Extension : ExtensionID {
 #define EXTENSION(X, Y) X = Y,
 #include "CXXSymbols.inc"
Index: clang-tools-extra/pseudo/include/clang-pseudo/Language.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/Language.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -27,7 +27,7 @@
 //
 // Return true if the guard is satisfied.
 using RuleGuard = llvm::function_ref<bool(
-    llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &)>;
+    RuleID RID, llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &)>;
 
 // A recovery strategy determines a region of code to skip when parsing fails.
 //
Index: clang-tools-extra/pseudo/gen/Main.cpp
===================================================================
--- clang-tools-extra/pseudo/gen/Main.cpp
+++ clang-tools-extra/pseudo/gen/Main.cpp
@@ -56,6 +56,21 @@
   }
   return Text.get()->getBuffer().str();
 }
+
+std::string symbolName(clang::pseudo::SymbolID SID,
+                       const clang::pseudo::Grammar &G) {
+  static const char *const TokNames[] = {
+#define TOK(X) #X,
+#define KEYWORD(X, Y) #X,
+#include "clang/Basic/TokenKinds.def"
+      nullptr};
+  if (clang::pseudo::isToken(SID))
+    return TokNames[clang::pseudo::symbolToToken(SID)];
+  std::string Name = G.symbolName(SID).str();
+  // translation-unit -> translation_unit
+  std::replace(Name.begin(), Name.end(), '-', '_');
+  return Name;
+}
 } // namespace
 
 int main(int argc, char *argv[]) {
@@ -83,16 +98,24 @@
 #ifndef NONTERMINAL
 #define NONTERMINAL(X, Y)
 #endif
+#ifndef RULE
+#define RULE(X, Y)
+#endif
 #ifndef EXTENSION
 #define EXTENSION(X, Y)
 #endif
     )cpp";
     for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size();
-         ++ID) {
-      std::string Name = G.symbolName(ID).str();
-      // translation-unit -> translation_unit
-      std::replace(Name.begin(), Name.end(), '-', '_');
-      Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID);
+         ++ID)
+      Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", symbolName(ID, G),
+                                ID);
+    for (clang::pseudo::RuleID RID = 0; RID < G.table().Rules.size(); ++RID) {
+      const clang::pseudo::Rule &R = G.table().Rules[RID];
+      // lhs$$rhs$rhs$rhs
+      std::string EnumName = symbolName(R.Target, G) + "$";
+      for (clang::pseudo::SymbolID SeqID : R.seq())
+        EnumName += "$" + symbolName(SeqID, G);
+      Out.os() << llvm::formatv("RULE({0}, {1})\n", EnumName, RID);
     }
     for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/;
          EID < G.table().AttributeValues.size(); ++EID) {
@@ -102,6 +125,7 @@
     }
     Out.os() << R"cpp(
 #undef NONTERMINAL
+#undef RULE
 #undef EXTENSION
     )cpp";
     break;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to