hokein created this revision. hokein added a reviewer: sammccall. Herald added a project: All. hokein requested review of this revision. Herald added a subscriber: alextsao1999. Herald added a project: clang-tools-extra.
- emit an enum type to identify each grammar rule (in lhs$$rhs$rhs$ form); - based on the enum type rule, implement a guard for function declarator, which will eliminate some false parses; Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D129222 Files: clang-tools-extra/pseudo/gen/Main.cpp clang-tools-extra/pseudo/include/clang-pseudo/Language.h clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h clang-tools-extra/pseudo/lib/GLR.cpp clang-tools-extra/pseudo/lib/cxx/CXX.cpp clang-tools-extra/pseudo/lib/cxx/cxx.bnf clang-tools-extra/pseudo/test/cxx/declarator-function.cpp clang-tools-extra/pseudo/test/cxx/declarator-var.cpp clang-tools-extra/pseudo/unittests/GLRTest.cpp
Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp =================================================================== --- clang-tools-extra/pseudo/unittests/GLRTest.cpp +++ clang-tools-extra/pseudo/unittests/GLRTest.cpp @@ -635,7 +635,7 @@ )bnf"); TestLang.Guards.try_emplace( extensionID("TestOnly"), - [&](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) { + [&](RuleID RID, llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) { assert(RHS.size() == 1 && RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "test"; Index: clang-tools-extra/pseudo/test/cxx/declarator-var.cpp =================================================================== --- clang-tools-extra/pseudo/test/cxx/declarator-var.cpp +++ clang-tools-extra/pseudo/test/cxx/declarator-var.cpp @@ -1,8 +1,6 @@ // The standard grammar allows an function-body to use any declarator, including // a non-function declarator. This creates an ambiguity where a // simple-declaration is misparsed as a function-definition. -// FIXME: eliminate this false parse. -// XFAIL: * // RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s void (*s)(){}; Index: clang-tools-extra/pseudo/test/cxx/declarator-function.cpp =================================================================== --- clang-tools-extra/pseudo/test/cxx/declarator-function.cpp +++ clang-tools-extra/pseudo/test/cxx/declarator-function.cpp @@ -1,11 +1,28 @@ // The standard grammar allows an init-list with any declarator, including // a function declarator. This creates an ambiguity where a function-definition // is misparsed as a simple-declaration. -// FIXME: eliminate this false parse. -// XFAIL: * // RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s void s(){}; -// CHECK-NOT: simple-declaration -// CHECK: function-definition := decl-specifier-seq declarator -// function-body CHECK-NOT: simple-declaration +// CHECK: ââdeclaration-seq~function-definition := decl-specifier-seq declarator function-body [guard=FunctionDeclarator] +// CHECK-NEXT: â ââdecl-specifier-seq~VOID := tok[0] +// CHECK-NEXT: â ââdeclarator~noptr-declarator := noptr-declarator parameters-and-qualifiers +// CHECK-NEXT: â â âânoptr-declarator~IDENTIFIER := tok[1] +// CHECK-NEXT: â â ââparameters-and-qualifiers := ( ) +// CHECK-NEXT: â â ââ( := tok[2] +// CHECK-NEXT: â â ââ) := tok[3] +// CHECK-NEXT: â ââfunction-body~compound-statement := { } +// CHECK-NEXT: â ââ{ := tok[4] +// CHECK-NEXT: â ââ} := tok[5] +// CHECK-NEXT: ââdeclaration~; := tok[6] + +// Should not parse as a function-definition. +int s2{}; +// CHECK-NEXT: declaration~simple-declaration := decl-specifier-seq init-declarator-list ; +// CHECK-NEXT: ââdecl-specifier-seq~INT := tok[7] +// CHECK-NEXT: ââinit-declarator-list~init-declarator := declarator initializer [guard=NotFunctionDeclarator] +// CHECK-NEXT: â ââdeclarator~IDENTIFIER := tok[8] +// CHECK-NEXT: â ââinitializer~braced-init-list := { } +// CHECK-NEXT: â ââ{ := tok[9] +// CHECK-NEXT: â ââ} := tok[10] +// CHECK-NEXT: ââ; := tok[11] Index: clang-tools-extra/pseudo/lib/cxx/cxx.bnf =================================================================== --- clang-tools-extra/pseudo/lib/cxx/cxx.bnf +++ clang-tools-extra/pseudo/lib/cxx/cxx.bnf @@ -402,7 +402,7 @@ placeholder-type-specifier := type-constraint_opt DECLTYPE ( AUTO ) init-declarator-list := init-declarator init-declarator-list := init-declarator-list , init-declarator -init-declarator := declarator initializer_opt +init-declarator := declarator initializer_opt [guard=NotFunctionDeclarator] init-declarator := declarator requires-clause declarator := ptr-declarator declarator := noptr-declarator parameters-and-qualifiers trailing-return-type @@ -472,8 +472,8 @@ expr-or-braced-init-list := expression expr-or-braced-init-list := braced-init-list # dcl.fct -function-definition := decl-specifier-seq_opt declarator virt-specifier-seq_opt function-body -function-definition := decl-specifier-seq_opt declarator requires-clause function-body +function-definition := decl-specifier-seq_opt declarator virt-specifier-seq_opt function-body [guard=FunctionDeclarator] +function-definition := decl-specifier-seq_opt declarator requires-clause function-body [guard=FunctionDeclarator] function-body := ctor-initializer_opt compound-statement function-body := function-try-block function-body := = DEFAULT ; Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp =================================================================== --- clang-tools-extra/pseudo/lib/cxx/CXX.cpp +++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp @@ -21,24 +21,133 @@ #include "CXXBNF.inc" ; -bool guardOverride(llvm::ArrayRef<const ForestNode *> RHS, +bool guardOverride(RuleID RID, llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) { assert(RHS.size() == 1 && RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "override"; } -bool guardFinal(llvm::ArrayRef<const ForestNode *> RHS, +bool guardFinal(RuleID RID, llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) { assert(RHS.size() == 1 && RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "final"; } -llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() { - return { - {(ExtensionID)Extension::Override, guardOverride}, - {(ExtensionID)Extension::Final, guardFinal}, +// Determine the declarator-id of the given declarator is a function. +// +// E.g. +// `a` in `int a` => false, a is int +// `*p` in `int *p` => false, p is a pointer to int +// `*p()` in `int *p()` => true, p is a function +// `(*p)()` in `int (*p)()` => false, p is a pointer to a function +// +// FIXME: though the cost of this check is relatively cheap, we should +// probably consider caching the results to avoid redundant computations. +bool isFunctionDeclarator(const ForestNode *DeclaratorFN) { + assert(DeclaratorFN->symbol() == (SymbolID)(cxx::Symbol::declarator)); + enum DeclaratorKind { + Unknown = 0, + Function, + Other, // non-function kinds, pointer etc }; + DeclaratorKind Kind = Unknown; + // Declarators nest "inside out", we perform a DFS on the declarator: + // 1) descend into the inner-most node (which is a declarator-id) + // 2) walkup the stack, find the first rule that can determine the kind + auto DFS = [&Kind](const ForestNode *Declarator, auto DFS) { + // !! The declarator defined in the cxx.bnf should be unambiguous. + // FIXME: should we consider opaque node? + assert(Declarator->kind() == ForestNode::Sequence); + Rule R = (Rule)Declarator->rule(); + + // Reach to the inner-most node of the declarator, walkup now. + if (R == Rule::noptr_declarator$$declarator_id) + return; + // This is the main entry grammar rule for declarator. + if (R == Rule::declarator$$ptr_declarator) { + DFS(Declarator->elements()[0], DFS); + if (Kind == Unknown) + Kind = Other; // simple declarator (e.g. `a` in `int a`). + return; + } + // These rules don't contribute the declarator kind. + if (R == Rule::ptr_declarator$$noptr_declarator) + return DFS(Declarator->elements()[0], DFS); + if (R == Rule::noptr_declarator$$l_paren$ptr_declarator$r_paren) + return DFS(Declarator->elements()[1], DFS); + + if (R == Rule::ptr_declarator$$ptr_operator$ptr_declarator) { + // a pointer declarator p: `int *p` + // vs. a function-declarator p: `int *p()` + DFS(Declarator->elements()[1], DFS); + if (Kind == Unknown) + Kind = Other; // pointer declarator + return; + } + if (R == + Rule:: + noptr_declarator$$noptr_declarator$l_square$constant_expression$r_square || + R == Rule::noptr_declarator$$noptr_declarator$l_square$r_square) { + // `int s[];` vs `int (*s())[];` + DFS(Declarator->elements()[0], DFS); + if (Kind == Unknown) + Kind = Other; // array declarator + return; + } + if (R == + Rule:: + declarator$$noptr_declarator$parameters_and_qualifiers$trailing_return_type || + R == Rule:: + noptr_declarator$$noptr_declarator$parameters_and_qualifiers) { + DFS(Declarator->elements()[0], DFS); + if (Kind == Unknown) + Kind = Function; + return; + } + }; + DFS(DeclaratorFN, DFS); + assert(Kind != Unknown && "The declarator kind is unknown!"); + return Kind == Function; +} + +bool guardNotFunctionDeclarator(RuleID RID, + llvm::ArrayRef<const ForestNode *> RHS, + const TokenStream &Tokens) { + if ((Rule)(RID) == Rule::init_declarator$$declarator$initializer) + return !isFunctionDeclarator(RHS[0]); + return true; +} + +bool guardFunctionDeclarator(RuleID RID, llvm::ArrayRef<const ForestNode *> RHS, + const TokenStream &Tokens) { + auto R = (Rule)(RID); + size_t DeclaratorIndex = 0; + switch (R) { + case Rule::function_definition$$decl_specifier_seq$declarator$function_body: + case Rule:: + function_definition$$decl_specifier_seq$declarator$requires_clause$function_body: + case Rule:: + function_definition$$decl_specifier_seq$declarator$virt_specifier_seq$function_body: + DeclaratorIndex = 1; + break; + case Rule::function_definition$$declarator$function_body: + case Rule::function_definition$$declarator$requires_clause$function_body: + case Rule::function_definition$$declarator$virt_specifier_seq$function_body: + DeclaratorIndex = 0; + break; + default: + llvm_unreachable("Unhandle function-definition rules."); + } + return isFunctionDeclarator(RHS[DeclaratorIndex]); +} + +llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() { + return {{(ExtensionID)Extension::Override, guardOverride}, + {(ExtensionID)Extension::Final, guardFinal}, + {(ExtensionID)Extension::FunctionDeclarator, guardFunctionDeclarator}, + {(ExtensionID)Extension::NotFunctionDeclarator, + guardNotFunctionDeclarator}}; } Token::Index recoverBrackets(Token::Index Begin, const TokenStream &Tokens) { Index: clang-tools-extra/pseudo/lib/GLR.cpp =================================================================== --- clang-tools-extra/pseudo/lib/GLR.cpp +++ clang-tools-extra/pseudo/lib/GLR.cpp @@ -421,7 +421,7 @@ if (!GuardID) return true; if (auto Guard = Lang.Guards.lookup(GuardID)) - return Guard(RHS, Params.Code); + return Guard(RID, RHS, Params.Code); LLVM_DEBUG(llvm::dbgs() << llvm::formatv("missing guard implementation for rule {0}\n", Lang.G.dumpRule(RID))); Index: clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h =================================================================== --- clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h +++ clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h @@ -37,6 +37,12 @@ #undef NONTERMINAL }; +enum class Rule : RuleID { +#define RULE(X, Y) X = Y, +#include "CXXSymbols.inc" +#undef RULE +}; + enum class Extension : ExtensionID { #define EXTENSION(X, Y) X = Y, #include "CXXSymbols.inc" Index: clang-tools-extra/pseudo/include/clang-pseudo/Language.h =================================================================== --- clang-tools-extra/pseudo/include/clang-pseudo/Language.h +++ clang-tools-extra/pseudo/include/clang-pseudo/Language.h @@ -27,7 +27,7 @@ // // Return true if the guard is satisfied. using RuleGuard = llvm::function_ref<bool( - llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &)>; + RuleID RID, llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &)>; // A recovery strategy determines a region of code to skip when parsing fails. // Index: clang-tools-extra/pseudo/gen/Main.cpp =================================================================== --- clang-tools-extra/pseudo/gen/Main.cpp +++ clang-tools-extra/pseudo/gen/Main.cpp @@ -56,6 +56,21 @@ } return Text.get()->getBuffer().str(); } + +std::string symbolName(clang::pseudo::SymbolID SID, + const clang::pseudo::Grammar &G) { + static const char *const TokNames[] = { +#define TOK(X) #X, +#define KEYWORD(X, Y) #X, +#include "clang/Basic/TokenKinds.def" + nullptr}; + if (clang::pseudo::isToken(SID)) + return TokNames[clang::pseudo::symbolToToken(SID)]; + std::string Name = G.symbolName(SID).str(); + // translation-unit -> translation_unit + std::replace(Name.begin(), Name.end(), '-', '_'); + return Name; +} } // namespace int main(int argc, char *argv[]) { @@ -83,16 +98,24 @@ #ifndef NONTERMINAL #define NONTERMINAL(X, Y) #endif +#ifndef RULE +#define RULE(X, Y) +#endif #ifndef EXTENSION #define EXTENSION(X, Y) #endif )cpp"; for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size(); - ++ID) { - std::string Name = G.symbolName(ID).str(); - // translation-unit -> translation_unit - std::replace(Name.begin(), Name.end(), '-', '_'); - Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID); + ++ID) + Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", symbolName(ID, G), + ID); + for (clang::pseudo::RuleID RID = 0; RID < G.table().Rules.size(); ++RID) { + const clang::pseudo::Rule &R = G.table().Rules[RID]; + // lhs$$rhs$rhs$rhs + std::string EnumName = symbolName(R.Target, G) + "$"; + for (clang::pseudo::SymbolID SeqID : R.seq()) + EnumName += "$" + symbolName(SeqID, G); + Out.os() << llvm::formatv("RULE({0}, {1})\n", EnumName, RID); } for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/; EID < G.table().AttributeValues.size(); ++EID) { @@ -102,6 +125,7 @@ } Out.os() << R"cpp( #undef NONTERMINAL +#undef RULE #undef EXTENSION )cpp"; break;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits