hokein created this revision.
hokein added a reviewer: sammccall.
Herald added a project: All.
hokein requested review of this revision.
Herald added a subscriber: alextsao1999.
Herald added a project: clang-tools-extra.
- emit an enum type to identify each grammar rule (in lhs$$rhs$rhs$ form);
- based on the enum type rule, implement a guard for function declarator, which
will eliminate some false parses;
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D129222
Files:
clang-tools-extra/pseudo/gen/Main.cpp
clang-tools-extra/pseudo/include/clang-pseudo/Language.h
clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
clang-tools-extra/pseudo/lib/GLR.cpp
clang-tools-extra/pseudo/lib/cxx/CXX.cpp
clang-tools-extra/pseudo/lib/cxx/cxx.bnf
clang-tools-extra/pseudo/test/cxx/declarator-function.cpp
clang-tools-extra/pseudo/test/cxx/declarator-var.cpp
clang-tools-extra/pseudo/unittests/GLRTest.cpp
Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -635,7 +635,7 @@
)bnf");
TestLang.Guards.try_emplace(
extensionID("TestOnly"),
- [&](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {
+ [&](RuleID RID, llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {
assert(RHS.size() == 1 &&
RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "test";
Index: clang-tools-extra/pseudo/test/cxx/declarator-var.cpp
===================================================================
--- clang-tools-extra/pseudo/test/cxx/declarator-var.cpp
+++ clang-tools-extra/pseudo/test/cxx/declarator-var.cpp
@@ -1,8 +1,6 @@
// The standard grammar allows an function-body to use any declarator, including
// a non-function declarator. This creates an ambiguity where a
// simple-declaration is misparsed as a function-definition.
-// FIXME: eliminate this false parse.
-// XFAIL: *
// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s
void (*s)(){};
Index: clang-tools-extra/pseudo/test/cxx/declarator-function.cpp
===================================================================
--- clang-tools-extra/pseudo/test/cxx/declarator-function.cpp
+++ clang-tools-extra/pseudo/test/cxx/declarator-function.cpp
@@ -1,11 +1,28 @@
// The standard grammar allows an init-list with any declarator, including
// a function declarator. This creates an ambiguity where a function-definition
// is misparsed as a simple-declaration.
-// FIXME: eliminate this false parse.
-// XFAIL: *
// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s
void s(){};
-// CHECK-NOT: simple-declaration
-// CHECK: function-definition := decl-specifier-seq declarator
-// function-body CHECK-NOT: simple-declaration
+// CHECK: ââdeclaration-seq~function-definition := decl-specifier-seq declarator function-body [guard=FunctionDeclarator]
+// CHECK-NEXT: â ââdecl-specifier-seq~VOID := tok[0]
+// CHECK-NEXT: â ââdeclarator~noptr-declarator := noptr-declarator parameters-and-qualifiers
+// CHECK-NEXT: â â âânoptr-declarator~IDENTIFIER := tok[1]
+// CHECK-NEXT: â â ââparameters-and-qualifiers := ( )
+// CHECK-NEXT: â â ââ( := tok[2]
+// CHECK-NEXT: â â ââ) := tok[3]
+// CHECK-NEXT: â ââfunction-body~compound-statement := { }
+// CHECK-NEXT: â ââ{ := tok[4]
+// CHECK-NEXT: â ââ} := tok[5]
+// CHECK-NEXT: ââdeclaration~; := tok[6]
+
+// Should not parse as a function-definition.
+int s2{};
+// CHECK-NEXT: declaration~simple-declaration := decl-specifier-seq init-declarator-list ;
+// CHECK-NEXT: ââdecl-specifier-seq~INT := tok[7]
+// CHECK-NEXT: ââinit-declarator-list~init-declarator := declarator initializer [guard=NotFunctionDeclarator]
+// CHECK-NEXT: â ââdeclarator~IDENTIFIER := tok[8]
+// CHECK-NEXT: â ââinitializer~braced-init-list := { }
+// CHECK-NEXT: â ââ{ := tok[9]
+// CHECK-NEXT: â ââ} := tok[10]
+// CHECK-NEXT: ââ; := tok[11]
Index: clang-tools-extra/pseudo/lib/cxx/cxx.bnf
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -402,7 +402,7 @@
placeholder-type-specifier := type-constraint_opt DECLTYPE ( AUTO )
init-declarator-list := init-declarator
init-declarator-list := init-declarator-list , init-declarator
-init-declarator := declarator initializer_opt
+init-declarator := declarator initializer_opt [guard=NotFunctionDeclarator]
init-declarator := declarator requires-clause
declarator := ptr-declarator
declarator := noptr-declarator parameters-and-qualifiers trailing-return-type
@@ -472,8 +472,8 @@
expr-or-braced-init-list := expression
expr-or-braced-init-list := braced-init-list
# dcl.fct
-function-definition := decl-specifier-seq_opt declarator virt-specifier-seq_opt function-body
-function-definition := decl-specifier-seq_opt declarator requires-clause function-body
+function-definition := decl-specifier-seq_opt declarator virt-specifier-seq_opt function-body [guard=FunctionDeclarator]
+function-definition := decl-specifier-seq_opt declarator requires-clause function-body [guard=FunctionDeclarator]
function-body := ctor-initializer_opt compound-statement
function-body := function-try-block
function-body := = DEFAULT ;
Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -21,24 +21,133 @@
#include "CXXBNF.inc"
;
-bool guardOverride(llvm::ArrayRef<const ForestNode *> RHS,
+bool guardOverride(RuleID RID, llvm::ArrayRef<const ForestNode *> RHS,
const TokenStream &Tokens) {
assert(RHS.size() == 1 &&
RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "override";
}
-bool guardFinal(llvm::ArrayRef<const ForestNode *> RHS,
+bool guardFinal(RuleID RID, llvm::ArrayRef<const ForestNode *> RHS,
const TokenStream &Tokens) {
assert(RHS.size() == 1 &&
RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "final";
}
-llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
- return {
- {(ExtensionID)Extension::Override, guardOverride},
- {(ExtensionID)Extension::Final, guardFinal},
+// Determine the declarator-id of the given declarator is a function.
+//
+// E.g.
+// `a` in `int a` => false, a is int
+// `*p` in `int *p` => false, p is a pointer to int
+// `*p()` in `int *p()` => true, p is a function
+// `(*p)()` in `int (*p)()` => false, p is a pointer to a function
+//
+// FIXME: though the cost of this check is relatively cheap, we should
+// probably consider caching the results to avoid redundant computations.
+bool isFunctionDeclarator(const ForestNode *DeclaratorFN) {
+ assert(DeclaratorFN->symbol() == (SymbolID)(cxx::Symbol::declarator));
+ enum DeclaratorKind {
+ Unknown = 0,
+ Function,
+ Other, // non-function kinds, pointer etc
};
+ DeclaratorKind Kind = Unknown;
+ // Declarators nest "inside out", we perform a DFS on the declarator:
+ // 1) descend into the inner-most node (which is a declarator-id)
+ // 2) walkup the stack, find the first rule that can determine the kind
+ auto DFS = [&Kind](const ForestNode *Declarator, auto DFS) {
+ // !! The declarator defined in the cxx.bnf should be unambiguous.
+ // FIXME: should we consider opaque node?
+ assert(Declarator->kind() == ForestNode::Sequence);
+ Rule R = (Rule)Declarator->rule();
+
+ // Reach to the inner-most node of the declarator, walkup now.
+ if (R == Rule::noptr_declarator$$declarator_id)
+ return;
+ // This is the main entry grammar rule for declarator.
+ if (R == Rule::declarator$$ptr_declarator) {
+ DFS(Declarator->elements()[0], DFS);
+ if (Kind == Unknown)
+ Kind = Other; // simple declarator (e.g. `a` in `int a`).
+ return;
+ }
+ // These rules don't contribute the declarator kind.
+ if (R == Rule::ptr_declarator$$noptr_declarator)
+ return DFS(Declarator->elements()[0], DFS);
+ if (R == Rule::noptr_declarator$$l_paren$ptr_declarator$r_paren)
+ return DFS(Declarator->elements()[1], DFS);
+
+ if (R == Rule::ptr_declarator$$ptr_operator$ptr_declarator) {
+ // a pointer declarator p: `int *p`
+ // vs. a function-declarator p: `int *p()`
+ DFS(Declarator->elements()[1], DFS);
+ if (Kind == Unknown)
+ Kind = Other; // pointer declarator
+ return;
+ }
+ if (R ==
+ Rule::
+ noptr_declarator$$noptr_declarator$l_square$constant_expression$r_square ||
+ R == Rule::noptr_declarator$$noptr_declarator$l_square$r_square) {
+ // `int s[];` vs `int (*s())[];`
+ DFS(Declarator->elements()[0], DFS);
+ if (Kind == Unknown)
+ Kind = Other; // array declarator
+ return;
+ }
+ if (R ==
+ Rule::
+ declarator$$noptr_declarator$parameters_and_qualifiers$trailing_return_type ||
+ R == Rule::
+ noptr_declarator$$noptr_declarator$parameters_and_qualifiers) {
+ DFS(Declarator->elements()[0], DFS);
+ if (Kind == Unknown)
+ Kind = Function;
+ return;
+ }
+ };
+ DFS(DeclaratorFN, DFS);
+ assert(Kind != Unknown && "The declarator kind is unknown!");
+ return Kind == Function;
+}
+
+bool guardNotFunctionDeclarator(RuleID RID,
+ llvm::ArrayRef<const ForestNode *> RHS,
+ const TokenStream &Tokens) {
+ if ((Rule)(RID) == Rule::init_declarator$$declarator$initializer)
+ return !isFunctionDeclarator(RHS[0]);
+ return true;
+}
+
+bool guardFunctionDeclarator(RuleID RID, llvm::ArrayRef<const ForestNode *> RHS,
+ const TokenStream &Tokens) {
+ auto R = (Rule)(RID);
+ size_t DeclaratorIndex = 0;
+ switch (R) {
+ case Rule::function_definition$$decl_specifier_seq$declarator$function_body:
+ case Rule::
+ function_definition$$decl_specifier_seq$declarator$requires_clause$function_body:
+ case Rule::
+ function_definition$$decl_specifier_seq$declarator$virt_specifier_seq$function_body:
+ DeclaratorIndex = 1;
+ break;
+ case Rule::function_definition$$declarator$function_body:
+ case Rule::function_definition$$declarator$requires_clause$function_body:
+ case Rule::function_definition$$declarator$virt_specifier_seq$function_body:
+ DeclaratorIndex = 0;
+ break;
+ default:
+ llvm_unreachable("Unhandle function-definition rules.");
+ }
+ return isFunctionDeclarator(RHS[DeclaratorIndex]);
+}
+
+llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
+ return {{(ExtensionID)Extension::Override, guardOverride},
+ {(ExtensionID)Extension::Final, guardFinal},
+ {(ExtensionID)Extension::FunctionDeclarator, guardFunctionDeclarator},
+ {(ExtensionID)Extension::NotFunctionDeclarator,
+ guardNotFunctionDeclarator}};
}
Token::Index recoverBrackets(Token::Index Begin, const TokenStream &Tokens) {
Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -421,7 +421,7 @@
if (!GuardID)
return true;
if (auto Guard = Lang.Guards.lookup(GuardID))
- return Guard(RHS, Params.Code);
+ return Guard(RID, RHS, Params.Code);
LLVM_DEBUG(llvm::dbgs()
<< llvm::formatv("missing guard implementation for rule {0}\n",
Lang.G.dumpRule(RID)));
Index: clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
@@ -37,6 +37,12 @@
#undef NONTERMINAL
};
+enum class Rule : RuleID {
+#define RULE(X, Y) X = Y,
+#include "CXXSymbols.inc"
+#undef RULE
+};
+
enum class Extension : ExtensionID {
#define EXTENSION(X, Y) X = Y,
#include "CXXSymbols.inc"
Index: clang-tools-extra/pseudo/include/clang-pseudo/Language.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/Language.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -27,7 +27,7 @@
//
// Return true if the guard is satisfied.
using RuleGuard = llvm::function_ref<bool(
- llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &)>;
+ RuleID RID, llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &)>;
// A recovery strategy determines a region of code to skip when parsing fails.
//
Index: clang-tools-extra/pseudo/gen/Main.cpp
===================================================================
--- clang-tools-extra/pseudo/gen/Main.cpp
+++ clang-tools-extra/pseudo/gen/Main.cpp
@@ -56,6 +56,21 @@
}
return Text.get()->getBuffer().str();
}
+
+std::string symbolName(clang::pseudo::SymbolID SID,
+ const clang::pseudo::Grammar &G) {
+ static const char *const TokNames[] = {
+#define TOK(X) #X,
+#define KEYWORD(X, Y) #X,
+#include "clang/Basic/TokenKinds.def"
+ nullptr};
+ if (clang::pseudo::isToken(SID))
+ return TokNames[clang::pseudo::symbolToToken(SID)];
+ std::string Name = G.symbolName(SID).str();
+ // translation-unit -> translation_unit
+ std::replace(Name.begin(), Name.end(), '-', '_');
+ return Name;
+}
} // namespace
int main(int argc, char *argv[]) {
@@ -83,16 +98,24 @@
#ifndef NONTERMINAL
#define NONTERMINAL(X, Y)
#endif
+#ifndef RULE
+#define RULE(X, Y)
+#endif
#ifndef EXTENSION
#define EXTENSION(X, Y)
#endif
)cpp";
for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size();
- ++ID) {
- std::string Name = G.symbolName(ID).str();
- // translation-unit -> translation_unit
- std::replace(Name.begin(), Name.end(), '-', '_');
- Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID);
+ ++ID)
+ Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", symbolName(ID, G),
+ ID);
+ for (clang::pseudo::RuleID RID = 0; RID < G.table().Rules.size(); ++RID) {
+ const clang::pseudo::Rule &R = G.table().Rules[RID];
+ // lhs$$rhs$rhs$rhs
+ std::string EnumName = symbolName(R.Target, G) + "$";
+ for (clang::pseudo::SymbolID SeqID : R.seq())
+ EnumName += "$" + symbolName(SeqID, G);
+ Out.os() << llvm::formatv("RULE({0}, {1})\n", EnumName, RID);
}
for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/;
EID < G.table().AttributeValues.size(); ++EID) {
@@ -102,6 +125,7 @@
}
Out.os() << R"cpp(
#undef NONTERMINAL
+#undef RULE
#undef EXTENSION
)cpp";
break;
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits