This lets ‘grep -E '(*a|+b)'’ warn about the * and the +.
* lib/dfa.h (DFA_STAR_WARN, DFA_PLUS_WARN): New flags.
* lib/dfa.c (lex): Support them.
---
 ChangeLog |  7 +++++++
 lib/dfa.c | 51 ++++++++++++++++++++++++++++++++++-----------------
 lib/dfa.h |  8 ++++++++
 3 files changed, 49 insertions(+), 17 deletions(-)
diff --git a/ChangeLog b/ChangeLog index 088e3b3134..5b20aa58e7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2022-05-24 Paul Eggert <egg...@cs.ucla.edu> + + dfa: new options DFA_STAR_WARN, DFA_PLUS_WARN + This lets ‘grep -E '(*a|+b)'’ warn about the * and the +. + * lib/dfa.h (DFA_STAR_WARN, DFA_PLUS_WARN): New flags. + * lib/dfa.c (lex): Support them. + 2022-05-23 Paul Eggert <egg...@cs.ucla.edu> dfa: '\n' is not governed by RE_LIMITED_OPS diff --git a/lib/dfa.c b/lib/dfa.c index 5d92b38b4c..bd4c5f0582 100644 --- a/lib/dfa.c +++ b/lib/dfa.c @@ -1311,17 +1311,25 @@ lex (struct dfa *dfa) goto default_case; if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0)) goto normal_char; - if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) - && dfa->lex.laststart) - goto normal_char; + if (dfa->lex.laststart) + { + if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)) + goto default_case; + if (dfa->syntax.dfaopts & DFA_PLUS_WARN) + dfawarn (_("? at start of expression")); + } return dfa->lex.lasttok = QMARK; case '*': if (backslash) goto normal_char; - if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) - && dfa->lex.laststart) - goto normal_char; + if (dfa->lex.laststart) + { + if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)) + goto default_case; + if (dfa->syntax.dfaopts & DFA_STAR_WARN) + dfawarn (_("* at start of expression")); + } return dfa->lex.lasttok = STAR; case '+': @@ -1329,9 +1337,13 @@ lex (struct dfa *dfa) goto default_case; if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0)) goto normal_char; - if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) - && dfa->lex.laststart) - goto normal_char; + if (dfa->lex.laststart) + { + if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)) + goto default_case; + if (dfa->syntax.dfaopts & DFA_PLUS_WARN) + dfawarn (_("+ at start of expression")); + } return dfa->lex.lasttok = PLUS; case '{': @@ -1339,9 +1351,6 @@ lex (struct dfa *dfa) goto default_case; if (backslash != 
((dfa->syntax.syntax_bits & RE_NO_BK_BRACES) == 0)) goto normal_char; - if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) - && dfa->lex.laststart) - goto normal_char; /* Cases: {M} - exact count @@ -1374,16 +1383,24 @@ lex (struct dfa *dfa) dfa->lex.maxrep * 10 + *p - '0')); } } - if (! ((! backslash || (p != lim && *p++ == '\\')) + bool invalid_content + = ! ((! backslash || (p != lim && *p++ == '\\')) && p != lim && *p++ == '}' && 0 <= dfa->lex.minrep && (dfa->lex.maxrep < 0 - || dfa->lex.minrep <= dfa->lex.maxrep))) + || dfa->lex.minrep <= dfa->lex.maxrep)); + if (invalid_content + && (dfa->syntax.syntax_bits & RE_INVALID_INTERVAL_ORD)) + goto normal_char; + if (dfa->lex.laststart) { - if (dfa->syntax.syntax_bits & RE_INVALID_INTERVAL_ORD) - goto normal_char; - dfaerror (_("invalid content of \\{\\}")); + if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)) + goto default_case; + if (dfa->syntax.dfaopts & DFA_PLUS_WARN) + dfawarn (_("{...} at start of expression")); } + if (invalid_content) + dfaerror (_("invalid content of \\{\\}")); if (RE_DUP_MAX < dfa->lex.maxrep) dfaerror (_("regular expression too big")); dfa->lex.ptr = p; diff --git a/lib/dfa.h b/lib/dfa.h index 8674929e90..91ec1d809f 100644 --- a/lib/dfa.h +++ b/lib/dfa.h @@ -81,6 +81,14 @@ enum /* Warn about stray backslashes before ordinary characters. */ DFA_STRAY_BACKSLASH_WARN = 1 << 3, + + /* Warn about * appearing out of context at the start of an + expression or subexpression. */ + DFA_STAR_WARN = 1 << 4, + + /* Warn about +, ?, {...} appearing out of context at the start of + an expression or subexpression. */ + DFA_PLUS_WARN = 1 << 5, }; /* Initialize or reinitialize a DFA. The arguments are: -- 2.36.1