On Mon, Jul 08, 2024 at 09:55:57AM -0700, Andi Kleen wrote:
> Implement a C23 clang compatible musttail attribute similar to the earlier
> C++ implementation in the C parser.
Sorry that we haven't reviewed the C parts before.
>
> PR83324
I don't think this is going to pass the pre-commit check; better write it as
PR c/83324.
> gcc/c/ChangeLog:
>
> * c-parser.cc (struct attr_state): Define with musttail_p.
> (c_parser_statement_after_labels): Handle [[musttail]]
Missing a '.'.
> (c_parser_std_attribute): Dito.
> (c_parser_handle_musttail): Dito.
> (c_parser_compound_statement_nostart): Dito.
> (c_parser_all_labels): Dito.
> (c_parser_statement): Dito.
> * c-tree.h (c_finish_return): Add musttail_p flag.
> * c-typeck.cc (c_finish_return): Handle musttail_p flag.
> ---
> gcc/c/c-parser.cc | 59 +++++++++++++++++++++++++++++++++++++----------
> gcc/c/c-tree.h | 2 +-
> gcc/c/c-typeck.cc | 15 ++++++++++--
> 3 files changed, 61 insertions(+), 15 deletions(-)
>
> diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
> index 8c4e697a4e10..ce1c2c2be835 100644
> --- a/gcc/c/c-parser.cc
> +++ b/gcc/c/c-parser.cc
> @@ -1621,6 +1621,11 @@ struct omp_for_parse_data {
> bool fail : 1;
> };
>
> +struct attr_state
> +{
> + bool musttail_p; // parsed a musttail for return
This should follow the usual comment conventions:
/* True if we parsed a musttail attribute for return. */
> +};
> +
> static bool c_parser_nth_token_starts_std_attributes (c_parser *,
> unsigned int);
> static tree c_parser_std_attribute_specifier_sequence (c_parser *);
> @@ -1665,7 +1670,7 @@ static location_t c_parser_compound_statement_nostart
> (c_parser *);
> static void c_parser_label (c_parser *, tree);
> static void c_parser_statement (c_parser *, bool *, location_t * = NULL);
> static void c_parser_statement_after_labels (c_parser *, bool *,
> - vec<tree> * = NULL);
> + vec<tree> * = NULL, attr_state =
> {});
> static tree c_parser_c99_block_statement (c_parser *, bool *,
> location_t * = NULL);
> static void c_parser_if_statement (c_parser *, bool *, vec<tree> *);
> @@ -6982,6 +6987,28 @@ c_parser_handle_directive_omp_attributes (tree &attrs,
> }
> }
>
> +/* Check if STD_ATTR contains a musttail attribute and handle it
Missing a '.'. Also, "handle" is pretty generic, maybe say "and remove it
if it precedes a return"?
> + PARSER is the parser and A is the output attr_state. */
> +
> +static tree
> +c_parser_handle_musttail (c_parser *parser, tree std_attrs, attr_state &a)
> +{
> + if (c_parser_next_token_is_keyword (parser, RID_RETURN))
The point of this check is to keep a musttail attribute around to give
a diagnostic for code like [[gnu::musttail]] 42;, correct?
> + {
> + if (lookup_attribute ("gnu", "musttail", std_attrs))
> + {
> + std_attrs = remove_attribute ("gnu", "musttail", std_attrs);
> + a.musttail_p = true;
> + }
> + if (lookup_attribute ("clang", "musttail", std_attrs))
> + {
> + std_attrs = remove_attribute ("clang", "musttail", std_attrs);
> + a.musttail_p = true;
> + }
> + }
> + return std_attrs;
> +}
> +
> /* Parse a compound statement except for the opening brace. This is
> used for parsing both compound statements and statement expressions
> (which follow different paths to handling the opening). */
> @@ -6998,6 +7025,7 @@ c_parser_compound_statement_nostart (c_parser *parser)
> bool in_omp_loop_block
> = omp_for_parse_state ? omp_for_parse_state->want_nested_loop : false;
> tree sl = NULL_TREE;
> + attr_state a = {};
>
> if (c_parser_next_token_is (parser, CPP_CLOSE_BRACE))
> {
> @@ -7138,7 +7166,10 @@ c_parser_compound_statement_nostart (c_parser *parser)
> = c_parser_nth_token_starts_std_attributes (parser, 1);
> tree std_attrs = NULL_TREE;
> if (have_std_attrs)
> - std_attrs = c_parser_std_attribute_specifier_sequence (parser);
> + {
> + std_attrs = c_parser_std_attribute_specifier_sequence (parser);
> + std_attrs = c_parser_handle_musttail (parser, std_attrs, a);
> + }
> if (c_parser_next_token_is_keyword (parser, RID_CASE)
> || c_parser_next_token_is_keyword (parser, RID_DEFAULT)
> || (c_parser_next_token_is (parser, CPP_NAME)
> @@ -7286,7 +7317,7 @@ c_parser_compound_statement_nostart (c_parser *parser)
> last_stmt = true;
> mark_valid_location_for_stdc_pragma (false);
> if (!omp_for_parse_state)
> - c_parser_statement_after_labels (parser, NULL);
> + c_parser_statement_after_labels (parser, NULL, NULL, a);
> else
> {
> /* In canonical loop nest form, nested loops can only appear
> @@ -7328,15 +7359,18 @@ c_parser_compound_statement_nostart (c_parser *parser)
> /* Parse all consecutive labels, possibly preceded by standard
> attributes. In this context, a statement is required, not a
> declaration, so attributes must be followed by a statement that is
> - not just a semicolon. */
> + not just a semicolon. Returns an attr_state. */
>
> -static void
> +static attr_state
> c_parser_all_labels (c_parser *parser)
> {
> + attr_state a = {};
> bool have_std_attrs;
> tree std_attrs = NULL;
> if ((have_std_attrs = c_parser_nth_token_starts_std_attributes (parser,
> 1)))
> - std_attrs = c_parser_std_attribute_specifier_sequence (parser);
> + std_attrs = c_parser_handle_musttail (parser,
> + c_parser_std_attribute_specifier_sequence (parser), a);
The formatting is a little off; let's do
{
std_attrs = c_parser_std_attribute_specifier_sequence (parser);
std_attrs = c_parser_handle_musttail (parser, std_attrs, a);
}
> while (c_parser_next_token_is_keyword (parser, RID_CASE)
> || c_parser_next_token_is_keyword (parser, RID_DEFAULT)
> || (c_parser_next_token_is (parser, CPP_NAME)
> @@ -7358,6 +7392,7 @@ c_parser_all_labels (c_parser *parser)
> }
> else if (have_std_attrs && c_parser_next_token_is (parser, CPP_SEMICOLON))
> c_parser_error (parser, "expected statement");
> + return a;
> }
I don't think this function will handle code like:
struct str
{
int a, b;
};
struct str
cstruct (int x)
{
if (x < 10)
L: // <====
[[gnu::musttail]] return cstruct (x + 1);
return ((struct str){ x, 0 });
}
which works in C++. I think you need another c_parser_handle_musttail after
the second call to c_parser_std_attribute_specifier_sequence.
Makes me wonder if c_parser_handle_musttail should be inlined into
c_parser_std_attribute_specifier_sequence...
> /* Parse a label (C90 6.6.1, C99 6.8.1, C11 6.8.1).
> @@ -7601,11 +7636,11 @@ c_parser_label (c_parser *parser, tree std_attrs)
> static void
> c_parser_statement (c_parser *parser, bool *if_p, location_t
> *loc_after_labels)
> {
> - c_parser_all_labels (parser);
> + attr_state a = c_parser_all_labels (parser);
> if (loc_after_labels)
> *loc_after_labels = c_parser_peek_token (parser)->location;
> parser->omp_attrs_forbidden_p = false;
> - c_parser_statement_after_labels (parser, if_p, NULL);
> + c_parser_statement_after_labels (parser, if_p, NULL, a);
> }
>
> /* Parse a statement, other than a labeled statement. CHAIN is a vector
> @@ -7614,11 +7649,11 @@ c_parser_statement (c_parser *parser, bool *if_p,
> location_t *loc_after_labels)
>
> IF_P is used to track whether there's a (possibly labeled) if statement
> which is not enclosed in braces and has an else clause. This is used to
> - implement -Wparentheses. */
> + implement -Wparentheses. A has an earlier parsed attribute state. */
Two spaces after a period. And s/has/is/?
>
> static void
> c_parser_statement_after_labels (c_parser *parser, bool *if_p,
> - vec<tree> *chain)
> + vec<tree> *chain, attr_state a)
I don't love the 'a' name for a parameter. Can we rename it to 'astate'?
> {
> location_t loc = c_parser_peek_token (parser)->location;
> tree stmt = NULL_TREE;
> @@ -7686,7 +7721,7 @@ c_parser_statement_after_labels (c_parser *parser, bool
> *if_p,
> c_parser_consume_token (parser);
> if (c_parser_next_token_is (parser, CPP_SEMICOLON))
> {
> - stmt = c_finish_return (loc, NULL_TREE, NULL_TREE);
> + stmt = c_finish_return (loc, NULL_TREE, NULL_TREE, a.musttail_p);
> c_parser_consume_token (parser);
> }
> else
> @@ -7695,7 +7730,7 @@ c_parser_statement_after_labels (c_parser *parser, bool
> *if_p,
> struct c_expr expr = c_parser_expression_conv (parser);
> mark_exp_read (expr.value);
> stmt = c_finish_return (EXPR_LOC_OR_LOC (expr.value, xloc),
> - expr.value, expr.original_type);
> + expr.value, expr.original_type,
> a.musttail_p);
> goto expect_semicolon;
> }
> break;
> diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h
> index 15da875a0290..3dc6338bf061 100644
> --- a/gcc/c/c-tree.h
> +++ b/gcc/c/c-tree.h
> @@ -827,7 +827,7 @@ extern tree c_begin_stmt_expr (void);
> extern tree c_finish_stmt_expr (location_t, tree);
> extern tree c_process_expr_stmt (location_t, tree);
> extern tree c_finish_expr_stmt (location_t, tree);
> -extern tree c_finish_return (location_t, tree, tree);
> +extern tree c_finish_return (location_t, tree, tree, bool = false);
> extern tree c_finish_bc_stmt (location_t, tree, bool);
> extern tree c_finish_goto_label (location_t, tree);
> extern tree c_finish_goto_ptr (location_t, c_expr val);
> diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
> index ffcab7df4d3b..e47b10befd90 100644
> --- a/gcc/c/c-typeck.cc
> +++ b/gcc/c/c-typeck.cc
> @@ -11693,10 +11693,10 @@ c_finish_goto_ptr (location_t loc, c_expr val)
> to return, or a null pointer for `return;' with no value. LOC is
> the location of the return statement, or the location of the expression,
> if the statement has any. If ORIGTYPE is not NULL_TREE, it
> - is the original type of RETVAL. */
> + is the original type of RETVAL. MUSTTAIL_P indicates a musttail
> attribute. */
This line looks a bit too long.
>
> tree
> -c_finish_return (location_t loc, tree retval, tree origtype)
> +c_finish_return (location_t loc, tree retval, tree origtype, bool musttail_p)
> {
> tree valtype = TREE_TYPE (TREE_TYPE (current_function_decl)), ret_stmt;
> bool no_warning = false;
> @@ -11710,6 +11710,17 @@ c_finish_return (location_t loc, tree retval, tree
> origtype)
> warning_at (xloc, 0,
> "function declared %<noreturn%> has a %<return%> statement");
>
> + if (retval && musttail_p)
> + {
> + tree t = retval;
> + if (TREE_CODE (t) == TARGET_EXPR)
> + t = TARGET_EXPR_INITIAL (t);
> + if (TREE_CODE (t) != CALL_EXPR)
> + error_at (xloc, "cannot tail-call: return value must be call");
In C++, the message is "return value must be a call", let's use that here too.
Consider a c-family/ function taking a tree and a location_t that does:
+ if (t && TREE_CODE (t) == TARGET_EXPR)
+ t = TARGET_EXPR_INITIAL (t);
+ if (t && TREE_CODE (t) != CALL_EXPR)
+ error_at (token->location, "cannot tail-call: return value must be a
call");
+ else
+ CALL_EXPR_MUST_TAIL_CALL (t) = 1;
and use it here and in the C++ FE as well.
> + else
> + CALL_EXPR_MUST_TAIL_CALL (t) = 1;
> + }
Why not include the above in this...
> if (retval)
> {
...block, so that you don't have to check retval twice? retval isn't set
to null in the first block.
Marek