On Sun, Nov 12, 2023 at 08:02:42PM -1000, Iain Sandoe wrote:
> This adds the ability to defer the validation of numeric attribute
> arguments until the sequence is parsed if the attribute being
> handled is one known to be 'clang form'.
> 
> We do this by considering the arguments to be strings regardless
> of content and defer the interpretation of those strings until the
> argument processing.

I don't see any tests here or in the C++ part of the patch.  Is it
possible to add some (I suppose for now only for the availability attribute)?

FWIW, for chaining attributes it's best to use attr_chainon since that
handles error_mark_node.  Unfortunately that's currently only in cp/.
 
>       PR c++/109877
> 
> gcc/c-family/ChangeLog:
> 
>       * c-lex.cc (c_lex_with_flags): Allow for the case where
>       we wish to defer interpretation of numeric values until
>       parse time.
>       * c-pragma.h (C_LEX_NUMBER_AS_STRING): New.
> 
> gcc/c/ChangeLog:
> 
>       * c-parser.cc (struct c_parser): Provide a flag to notify
>         that argument parsing should return attribute arguments
>         as string constants.
>       (c_lex_one_token): Act to defer numeric value validation.
>       (c_parser_clang_attribute_arguments): New.
>       (c_parser_gnu_attribute): Allow for clang-form GNU-style
>       attributes.
> 
> Signed-off-by: Iain Sandoe <i...@sandoe.co.uk>
> ---
>  gcc/c-family/c-lex.cc   |  15 ++++++
>  gcc/c-family/c-pragma.h |   3 ++
>  gcc/c/c-parser.cc       | 109 ++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 122 insertions(+), 5 deletions(-)
> 
> diff --git a/gcc/c-family/c-lex.cc b/gcc/c-family/c-lex.cc
> index 06c2453c89a..d535f5b460c 100644
> --- a/gcc/c-family/c-lex.cc
> +++ b/gcc/c-family/c-lex.cc
> @@ -533,6 +533,21 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
>  
>      case CPP_NUMBER:
>        {
> +     /* If the user wants number-like entities to be returned as a raw
> +        string, then don't try to classify them, which emits unwanted
> +        diagnostics.  */
> +     if (lex_flags & C_LEX_NUMBER_AS_STRING)
> +       {
> +         /* build_string adds a trailing NUL at [len].  */
> +         tree num_string = build_string (tok->val.str.len + 1,
> +                                         (const char *) tok->val.str.text);
> +         TREE_TYPE (num_string) = char_array_type_node;
> +         *value = num_string;
> +         /* We will effectively note this as CPP_N_INVALID, because we
> +            made no checks here.  */
> +         break;
> +       }
> +
>       const char *suffix = NULL;
>       unsigned int flags = cpp_classify_number (parse_in, tok, &suffix, *loc);
>  
> diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h
> index 98177913053..11cde74f9f0 100644
> --- a/gcc/c-family/c-pragma.h
> +++ b/gcc/c-family/c-pragma.h
> @@ -276,6 +276,9 @@ extern void pragma_lex_discard_to_eol ();
>  #define C_LEX_STRING_NO_JOIN   2 /* Do not concatenate strings
>                                      nor translate them into execution
>                                      character set.  */
> +#define C_LEX_NUMBER_AS_STRING         4 /* Do not classify a number, but
> +                                    instead return it as a raw
> +                                    string.  */
>  
>  /* This is not actually available to pragma parsers.  It's merely a
>     convenient location to declare this function for c-lex, after
> diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
> index 703f9570dbc..aaaa16cc05d 100644
> --- a/gcc/c/c-parser.cc
> +++ b/gcc/c/c-parser.cc
> @@ -217,6 +217,9 @@ struct GTY(()) c_parser {
>       should translate them to the execution character set (false
>       inside attributes).  */
>    BOOL_BITFIELD translate_strings_p : 1;
> +  /* True if we want to lex arbitrary number-like sequences as their
> +     string representation.  */
> +  BOOL_BITFIELD lex_number_as_string : 1;
>  
>    /* Objective-C specific parser/lexer information.  */
>  
> @@ -308,10 +311,10 @@ c_lex_one_token (c_parser *parser, c_token *token, bool raw = false)
>  
>    if (raw || vec_safe_length (parser->raw_tokens) == 0)
>      {
> +      int lex_flags = parser->lex_joined_string ? 0 : C_LEX_STRING_NO_JOIN;
> +      lex_flags |= parser->lex_number_as_string ? C_LEX_NUMBER_AS_STRING : 0;
>        token->type = c_lex_with_flags (&token->value, &token->location,
> -                                   &token->flags,
> -                                   (parser->lex_joined_string
> -                                    ? 0 : C_LEX_STRING_NO_JOIN));
> +                                   &token->flags, lex_flags);
>        token->id_kind = C_ID_NONE;
>        token->keyword = RID_MAX;
>        token->pragma_kind = PRAGMA_NONE;
> @@ -5210,6 +5213,98 @@ c_parser_gnu_attribute_any_word (c_parser *parser)
>    return attr_name;
>  }
>  
> +/* Handle parsing clang-form attribute arguments, where we need to adjust
> +   the parsing rules to relate to a specific attribute.  */
> +
> +static tree
> +c_parser_clang_attribute_arguments (c_parser *parser, tree /*attr_id*/)

Why the second parameter if you don't use it?

> +{
> +  /* We can, if required, alter the parsing on the basis of the attribute.
> +     At present, we handle the availability attr, where ach entry can be :

"each"

> +     identifier
> +     identifier=N.MM.Z
> +     identifier="string"
> +     followed by ',' or ) for the last entry*/

".  */"

> +
> +  tree attr_args = NULL_TREE;
> +  if (c_parser_next_token_is (parser, CPP_NAME)
> +      && c_parser_peek_token (parser)->id_kind == C_ID_ID
> +      && c_parser_peek_2nd_token (parser)->type == CPP_COMMA)
> +    {
> +      tree platf = c_parser_peek_token (parser)->value;
> +      c_parser_consume_token (parser);
> +      attr_args = tree_cons (NULL_TREE, platf, NULL_TREE);
> +    }
> +  else
> +    {
> +      c_parser_skip_until_found (parser, CPP_CLOSE_PAREN,
> +                             "expected a platform name followed by %<,%>");
> +      return error_mark_node;
> +    }
> +  c_parser_consume_token (parser); /* consume the ',' */
> +  do
> +    {
> +      tree name = NULL_TREE;
> +      tree value = NULL_TREE;
> +
> +      if (c_parser_next_token_is (parser, CPP_NAME)
> +       && c_parser_peek_token (parser)->id_kind == C_ID_ID)
> +     {
> +       name = c_parser_peek_token (parser)->value;
> +       c_parser_consume_token (parser);
> +     }
> +      else
> +     {
> +       c_parser_skip_until_found (parser, CPP_CLOSE_PAREN,
> +                                  "expected an attribute keyword");
> +       return error_mark_node;
> +     }
> +      if (c_parser_next_token_is (parser, CPP_EQ))
> +     {
> +       c_parser_consume_token (parser); /* eat the '=' */
> +       /* We need to bludgeon the lexer into not trying to interpret the
> +          xx.yy.zz form, since that just looks like a malformed float.
> +          Also, as a result of macro processing, we can have strig literals

"string"

> +          that are in multiple pieces so, for this specific part of the
> +          parse, we need to join strings.  */
> +       bool saved_join_state = parser->lex_joined_string;
> +       parser->lex_number_as_string = 1;
> +       parser->lex_joined_string = 1;
> +       /* So look at the next token, expecting a string, or something that
> +          looks initially like a number, but might be a version number.  */
> +       c_parser_peek_token (parser);
> +       /* Done with the funky number parsing.  */
> +       parser->lex_number_as_string = 0;
> +       parser->lex_joined_string = saved_join_state;
> +       if (c_parser_next_token_is_not (parser, CPP_CLOSE_PAREN)
> +           && c_parser_next_token_is_not (parser, CPP_COMMA))
> +         {
> +           value = c_parser_peek_token (parser)->value;
> +           /* ???: check for error mark and early-return?  */

It might be useful to have a test for this invalid case.

> +           c_parser_consume_token (parser);
> +         }
> +       else
> +         {
> +           c_parser_skip_until_found (parser, CPP_CLOSE_PAREN,
> +                                      "expected a value");
> +           return error_mark_node;
> +         }
> +     }
> +      else if (c_parser_next_token_is_not (parser, CPP_CLOSE_PAREN)
> +            && c_parser_next_token_is_not (parser, CPP_COMMA))
> +     {
> +       c_parser_skip_until_found (parser, CPP_CLOSE_PAREN,
> +                                  "expected %<,%> or %<=%>");
> +       return error_mark_node;
> +     }
> +    if (c_parser_next_token_is (parser, CPP_COMMA))
> +      c_parser_consume_token (parser); /* Just skip the comma.  */
> +    tree t = tree_cons (value, name, NULL);
> +    chainon (attr_args, t);
> +  } while (c_parser_next_token_is_not (parser, CPP_CLOSE_PAREN));
> +  return attr_args;
> +}
> +
>  /* Parse attribute arguments.  This is a common form of syntax
>     covering all currently valid GNU and standard attributes.
>  
> @@ -5375,9 +5470,13 @@ c_parser_gnu_attribute (c_parser *parser, tree attrs,
>        attrs = chainon (attrs, attr);
>        return attrs;
>      }
> -  c_parser_consume_token (parser);
> +  c_parser_consume_token (parser); /* The '('.  */
>  
> -  tree attr_args
> +  tree attr_args;
> +  if (attribute_clang_form_p (attr_name))
> +    attr_args = c_parser_clang_attribute_arguments (parser, attr_name);
> +  else
> +    attr_args
>      = c_parser_attribute_arguments (parser,
>                                   attribute_takes_identifier_p (attr_name),
>                                   false,
> -- 
> 2.39.2 (Apple Git-143)
> 

Marek

Reply via email to