On Sun, Nov 12, 2023 at 08:02:42PM -1000, Iain Sandoe wrote:
> This adds the ability to defer the validation of numeric attribute
> arguments until the sequence is parsed if the attribute being
> handled is one known to be 'clang form'.
>
> We do this by considering the arguments to be strings regardless
> of content and defer the interpretation of those strings until the
> argument processing.
I don't see any tests here nor in the C++ part of the patch. Is it possible to add some (I suppose for now only attribute availability)? FWIW, for chaining attributes it's best to use attr_chainon since that handles error_mark_node. Unfortunately that's currently only in cp/. > PR c++/109877 > > gcc/c-family/ChangeLog: > > * c-lex.cc (c_lex_with_flags): Allow for the case where > we wish to defer interpretation of numeric values until > parse time. > * c-pragma.h (C_LEX_NUMBER_AS_STRING): New. > > gcc/c/ChangeLog: > > * c-parser.cc (struct c_parser): Provide a flag to notify > that argument parsing should return attribute arguments > as string constants. > (c_lex_one_token): Act to defer numeric value validation. > (c_parser_clang_attribute_arguments): New. > (c_parser_gnu_attribute): Allow for clang-form GNU-style > attributes. > > Signed-off-by: Iain Sandoe <i...@sandoe.co.uk> > --- > gcc/c-family/c-lex.cc | 15 ++++++ > gcc/c-family/c-pragma.h | 3 ++ > gcc/c/c-parser.cc | 109 ++++++++++++++++++++++++++++++++++++++-- > 3 files changed, 122 insertions(+), 5 deletions(-) > > diff --git a/gcc/c-family/c-lex.cc b/gcc/c-family/c-lex.cc > index 06c2453c89a..d535f5b460c 100644 > --- a/gcc/c-family/c-lex.cc > +++ b/gcc/c-family/c-lex.cc > @@ -533,6 +533,21 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned > char *cpp_flags, > > case CPP_NUMBER: > { > + /* If the user wants number-like entities to be returned as a raw > + string, then don't try to classify them, which emits unwanted > + diagnostics. */ > + if (lex_flags & C_LEX_NUMBER_AS_STRING) > + { > + /* build_string adds a trailing NUL at [len]. */ > + tree num_string = build_string (tok->val.str.len + 1, > + (const char *) tok->val.str.text); > + TREE_TYPE (num_string) = char_array_type_node; > + *value = num_string; > + /* We will effectively note this as CPP_N_INVALID, because we > + made no checks here. 
*/ > + break; > + } > + > const char *suffix = NULL; > unsigned int flags = cpp_classify_number (parse_in, tok, &suffix, *loc); > > diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h > index 98177913053..11cde74f9f0 100644 > --- a/gcc/c-family/c-pragma.h > +++ b/gcc/c-family/c-pragma.h > @@ -276,6 +276,9 @@ extern void pragma_lex_discard_to_eol (); > #define C_LEX_STRING_NO_JOIN 2 /* Do not concatenate strings > nor translate them into execution > character set. */ > +#define C_LEX_NUMBER_AS_STRING 4 /* Do not classify a number, but > + instead return it as a raw > + string. */ > > /* This is not actually available to pragma parsers. It's merely a > convenient location to declare this function for c-lex, after > diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc > index 703f9570dbc..aaaa16cc05d 100644 > --- a/gcc/c/c-parser.cc > +++ b/gcc/c/c-parser.cc > @@ -217,6 +217,9 @@ struct GTY(()) c_parser { > should translate them to the execution character set (false > inside attributes). */ > BOOL_BITFIELD translate_strings_p : 1; > + /* True if we want to lex arbitrary number-like sequences as their > + string representation. */ > + BOOL_BITFIELD lex_number_as_string : 1; > > /* Objective-C specific parser/lexer information. */ > > @@ -308,10 +311,10 @@ c_lex_one_token (c_parser *parser, c_token *token, bool > raw = false) > > if (raw || vec_safe_length (parser->raw_tokens) == 0) > { > + int lex_flags = parser->lex_joined_string ? 0 : C_LEX_STRING_NO_JOIN; > + lex_flags |= parser->lex_number_as_string ? C_LEX_NUMBER_AS_STRING : 0; > token->type = c_lex_with_flags (&token->value, &token->location, > - &token->flags, > - (parser->lex_joined_string > - ? 
0 : C_LEX_STRING_NO_JOIN)); > + &token->flags, lex_flags); > token->id_kind = C_ID_NONE; > token->keyword = RID_MAX; > token->pragma_kind = PRAGMA_NONE; > @@ -5210,6 +5213,98 @@ c_parser_gnu_attribute_any_word (c_parser *parser) > return attr_name; > } > > +/* Handle parsing clang-form attribute arguments, where we need to adjust > + the parsing rules to relate to a specific attribute. */ > + > +static tree > +c_parser_clang_attribute_arguments (c_parser *parser, tree /*attr_id*/) Why the second parameter if you don't use it? > +{ > + /* We can, if required, alter the parsing on the basis of the attribute. > + At present, we handle the availability attr, where ach entry can be : "each" > + identifier > + identifier=N.MM.Z > + identifier="string" > + followed by ',' or ) for the last entry*/ ". */" > + > + tree attr_args = NULL_TREE; > + if (c_parser_next_token_is (parser, CPP_NAME) > + && c_parser_peek_token (parser)->id_kind == C_ID_ID > + && c_parser_peek_2nd_token (parser)->type == CPP_COMMA) > + { > + tree platf = c_parser_peek_token (parser)->value; > + c_parser_consume_token (parser); > + attr_args = tree_cons (NULL_TREE, platf, NULL_TREE); > + } > + else > + { > + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, > + "expected a platform name followed by %<,%>"); > + return error_mark_node; > + } > + c_parser_consume_token (parser); /* consume the ',' */ > + do > + { > + tree name = NULL_TREE; > + tree value = NULL_TREE; > + > + if (c_parser_next_token_is (parser, CPP_NAME) > + && c_parser_peek_token (parser)->id_kind == C_ID_ID) > + { > + name = c_parser_peek_token (parser)->value; > + c_parser_consume_token (parser); > + } > + else > + { > + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, > + "expected an attribute keyword"); > + return error_mark_node; > + } > + if (c_parser_next_token_is (parser, CPP_EQ)) > + { > + c_parser_consume_token (parser); /* eat the '=' */ > + /* We need to bludgeon the lexer into not trying to interpret the > + xx.yy.zz 
form, since that just looks like a malformed float. > + Also, as a result of macro processing, we can have strig literals "string" > + that are in multiple pieces so, for this specific part of the > + parse, we need to join strings. */ > + bool saved_join_state = parser->lex_joined_string; > + parser->lex_number_as_string = 1; > + parser->lex_joined_string = 1; > + /* So look at the next token, expecting a string, or something that > + looks initially like a number, but might be a version number. */ > + c_parser_peek_token (parser); > + /* Done with the funky number parsing. */ > + parser->lex_number_as_string = 0; > + parser->lex_joined_string = saved_join_state; > + if (c_parser_next_token_is_not (parser, CPP_CLOSE_PAREN) > + && c_parser_next_token_is_not (parser, CPP_COMMA)) > + { > + value = c_parser_peek_token (parser)->value; > + /* ???: check for error mark and early-return? */ It might be useful to have a test for this invalid case. > + c_parser_consume_token (parser); > + } > + else > + { > + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, > + "expected a value"); > + return error_mark_node; > + } > + } > + else if (c_parser_next_token_is_not (parser, CPP_CLOSE_PAREN) > + && c_parser_next_token_is_not (parser, CPP_COMMA)) > + { > + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, > + "expected %<,%> or %<=%>"); > + return error_mark_node; > + } > + if (c_parser_next_token_is (parser, CPP_COMMA)) > + c_parser_consume_token (parser); /* Just skip the comma. */ > + tree t = tree_cons (value, name, NULL); > + chainon (attr_args, t); > + } while (c_parser_next_token_is_not (parser, CPP_CLOSE_PAREN)); > + return attr_args; > +} > + > /* Parse attribute arguments. This is a common form of syntax > covering all currently valid GNU and standard attributes. 
> > @@ -5375,9 +5470,13 @@ c_parser_gnu_attribute (c_parser *parser, tree attrs, > attrs = chainon (attrs, attr); > return attrs; > } > - c_parser_consume_token (parser); > + c_parser_consume_token (parser); /* The '('. */ > > - tree attr_args > + tree attr_args; > + if (attribute_clang_form_p (attr_name)) > + attr_args = c_parser_clang_attribute_arguments (parser, attr_name); > + else > + attr_args > = c_parser_attribute_arguments (parser, > attribute_takes_identifier_p (attr_name), > false, > -- > 2.39.2 (Apple Git-143) > Marek