Re: [PATCH v20 02/40] c-family, c++: Look up built-in traits via identifier node

Patrick Palka Mon, 16 Oct 2023 07:55:34 -0700

On Sun, 15 Oct 2023, Ken Matsui wrote:

> Since RID_MAX soon reaches 255 and all built-in traits are used approximately
> once in a C++ translation unit, this patch removes all RID values for built-in
> traits and uses the identifier node to look up the specific trait.  Rather
> than holding traits as keywords, we set all trait identifiers as cik_trait,
> which is a new cp_identifier_kind.  As cik_reserved_for_udlit was unused and
> cp_identifier_kind is 3 bits, we replaced the unused field with the new
> cik_trait.  Also, the later patch handles a subsequent token to the built-in
> identifier so that we accept the use of non-function-like built-in trait
> identifiers.


Thanks, this looks great!  Some review comments below.

> 
> gcc/c-family/ChangeLog:
> 
>       * c-common.cc (c_common_reswords): Remove all mappings of
>       built-in traits.
>       * c-common.h (enum rid): Remove all RID values for built-in traits.
> 
> gcc/cp/ChangeLog:
> 
>       * cp-objcp-common.cc (names_builtin_p): Remove all RID value
>       cases for built-in traits.  Check for built-in traits via
>       the new cik_trait kind.
>       * cp-tree.h (enum cp_trait_kind): Set its underlying type to
>       addr_space_t.
>       (struct cp_trait): New struct to hold trait information.
>       (cp_traits): New array to hold a mapping to all traits.
>       (cik_reserved_for_udlit): Rename to ...
>       (cik_trait): ... this.
>       (IDENTIFIER_ANY_OP_P): Exclude cik_trait.
>       (IDENTIFIER_TRAIT_P): New macro to detect cik_trait.
>       * lex.cc (init_cp_traits): New function to set cik_trait for all
>       built-in trait identifiers.

We should mention setting IDENTIFIER_CP_INDEX as well.

>       (cxx_init): Call init_cp_traits function.
>       * parser.cc (cp_traits): Define its values, declared in cp-tree.h.
>       (cp_lexer_lookup_trait): New function to look up a
>       built-in trait by IDENTIFIER_CP_INDEX.
>       (cp_lexer_lookup_trait_expr): Likewise, look up an
>       expression-yielding built-in trait.
>       (cp_lexer_lookup_trait_type): Likewise, look up a type-yielding
>       built-in trait.
>       (cp_keyword_starts_decl_specifier_p): Remove all RID value cases
>       for built-in traits.
>       (cp_lexer_next_token_is_decl_specifier_keyword): Handle
>       type-yielding built-in traits.
>       (cp_parser_primary_expression): Remove all RID value cases for
>       built-in traits.  Handle expression-yielding built-in traits.
>       (cp_parser_trait): Handle cp_trait instead of enum rid.
>       (cp_parser_simple_type_specifier): Remove all RID value cases
>       for built-in traits.  Handle type-yielding built-in traits.
> 
> Co-authored-by: Patrick Palka <[email protected]>
> Signed-off-by: Ken Matsui <[email protected]>
> ---
>  gcc/c-family/c-common.cc  |   7 --
>  gcc/c-family/c-common.h   |   5 --
>  gcc/cp/cp-objcp-common.cc |   8 +--
>  gcc/cp/cp-tree.h          |  31 ++++++---
>  gcc/cp/lex.cc             |  21 ++++++
>  gcc/cp/parser.cc          | 141 ++++++++++++++++++++++++--------------
>  6 files changed, 139 insertions(+), 74 deletions(-)
> 
> diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
> index f044db5b797..21fd333ef57 100644
> --- a/gcc/c-family/c-common.cc
> +++ b/gcc/c-family/c-common.cc
> @@ -508,13 +508,6 @@ const struct c_common_resword c_common_reswords[] =
>    { "wchar_t",               RID_WCHAR,      D_CXXONLY },
>    { "while",         RID_WHILE,      0 },
>  
> -#define DEFTRAIT(TCC, CODE, NAME, ARITY) \
> -  { NAME,            RID_##CODE,     D_CXXONLY },
> -#include "cp/cp-trait.def"
> -#undef DEFTRAIT
> -  /* An alias for __is_same.  */
> -  { "__is_same_as",  RID_IS_SAME,    D_CXXONLY },
> -
>    /* C++ transactional memory.  */
>    { "synchronized",  RID_SYNCHRONIZED, D_CXX_OBJC | D_TRANSMEM },
>    { "atomic_noexcept",       RID_ATOMIC_NOEXCEPT, D_CXXONLY | D_TRANSMEM },
> diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
> index 1fdba7ef3ea..051a442e0f4 100644
> --- a/gcc/c-family/c-common.h
> +++ b/gcc/c-family/c-common.h
> @@ -168,11 +168,6 @@ enum rid
>    RID_BUILTIN_LAUNDER,
>    RID_BUILTIN_BIT_CAST,
>  
> -#define DEFTRAIT(TCC, CODE, NAME, ARITY) \
> -  RID_##CODE,
> -#include "cp/cp-trait.def"
> -#undef DEFTRAIT
> -
>    /* C++11 */
>    RID_CONSTEXPR, RID_DECLTYPE, RID_NOEXCEPT, RID_NULLPTR, RID_STATIC_ASSERT,
>  
> diff --git a/gcc/cp/cp-objcp-common.cc b/gcc/cp/cp-objcp-common.cc
> index 93b027b80ce..b1adacfec07 100644
> --- a/gcc/cp/cp-objcp-common.cc
> +++ b/gcc/cp/cp-objcp-common.cc
> @@ -421,6 +421,10 @@ names_builtin_p (const char *name)
>       }
>      }
>  
> +  /* Check for built-in traits.  */
> +  if (IDENTIFIER_TRAIT_P (id))
> +    return true;
> +
>    /* Also detect common reserved C++ words that aren't strictly built-in
>       functions.  */
>    switch (C_RID_CODE (id))
> @@ -434,10 +438,6 @@ names_builtin_p (const char *name)
>      case RID_BUILTIN_ASSOC_BARRIER:
>      case RID_BUILTIN_BIT_CAST:
>      case RID_OFFSETOF:
> -#define DEFTRAIT(TCC, CODE, NAME, ARITY) \
> -    case RID_##CODE:
> -#include "cp-trait.def"
> -#undef DEFTRAIT
>        return true;
>      default:
>        break;
> diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
> index 6e34952da99..583abb2e79a 100644
> --- a/gcc/cp/cp-tree.h
> +++ b/gcc/cp/cp-tree.h
> @@ -1226,7 +1226,7 @@ enum cp_identifier_kind {
>    cik_simple_op = 4, /* Non-assignment operator name.  */
>    cik_assign_op = 5, /* An assignment operator name.  */
>    cik_conv_op = 6,   /* Conversion operator name.  */
> -  cik_reserved_for_udlit = 7,        /* Not yet in use  */
> +  cik_trait = 7,     /* Built-in trait name.  */
>    cik_max
>  };
>  
> @@ -1271,9 +1271,9 @@ enum cp_identifier_kind {
>      & IDENTIFIER_KIND_BIT_0 (NODE))
>  
>  /* True if this identifier is for any operator name (including
> -   conversions).  Value 4, 5, 6 or 7.  */
> +   conversions).  Value 4, 5, or 6.  */
>  #define IDENTIFIER_ANY_OP_P(NODE)            \
> -  (IDENTIFIER_KIND_BIT_2 (NODE))
> +  (IDENTIFIER_KIND_BIT_2 (NODE) && !IDENTIFIER_TRAIT_P (NODE))
>  
>  /* True if this identifier is for an overloaded operator. Values 4, 5.  */
>  #define IDENTIFIER_OVL_OP_P(NODE)            \
> @@ -1286,12 +1286,18 @@ enum cp_identifier_kind {
>     & IDENTIFIER_KIND_BIT_0 (NODE))
>  
>  /* True if this identifier is the name of a type-conversion
> -   operator.  Value 7.  */
> +   operator.  Value 6.  */
>  #define IDENTIFIER_CONV_OP_P(NODE)           \
>    (IDENTIFIER_ANY_OP_P (NODE)                        \
>     & IDENTIFIER_KIND_BIT_1 (NODE)            \
>     & (!IDENTIFIER_KIND_BIT_0 (NODE)))
>  
> +/* True if this identifier is the name of a built-in trait.  */
> +#define IDENTIFIER_TRAIT_P(NODE)             \
> +  (IDENTIFIER_KIND_BIT_0 (NODE)                      \
> +   && IDENTIFIER_KIND_BIT_1 (NODE)           \
> +   && IDENTIFIER_KIND_BIT_2 (NODE))
> +
>  /* True if this identifier is a new or delete operator.  */
>  #define IDENTIFIER_NEWDEL_OP_P(NODE)         \
>    (IDENTIFIER_OVL_OP_P (NODE)                        \
> @@ -1375,16 +1381,25 @@ struct GTY (()) tree_argument_pack_select {
>    int index;
>  };
>  
> -/* The different kinds of traits that we encounter.  */
> -
> -enum cp_trait_kind
> -{
> +/* The different kinds of traits that we encounter.  The size is limited to
> +   addr_space_t since a trait is looked up by IDENTIFIER_CP_INDEX.  */
> +enum cp_trait_kind : addr_space_t {
>  #define DEFTRAIT(TCC, CODE, NAME, ARITY) \
>    CPTK_##CODE,
>  #include "cp-trait.def"
>  #undef DEFTRAIT
>  };
>  
> +/* The trait type.  */
> +struct cp_trait {
> +  short arity;
> +  cp_trait_kind kind;
> +  bool type;

Could we also store the const char* name of each trait here, so that ...

> +};
> +
> +/* The trait table.  */
> +extern const struct cp_trait cp_traits[];
> +
>  /* The types that we are processing.  */
>  #define TRAIT_EXPR_TYPE1(NODE) \
>    (((struct tree_trait_expr *)TRAIT_EXPR_CHECK (NODE))->type1)
> diff --git a/gcc/cp/lex.cc b/gcc/cp/lex.cc
> index 64bcfb18196..16a82a12a02 100644
> --- a/gcc/cp/lex.cc
> +++ b/gcc/cp/lex.cc
> @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "langhooks.h"
>  
>  static int interface_strcmp (const char *);
> +static void init_cp_traits (void);
>  static void init_cp_pragma (void);
>  
>  static tree parse_strconst_pragma (const char *, int);
> @@ -283,6 +284,25 @@ init_reswords (void)
>      }
>  }
>  
> +/* Initialize the C++ traits.  */
> +static void
> +init_cp_traits (void)
> +{
> +  tree id;
> +
> +#define DEFTRAIT(TCC, CODE, NAME, ARITY) \
> +  id = get_identifier (NAME); \
> +  IDENTIFIER_CP_INDEX (id) = CPTK_##CODE; \
> +  set_identifier_kind (id, cik_trait);
> +#include "cp/cp-trait.def"
> +#undef DEFTRAIT

... we could replace this straight-line code with a loop over cp_traits?
It'd make cp_traits bigger but init_cp_traits should get much smaller,
which should be a net win in terms of binary size.

> +
> +  /* An alias for __is_same.  */
> +  id = get_identifier ("__is_same_as");
> +  IDENTIFIER_CP_INDEX (id) = CPTK_IS_SAME;
> +  set_identifier_kind (id, cik_trait);
> +}
> +
>  static void
>  init_cp_pragma (void)
>  {
> @@ -324,6 +344,7 @@ cxx_init (void)
>    input_location = BUILTINS_LOCATION;
>  
>    init_reswords ();
> +  init_cp_traits ();
>    init_tree ();
>    init_cp_semantics ();
>    init_operators ();
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index f3abae716fe..eba5272be03 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -246,6 +246,12 @@ static void cp_lexer_start_debugging
>    (cp_lexer *) ATTRIBUTE_UNUSED;
>  static void cp_lexer_stop_debugging
>    (cp_lexer *) ATTRIBUTE_UNUSED;
> +static const cp_trait *cp_lexer_lookup_trait
> +  (const cp_token *);
> +static const cp_trait *cp_lexer_lookup_trait_expr
> +  (const cp_token *);
> +static const cp_trait *cp_lexer_lookup_trait_type
> +  (const cp_token *);
>  
>  static cp_token_cache *cp_token_cache_new
>    (cp_token *, cp_token *);
> @@ -279,6 +285,19 @@ static FILE *cp_lexer_debug_stream;
>     sizeof, typeof, or alignof.  */
>  int cp_unevaluated_operand;
>  
> +/* The trait table, declared in cp-tree.h.  */
> +const cp_trait cp_traits[] =
> +{
> +#define DEFTRAIT(TCC, CODE, NAME, ARITY) \
> +  { ARITY, CPTK_##CODE, (TCC == tcc_type) },
> +#include "cp-trait.def"
> +#undef DEFTRAIT
> +};
> +/* The trait table cannot have more than 255 (addr_space_t) entries since
> +   the index is retrieved through IDENTIFIER_CP_INDEX.  */
> +static_assert(ARRAY_SIZE (cp_traits) <= 255,
> +              "cp_traits array cannot have more than 255 entries");
> +
>  /* Dump up to NUM tokens in BUFFER to FILE starting with token
>     START_TOKEN.  If START_TOKEN is NULL, the dump starts with the
>     first token in BUFFER.  If NUM is 0, dump all the tokens.  If
> @@ -1167,12 +1186,6 @@ cp_keyword_starts_decl_specifier_p (enum rid keyword)
>      case RID_CONSTEVAL:
>        return true;
>  
> -#define DEFTRAIT_TYPE(CODE, NAME, ARITY) \
> -    case RID_##CODE:
> -#include "cp-trait.def"
> -#undef DEFTRAIT_TYPE
> -      return true;
> -
>      default:
>        if (keyword >= RID_FIRST_INT_N
>         && keyword < RID_FIRST_INT_N + NUM_INT_N_ENTS
> @@ -1182,6 +1195,48 @@ cp_keyword_starts_decl_specifier_p (enum rid keyword)
>      }
>  }
>  
> +/* Looks up the corresponding built-in trait if a given token is
> +   a built-in trait.  Otherwise, returns nullptr.  */
> +
> +static const cp_trait *
> +cp_lexer_lookup_trait (const cp_token *token)
> +{
> +  tree id = token->u.value;
> +
> +  if (token->type == CPP_NAME
> +      && TREE_CODE (id) == IDENTIFIER_NODE

The value of a CPP_NAME token is always an IDENTIFIER_NODE, so this
check should be redundant.  Also we should access the u.value union member
only after the CPP_NAME check, since for some other token kinds u.value
isn't the active member (and reading from it would be undefined behavior
strictly speaking).

> +      && IDENTIFIER_TRAIT_P (id))
> +    return &cp_traits[IDENTIFIER_CP_INDEX (id)];
> +
> +  return nullptr;
> +}
> +
> +/* Similarly, but only if the token is an expression-yielding
> +   built-in trait.  */
> +
> +static const cp_trait *
> +cp_lexer_lookup_trait_expr (const cp_token *token)
> +{
> +  const cp_trait *trait = cp_lexer_lookup_trait (token);
> +  if (trait && !trait->type)
> +    return trait;
> +
> +  return nullptr;
> +}
> +
> +/* Similarly, but only if the token is a type-yielding
> +   built-in trait.  */
> +
> +static const cp_trait *
> +cp_lexer_lookup_trait_type (const cp_token *token)
> +{
> +  const cp_trait *trait = cp_lexer_lookup_trait (token);
> +  if (trait && trait->type)
> +    return trait;
> +
> +  return nullptr;
> +}
> +
>  /* Return true if the next token is a keyword for a decl-specifier.  */
>  
>  static bool
> @@ -1190,6 +1245,8 @@ cp_lexer_next_token_is_decl_specifier_keyword (cp_lexer *lexer)
>    cp_token *token;
>  
>    token = cp_lexer_peek_token (lexer);
> +  if (cp_lexer_lookup_trait_type (token))
> +    return true;
>    return cp_keyword_starts_decl_specifier_p (token->keyword);
>  }
>  
> @@ -2854,7 +2911,7 @@ static void cp_parser_late_parsing_default_args
>  static tree cp_parser_sizeof_operand
>    (cp_parser *, enum rid);
>  static cp_expr cp_parser_trait
> -  (cp_parser *, enum rid);
> +  (cp_parser *, const cp_trait *);
>  static bool cp_parser_declares_only_class_p
>    (cp_parser *);
>  static void cp_parser_set_storage_class
> @@ -6021,12 +6078,6 @@ cp_parser_primary_expression (cp_parser *parser,
>       case RID_OFFSETOF:
>         return cp_parser_builtin_offsetof (parser);
>  
> -#define DEFTRAIT_EXPR(CODE, NAME, ARITY) \
> -     case RID_##CODE:
> -#include "cp-trait.def"
> -#undef DEFTRAIT_EXPR
> -       return cp_parser_trait (parser, token->keyword);
> -
>       // C++ concepts
>       case RID_REQUIRES:
>         return cp_parser_requires_expression (parser);
> @@ -6065,6 +6116,12 @@ cp_parser_primary_expression (cp_parser *parser,
>        `::' as the beginning of a qualified-id, or the "operator"
>        keyword.  */
>      case CPP_NAME:
> +      {
> +     const cp_trait* trait = cp_lexer_lookup_trait_expr (token);
> +     if (trait)

A tiny nit, but we could remove the extra block scope here by doing
'if (const cp_trait* trait = ...)' instead.

> +       return cp_parser_trait (parser, trait);
> +      }
> +      /* FALLTHRU */
>      case CPP_SCOPE:
>      case CPP_TEMPLATE_ID:
>      case CPP_NESTED_NAME_SPECIFIER:
> @@ -11033,28 +11090,11 @@ cp_parser_builtin_offsetof (cp_parser *parser)
>  /* Parse a builtin trait expression or type.  */
>  
>  static cp_expr
> -cp_parser_trait (cp_parser* parser, enum rid keyword)
> +cp_parser_trait (cp_parser* parser, const cp_trait* trait)
>  {
> -  cp_trait_kind kind;
>    tree type1, type2 = NULL_TREE;
> -  bool binary = false;
> -  bool variadic = false;
> -  bool type = false;
> -
> -  switch (keyword)
> -    {
> -#define DEFTRAIT(TCC, CODE, NAME, ARITY) \
> -    case RID_##CODE:                  \
> -      kind = CPTK_##CODE;             \
> -      binary = (ARITY == 2);          \
> -      variadic = (ARITY == -1);               \
> -      type = (TCC == tcc_type);               \
> -      break;
> -#include "cp-trait.def"
> -#undef DEFTRAIT
> -    default:
> -      gcc_unreachable ();
> -    }
> +  const bool binary = (trait->arity == 2);
> +  const bool variadic = (trait->arity == -1);

Could we continue defining the local variables 'kind' and 'type' here so
that we don't have to adjust their uses in the rest of the function?
That should yield a smaller diff for this function.

>  
>    /* Get location of initial token.  */
>    location_t start_loc = cp_lexer_peek_token (parser->lexer)->location;
> @@ -11063,12 +11103,12 @@ cp_parser_trait (cp_parser* parser, enum rid keyword)
>    cp_lexer_consume_token (parser->lexer);
>  
>    matching_parens parens;
> -  if (kind == CPTK_TYPE_PACK_ELEMENT)
> +  if (trait->kind == CPTK_TYPE_PACK_ELEMENT)
>      cp_parser_require (parser, CPP_LESS, RT_LESS);
>    else
>      parens.require_open (parser);
>  
> -  if (kind == CPTK_IS_DEDUCIBLE)
> +  if (trait->kind == CPTK_IS_DEDUCIBLE)
>      {
>        const cp_token* token = cp_lexer_peek_token (parser->lexer);
>        type1 = cp_parser_id_expression (parser,
> @@ -11079,7 +11119,7 @@ cp_parser_trait (cp_parser* parser, enum rid keyword)
>                                      /*optional_p=*/false);
>        type1 = cp_parser_lookup_name_simple (parser, type1, token->location);
>      }
> -  else if (kind == CPTK_TYPE_PACK_ELEMENT)
> +  else if (trait->kind == CPTK_TYPE_PACK_ELEMENT)
>      /* __type_pack_element takes an expression as its first argument and uses
>         template-id syntax instead of function call syntax (for consistency
>         with Clang).  We special case these properties of __type_pack_element
> @@ -11094,7 +11134,7 @@ cp_parser_trait (cp_parser* parser, enum rid keyword)
>    if (type1 == error_mark_node)
>      return error_mark_node;
>  
> -  if (kind == CPTK_TYPE_PACK_ELEMENT)
> +  if (trait->kind == CPTK_TYPE_PACK_ELEMENT)
>      {
>        cp_parser_require (parser, CPP_COMMA, RT_COMMA);
>        tree trailing = cp_parser_enclosed_template_argument_list (parser);
> @@ -11144,7 +11184,7 @@ cp_parser_trait (cp_parser* parser, enum rid keyword)
>      }
>  
>    location_t finish_loc = cp_lexer_peek_token (parser->lexer)->location;
> -  if (kind == CPTK_TYPE_PACK_ELEMENT)
> +  if (trait->kind == CPTK_TYPE_PACK_ELEMENT)
>      /* cp_parser_enclosed_template_argument_list above already took care
>         of parsing the closing '>'.  */;
>    else
> @@ -11158,17 +11198,17 @@ cp_parser_trait (cp_parser* parser, enum rid keyword)
>  
>    /* Complete the trait expression, which may mean either processing
>       the trait expr now or saving it for template instantiation.  */
> -  switch (kind)
> +  switch (trait->kind)
>      {
>      case CPTK_BASES:
>        return cp_expr (finish_bases (type1, false), trait_loc);
>      case CPTK_DIRECT_BASES:
>        return cp_expr (finish_bases (type1, true), trait_loc);
>      default:
> -      if (type)
> -     return finish_trait_type (kind, type1, type2, tf_warning_or_error);
> +      if (trait->type)
> +     return finish_trait_type (trait->kind, type1, type2, tf_warning_or_error);
>        else
> -     return finish_trait_expr (trait_loc, kind, type1, type2);
> +     return finish_trait_expr (trait_loc, trait->kind, type1, type2);
>      }
>  }
>  
> @@ -20081,20 +20121,21 @@ cp_parser_simple_type_specifier (cp_parser* parser,
>  
>        return type;
>  
> -#define DEFTRAIT_TYPE(CODE, NAME, ARITY) \
> -    case RID_##CODE:
> -#include "cp-trait.def"
> -#undef DEFTRAIT_TYPE
> -      type = cp_parser_trait (parser, token->keyword);
> +    default:
> +      break;
> +    }
> +
> +  /* If token is a type-yielding built-in trait, parse it.  */
> +  const cp_trait* trait = cp_lexer_lookup_trait_type (token);
> +  if (trait)
> +    {
> +      type = cp_parser_trait (parser, trait);
>        if (decl_specs)
>       cp_parser_set_decl_spec_type (decl_specs, type,
>                                     token,
>                                     /*type_definition_p=*/false);
>  
>        return type;
> -
> -    default:
> -      break;
>      }
>  
>    /* If token is an already-parsed decltype not followed by ::,
> -- 
> 2.42.0
> 
>

Re: [PATCH v20 02/40] c-family, c++: Look up built-in traits via identifier node

Reply via email to