On Thu, May 22, 2025 at 12:16:07PM +1000, Nathaniel Shead wrote:
> I'm not sure if there might be a better way to retrieve the prefix back
> off an IDENTIFIER_NODE?  I'm also not sure if IDENTIFIER_INTERNAL_P
> could ever clash with IDENTIFIER_TRANSPARENT_ALIAS; I'm pretty sure not
> (and it looks like nothing ever sets that flag anyway, that I could
> find?) but would appreciate any other thoughts.
> 
> I also wanted a run test (or at least a link test) using
> -fsanitize=undefined, but I couldn't work out how to do so under
> modules.exp, so I instead placed one under ubsan.exp.
> 

Ping.

> -- >8 --
> 
> The frontend creates some variables that need to be given unique names
> for the TU so that they can unambiguously be accessed.  Historically
> this has been done with a global counter local to each place that needs
> an internal label, but this doesn't work with modules as depending on
> what declarations have been imported, some counter values may have
> already been used.
> 
> This patch reworks the situation to instead have a single collection of
> counters for the TU, and a new function 'generate_internal_label' that
> gets the next label with given prefix using that counter.  Modules
> streaming can then use this function to regenerate new names on
> stream-in for any such decls, guaranteeing uniqueness within the TU.
> 
> These labels should only be used for internal entities so there should
> be no issues with the names differing from TU to TU; we will need to
> handle this if we ever start checking ODR of definitions we're merging
> but that's an issue for later.
> 
> For proof of concept, this patch makes use of the new API for
> __builtin_source_location and ubsan; there are probably other places
> in the frontend where this change will need to be made as well.
> One other change this exposes is that both of these components rely
> on the definition of the VAR_DECLs they create, so stream that too
> for uncontexted variables.
> 
>       PR c++/98735
>       PR c++/118904
> 
> gcc/cp/ChangeLog:
> 
>       * cp-gimplify.cc (source_location_id): Remove.
>       (fold_builtin_source_location): Use generate_internal_label.
>       * module.cc (enum tree_tag): Add 'tt_internal_id' enumerator.
>       (trees_out::tree_value): Adjust assertion, write definitions
>       of uncontexted VAR_DECLs.
>       (trees_in::tree_value): Read variable definitions.
>       (trees_out::tree_node): Write internal labels, adjust assert.
>       (trees_in::tree_node): Read internal labels.
> 
> gcc/ChangeLog:
> 
>       * tree.cc (struct identifier_hash): New type.
>       (struct identifier_count_traits): New traits.
>       (internal_label_nums): New hash map.
>       (generate_internal_label): New function.
>       (prefix_for_internal_label): New function.
>       * tree.h (IDENTIFIER_INTERNAL_P): New macro.
>       (generate_internal_label): Declare.
>       (prefix_for_internal_label): Declare.
>       * ubsan.cc (ubsan_ids): Remove.
>       (ubsan_type_descriptor): Use generate_internal_label.
>       (ubsan_create_data): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
>       * g++.dg/modules/src-loc-1.h: New test.
>       * g++.dg/modules/src-loc-1_a.H: New test.
>       * g++.dg/modules/src-loc-1_b.C: New test.
>       * g++.dg/modules/src-loc-1_c.C: New test.
>       * g++.dg/modules/ubsan-1_a.C: New test.
>       * g++.dg/modules/ubsan-1_b.C: New test.
>       * g++.dg/ubsan/module-1-aux.cc: New test.
>       * g++.dg/ubsan/module-1.C: New test.
> 
> Signed-off-by: Nathaniel Shead <nathanielosh...@gmail.com>
> ---
>  gcc/cp/cp-gimplify.cc                      |  5 +--
>  gcc/cp/module.cc                           | 51 +++++++++++++++++++---
>  gcc/testsuite/g++.dg/modules/src-loc-1.h   |  6 +++
>  gcc/testsuite/g++.dg/modules/src-loc-1_a.H |  7 +++
>  gcc/testsuite/g++.dg/modules/src-loc-1_b.C |  5 +++
>  gcc/testsuite/g++.dg/modules/src-loc-1_c.C | 16 +++++++
>  gcc/testsuite/g++.dg/modules/ubsan-1_a.C   | 10 +++++
>  gcc/testsuite/g++.dg/modules/ubsan-1_b.C   | 14 ++++++
>  gcc/testsuite/g++.dg/ubsan/module-1-aux.cc | 12 +++++
>  gcc/testsuite/g++.dg/ubsan/module-1.C      | 11 +++++
>  gcc/tree.cc                                | 51 ++++++++++++++++++++++
>  gcc/tree.h                                 |  7 +++
>  gcc/ubsan.cc                               | 16 ++-----
>  13 files changed, 188 insertions(+), 23 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/modules/src-loc-1.h
>  create mode 100644 gcc/testsuite/g++.dg/modules/src-loc-1_a.H
>  create mode 100644 gcc/testsuite/g++.dg/modules/src-loc-1_b.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/src-loc-1_c.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/ubsan-1_a.C
>  create mode 100644 gcc/testsuite/g++.dg/modules/ubsan-1_b.C
>  create mode 100644 gcc/testsuite/g++.dg/ubsan/module-1-aux.cc
>  create mode 100644 gcc/testsuite/g++.dg/ubsan/module-1.C
> 
> diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc
> index f7bd453bc5e..7414064c68a 100644
> --- a/gcc/cp/cp-gimplify.cc
> +++ b/gcc/cp/cp-gimplify.cc
> @@ -3874,7 +3874,6 @@ struct source_location_table_entry_hash
>  
>  static GTY(()) hash_table <source_location_table_entry_hash>
>    *source_location_table;
> -static GTY(()) unsigned int source_location_id;
>  
>  /* Fold the __builtin_source_location () call T.  */
>  
> @@ -3907,9 +3906,7 @@ fold_builtin_source_location (const_tree t)
>      var = entryp->var;
>    else
>      {
> -      char tmp_name[32];
> -      ASM_GENERATE_INTERNAL_LABEL (tmp_name, "Lsrc_loc", 
> source_location_id++);
> -      var = build_decl (loc, VAR_DECL, get_identifier (tmp_name),
> +      var = build_decl (loc, VAR_DECL, generate_internal_label ("Lsrc_loc"),
>                       source_location_impl);
>        TREE_STATIC (var) = 1;
>        TREE_PUBLIC (var) = 0;
> diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
> index 765d17935c5..160b2448311 100644
> --- a/gcc/cp/module.cc
> +++ b/gcc/cp/module.cc
> @@ -2811,12 +2811,13 @@ enum tree_tag {
>    tt_decl,           /* By-value mergeable decl.  */
>    tt_tpl_parm,               /* Template parm.  */
>  
> -  /* The ordering of the following 4 is relied upon in
> +  /* The ordering of the following 5 is relied upon in
>       trees_out::tree_node.  */
>    tt_id,             /* Identifier node.  */
>    tt_conv_id,                /* Conversion operator name.  */
>    tt_anon_id,                /* Anonymous name.  */
>    tt_lambda_id,              /* Lambda name.  */
> +  tt_internal_id,       /* Internal name.  */
>  
>    tt_typedef_type,   /* A (possibly implicit) typedefed type.  */
>    tt_derived_type,   /* A type derived from another type.  */
> @@ -9662,7 +9663,9 @@ trees_out::tree_value (tree t)
>                         && (TREE_CODE (t) != TYPE_DECL
>                             || (DECL_ARTIFICIAL (t) && !DECL_CONTEXT (t)))
>                         && (TREE_CODE (t) != VAR_DECL
> -                           || (!DECL_NAME (t) && !DECL_CONTEXT (t)))
> +                           || ((!DECL_NAME (t)
> +                                || IDENTIFIER_INTERNAL_P (DECL_NAME (t)))
> +                               && !DECL_CONTEXT (t)))
>                         && TREE_CODE (t) != FUNCTION_DECL));
>  
>    if (streaming_p ())
> @@ -9728,6 +9731,14 @@ trees_out::tree_value (tree t)
>         && dump ("Written type:%d %C:%N", type_tag, TREE_CODE (type), type);
>      }
>  
> +  /* For uncontexted VAR_DECLs we need to stream the definition so that
> +     importers can recreate their value.  */
> +  if (TREE_CODE (t) == VAR_DECL)
> +    {
> +      gcc_checking_assert (!DECL_NONTRIVIALLY_INITIALIZED_P (t));
> +      tree_node (DECL_INITIAL (t));
> +    }
> +
>    if (streaming_p ())
>      dump (dumper::TREE) && dump ("Written tree:%d %C:%N", tag, TREE_CODE 
> (t), t);
>  }
> @@ -9817,6 +9828,13 @@ bail:
>       && dump ("Read type:%d %C:%N", type_tag, TREE_CODE (type), type);
>      }
>  
> +  if (TREE_CODE (t) == VAR_DECL)
> +    {
> +      DECL_INITIAL (t) = tree_node ();
> +      if (TREE_STATIC (t))
> +     varpool_node::finalize_decl (t);
> +    }
> +
>    if (TREE_CODE (t) == LAMBDA_EXPR
>        && CLASSTYPE_LAMBDA_EXPR (TREE_TYPE (t)))
>      {
> @@ -9959,10 +9977,13 @@ trees_out::tree_node (tree t)
>  
>    if (TREE_CODE (t) == IDENTIFIER_NODE)
>      {
> -      /* An identifier node -> tt_id, tt_conv_id, tt_anon_id, tt_lambda_id.  
> */
> +      /* An identifier node -> tt_id, tt_conv_id, tt_anon_id, tt_lambda_id,
> +      tt_internal_id.  */
>        int code = tt_id;
>        if (IDENTIFIER_ANON_P (t))
>       code = IDENTIFIER_LAMBDA_P (t) ? tt_lambda_id : tt_anon_id;
> +      else if (IDENTIFIER_INTERNAL_P (t))
> +     code = tt_internal_id;
>        else if (IDENTIFIER_CONV_OP_P (t))
>       code = tt_conv_id;
>  
> @@ -9977,13 +9998,15 @@ trees_out::tree_node (tree t)
>       }
>        else if (code == tt_id && streaming_p ())
>       str (IDENTIFIER_POINTER (t), IDENTIFIER_LENGTH (t));
> +      else if (code == tt_internal_id && streaming_p ())
> +     str (prefix_for_internal_label (t));
>  
>        int tag = insert (t);
>        if (streaming_p ())
>       {
> -       /* We know the ordering of the 4 id tags.  */
> +       /* We know the ordering of the 5 id tags.  */
>         static const char *const kinds[] =
> -         {"", "conv_op ", "anon ", "lambda "};
> +         {"", "conv_op ", "anon ", "lambda ", "internal "};
>         dump (dumper::TREE)
>           && dump ("Written:%d %sidentifier:%N", tag,
>                    kinds[code - tt_id],
> @@ -10060,8 +10083,11 @@ trees_out::tree_node (tree t)
>             break;
>  
>           case VAR_DECL:
> -           /* AGGR_INIT_EXPRs cons up anonymous uncontexted VAR_DECLs.  */
> -           gcc_checking_assert (!DECL_NAME (t)
> +           /* AGGR_INIT_EXPRs cons up anonymous uncontexted VAR_DECLs,
> +              and internal vars are created by sanitizers and
> +              __builtin_source_location.  */
> +           gcc_checking_assert ((!DECL_NAME (t)
> +                                 || IDENTIFIER_INTERNAL_P (DECL_NAME (t)))
>                                  && DECL_ARTIFICIAL (t));
>             break;
>  
> @@ -10217,6 +10243,17 @@ trees_in::tree_node (bool is_use)
>        }
>        break;
>  
> +    case tt_internal_id:
> +      /* An internal label.  */
> +      {
> +     const char *prefix = str ();
> +     res = generate_internal_label (prefix);
> +     int tag = insert (res);
> +     dump (dumper::TREE)
> +       && dump ("Read internal identifier:%d %N", tag, res);
> +      }
> +      break;
> +
>      case tt_typedef_type:
>        res = tree_node ();
>        if (res)
> diff --git a/gcc/testsuite/g++.dg/modules/src-loc-1.h 
> b/gcc/testsuite/g++.dg/modules/src-loc-1.h
> new file mode 100644
> index 00000000000..90c2811fe8b
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/modules/src-loc-1.h
> @@ -0,0 +1,6 @@
> +// PR c++/118904
> +
> +#include <source_location>
> +inline std::source_location foo() {
> +  return std::source_location::current();
> +}
> diff --git a/gcc/testsuite/g++.dg/modules/src-loc-1_a.H 
> b/gcc/testsuite/g++.dg/modules/src-loc-1_a.H
> new file mode 100644
> index 00000000000..21c92ed4f83
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/modules/src-loc-1_a.H
> @@ -0,0 +1,7 @@
> +// PR c++/118904
> +// { dg-additional-options "-fmodule-header -std=c++20 
> -fdump-lang-module-uid" }
> +// { dg-module-cmi {} }
> +
> +#include "src-loc-1.h"
> +
> +// { dg-final { scan-lang-dump {Written:-[0-9]* internal 
> identifier:[^\n\r]*Lsrc_loc} module } }
> diff --git a/gcc/testsuite/g++.dg/modules/src-loc-1_b.C 
> b/gcc/testsuite/g++.dg/modules/src-loc-1_b.C
> new file mode 100644
> index 00000000000..44881a2270c
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/modules/src-loc-1_b.C
> @@ -0,0 +1,5 @@
> +// PR c++/118904
> +// { dg-additional-options "-fmodules -std=c++20 -fno-module-lazy" }
> +
> +#include "src-loc-1.h"
> +import "src-loc-1_a.H";
> diff --git a/gcc/testsuite/g++.dg/modules/src-loc-1_c.C 
> b/gcc/testsuite/g++.dg/modules/src-loc-1_c.C
> new file mode 100644
> index 00000000000..6b62fd96524
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/modules/src-loc-1_c.C
> @@ -0,0 +1,16 @@
> +// PR c++/118904
> +// { dg-module-do run }
> +// { dg-additional-options "-fmodules -std=c++20 -fdump-lang-module-uid" }
> +
> +import "src-loc-1_a.H";
> +
> +int main() {
> +  const char* a = foo().function_name();
> +  const char* b = std::source_location::current().function_name();
> +  if (__builtin_strcmp(a, "std::source_location foo()"))
> +    __builtin_abort();
> +  if (__builtin_strcmp(b, "int main()"))
> +    __builtin_abort();
> +}
> +
> +// { dg-final { scan-lang-dump {Read internal identifier:-[0-9]* 
> [^\n\r]*Lsrc_loc} module } }
> diff --git a/gcc/testsuite/g++.dg/modules/ubsan-1_a.C 
> b/gcc/testsuite/g++.dg/modules/ubsan-1_a.C
> new file mode 100644
> index 00000000000..583d20118ba
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/modules/ubsan-1_a.C
> @@ -0,0 +1,10 @@
> +// PR c++/98735
> +// { dg-additional-options "-fmodules -fsanitize=undefined -Wno-return-type" 
> }
> +// { dg-module-cmi X }
> +
> +export module X;
> +
> +export inline int f(int x) {
> +    if (x > 0)
> +      return x * 5;
> +}
> diff --git a/gcc/testsuite/g++.dg/modules/ubsan-1_b.C 
> b/gcc/testsuite/g++.dg/modules/ubsan-1_b.C
> new file mode 100644
> index 00000000000..ae715198f6e
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/modules/ubsan-1_b.C
> @@ -0,0 +1,14 @@
> +// PR c++/98735
> +// { dg-additional-options "-fmodules -fsanitize=undefined -Wno-return-type" 
> }
> +// Note: can't work out how to do a link test here.
> +
> +int g(int x) {
> +  if (x > 0)
> +    return x - 5;
> +}
> +
> +import X;
> +
> +int main() {
> +  f(123);
> +}
> diff --git a/gcc/testsuite/g++.dg/ubsan/module-1-aux.cc 
> b/gcc/testsuite/g++.dg/ubsan/module-1-aux.cc
> new file mode 100644
> index 00000000000..7930896140f
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/ubsan/module-1-aux.cc
> @@ -0,0 +1,12 @@
> +// PR c++/98735
> +
> +int g(int x) {
> +  if (x > 0)
> +    return x - 5;
> +}
> +
> +import X;
> +
> +int main() {
> +  f(123);
> +}
> diff --git a/gcc/testsuite/g++.dg/ubsan/module-1.C 
> b/gcc/testsuite/g++.dg/ubsan/module-1.C
> new file mode 100644
> index 00000000000..566113da514
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/ubsan/module-1.C
> @@ -0,0 +1,11 @@
> +// PR c++/98735
> +// { dg-do run { target c++17 } }
> +// { dg-options "-fmodules -fsanitize=undefined -Wno-return-type" }
> +// { dg-additional-sources module-1-aux.cc }
> +
> +export module X;
> +
> +export inline int f(int x) {
> +    if (x > 0)
> +      return x * 5;
> +}
> diff --git a/gcc/tree.cc b/gcc/tree.cc
> index 98575a51f58..3b599d81d43 100644
> --- a/gcc/tree.cc
> +++ b/gcc/tree.cc
> @@ -786,6 +786,57 @@ init_ttree (void)
>  }
>  
>  
> +/* Mapping from prefix to label number.  */
> +
> +struct identifier_hash : ggc_ptr_hash <tree_node>
> +{
> +  static inline hashval_t hash (tree t)
> +  {
> +    return IDENTIFIER_HASH_VALUE (t);
> +  }
> +};
> +struct identifier_count_traits
> +  : simple_hashmap_traits<identifier_hash, long> {};
> +typedef hash_map<tree, long, identifier_count_traits> internal_label_map;
> +static GTY(()) internal_label_map *internal_label_nums;
> +
> +/* Generates an identifier intended to be used internally with the
> +   given PREFIX.  This is intended to be used by the frontend so that
> +   C++ modules can regenerate appropriate (non-clashing) identifiers on
> +   stream-in.  */
> +
> +tree
> +generate_internal_label (const char *prefix)
> +{
> +  tree prefix_id = get_identifier (prefix);
> +  if (!internal_label_nums)
> +    internal_label_nums = internal_label_map::create_ggc();
> +  long &num = internal_label_nums->get_or_insert (prefix_id);
> +
> +  char tmp[32];
> +  ASM_GENERATE_INTERNAL_LABEL (tmp, prefix, num++);
> +
> +  tree id = get_identifier (tmp);
> +  IDENTIFIER_INTERNAL_P (id) = true;
> +
> +  /* Cache the prefix on the identifier so we can retrieve it later.  */
> +  TREE_CHAIN (id) = prefix_id;
> +
> +  return id;
> +}
> +
> +/* Get the PREFIX we created the internal identifier LABEL with.  */
> +
> +const char *
> +prefix_for_internal_label (tree label)
> +{
> +  gcc_assert (IDENTIFIER_INTERNAL_P (label)
> +           && !IDENTIFIER_TRANSPARENT_ALIAS (label)
> +           && TREE_CHAIN (label)
> +           && TREE_CODE (TREE_CHAIN (label)) == IDENTIFIER_NODE);
> +  return IDENTIFIER_POINTER (TREE_CHAIN (label));
> +}
> +
>  /* The name of the object as the assembler will see it (but before any
>     translations made by ASM_OUTPUT_LABELREF).  Often this is the same
>     as DECL_NAME.  It is an IDENTIFIER_NODE.  */
> diff --git a/gcc/tree.h b/gcc/tree.h
> index 0d876234824..89263b61af6 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -1078,6 +1078,11 @@ extern void omp_clause_range_check_failed (const_tree, 
> const char *, int,
>  #define IDENTIFIER_ANON_P(NODE) \
>    (IDENTIFIER_NODE_CHECK (NODE)->base.private_flag)
>  
> +/* Nonzero indicates an IDENTIFIER_NODE that names an internal label.
> +   The prefix used to generate the label can be found on the TREE_CHAIN.  */
> +#define IDENTIFIER_INTERNAL_P(NODE) \
> +  (IDENTIFIER_NODE_CHECK (NODE)->base.volatile_flag)
> +
>  /* Nonzero in an IDENTIFIER_NODE if the name is a local alias, whose
>     uses are to be substituted for uses of the TREE_CHAINed identifier.  */
>  #define IDENTIFIER_TRANSPARENT_ALIAS(NODE) \
> @@ -4730,6 +4735,8 @@ vector_cst_encoded_nelts (const_tree t)
>    return VECTOR_CST_NPATTERNS (t) * VECTOR_CST_NELTS_PER_PATTERN (t);
>  }
>  
> +extern tree generate_internal_label (const char *);
> +extern const char *prefix_for_internal_label (tree label);
>  extern tree decl_assembler_name (tree);
>  extern void overwrite_decl_assembler_name (tree decl, tree name);
>  extern tree decl_comdat_group (const_tree);
> diff --git a/gcc/ubsan.cc b/gcc/ubsan.cc
> index 35a7dbd011e..3c130a66095 100644
> --- a/gcc/ubsan.cc
> +++ b/gcc/ubsan.cc
> @@ -358,10 +358,6 @@ get_ubsan_type_info_for_type (tree type)
>      return 0;
>  }
>  
> -/* Counters for internal labels.  ubsan_ids[0] for Lubsan_type,
> -   ubsan_ids[1] for Lubsan_data labels.  */
> -static GTY(()) unsigned int ubsan_ids[2];
> -
>  /* Helper routine that returns ADDR_EXPR of a VAR_DECL of a type
>     descriptor.  It first looks into the hash table; if not found,
>     create the VAR_DECL, put it into the hash table and return the
> @@ -552,10 +548,8 @@ ubsan_type_descriptor (tree type, enum ubsan_print_style 
> pstyle)
>    TREE_READONLY (str) = 1;
>    TREE_STATIC (str) = 1;
>  
> -  char tmp_name[32];
> -  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "Lubsan_type", ubsan_ids[0]++);
> -  decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier (tmp_name),
> -                  dtype);
> +  decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
> +                  generate_internal_label ("Lubsan_type"), dtype);
>    TREE_STATIC (decl) = 1;
>    TREE_PUBLIC (decl) = 0;
>    DECL_ARTIFICIAL (decl) = 1;
> @@ -659,10 +653,8 @@ ubsan_create_data (const char *name, int loccnt, const 
> location_t *ploc, ...)
>    layout_type (ret);
>  
>    /* Now, fill in the type.  */
> -  char tmp_name[32];
> -  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "Lubsan_data", ubsan_ids[1]++);
> -  tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier 
> (tmp_name),
> -                      ret);
> +  tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL,
> +                      generate_internal_label ("Lubsan_data"), ret);
>    TREE_STATIC (var) = 1;
>    TREE_PUBLIC (var) = 0;
>    DECL_ARTIFICIAL (var) = 1;
> -- 
> 2.47.0
> 

Reply via email to