I'm not sure if there might be a better way to retrieve the prefix back
off an IDENTIFIER_NODE?  I'm also not sure if IDENTIFIER_INTERNAL_P
could ever clash with IDENTIFIER_TRANSPARENT_ALIAS; I'm pretty sure not
(and it looks like nothing ever sets that flag anyway, that I could
find?) but would appreciate any other thoughts.

I also wanted a run test (or at least a link test) using
-fsanitize=undefined, but I couldn't work out how to do so under
modules.exp, so I instead placed one under ubsan.exp.

-- >8 --

The frontend creates some variables that need to be given unique names
for the TU so that they can unambiguously be accessed.  Historically
this has been done with a global counter local to each place that needs
an internal label, but this doesn't work with modules as depending on
what declarations have been imported, some counter values may have
already been used.

This patch reworks the situation to instead have a single collection of
counters for the TU, and a new function 'generate_internal_label' that
gets the next label with given prefix using that counter.  Modules
streaming can then use this function to regenerate new names on
stream-in for any such decls, guaranteeing uniqueness within the TU.

These labels should only be used for internal entities so there should
be no issues with the names differing from TU to TU; we will need to
handle this if we ever start checking ODR of definitions we're merging
but that's an issue for later.

For proof of concept, this patch makes use of the new API for
__builtin_source_location and ubsan; there are probably other places
in the frontend where this change will need to be made as well.
One other change this exposes is that both of these components rely
on the definition of the VAR_DECLs they create, so stream that too
for uncontexted variables.

        PR c++/98735
        PR c++/118904

gcc/cp/ChangeLog:

        * cp-gimplify.cc (source_location_id): Remove.
        (fold_builtin_source_location): Use generate_internal_label.
        * module.cc (enum tree_tag): Add 'tt_internal_id' enumerator.
        (trees_out::tree_value): Adjust assertion, write definitions
        of uncontexted VAR_DECLs.
        (trees_in::tree_value): Read variable definitions.
        (trees_out::tree_node): Write internal labels, adjust assert.
        (trees_in::tree_node): Read internal labels.

gcc/ChangeLog:

        * tree.cc (struct identifier_hash): New type.
        (struct identifier_count_traits): New traits.
        (internal_label_nums): New hash map.
        (generate_internal_label): New function.
        (prefix_for_internal_label): New function.
        * tree.h (IDENTIFIER_INTERNAL_P): New macro.
        (generate_internal_label): Declare.
        (prefix_for_internal_label): Declare.
        * ubsan.cc (ubsan_ids): Remove.
        (ubsan_type_descriptor): Use generate_internal_label.
        (ubsan_create_data): Likewise.

gcc/testsuite/ChangeLog:

        * g++.dg/modules/src-loc-1.h: New test.
        * g++.dg/modules/src-loc-1_a.H: New test.
        * g++.dg/modules/src-loc-1_b.C: New test.
        * g++.dg/modules/src-loc-1_c.C: New test.
        * g++.dg/modules/ubsan-1_a.C: New test.
        * g++.dg/modules/ubsan-1_b.C: New test.
        * g++.dg/ubsan/module-1-aux.cc: New test.
        * g++.dg/ubsan/module-1.C: New test.

Signed-off-by: Nathaniel Shead <nathanielosh...@gmail.com>
---
 gcc/cp/cp-gimplify.cc                      |  5 +--
 gcc/cp/module.cc                           | 51 +++++++++++++++++++---
 gcc/testsuite/g++.dg/modules/src-loc-1.h   |  6 +++
 gcc/testsuite/g++.dg/modules/src-loc-1_a.H |  7 +++
 gcc/testsuite/g++.dg/modules/src-loc-1_b.C |  5 +++
 gcc/testsuite/g++.dg/modules/src-loc-1_c.C | 16 +++++++
 gcc/testsuite/g++.dg/modules/ubsan-1_a.C   | 10 +++++
 gcc/testsuite/g++.dg/modules/ubsan-1_b.C   | 14 ++++++
 gcc/testsuite/g++.dg/ubsan/module-1-aux.cc | 12 +++++
 gcc/testsuite/g++.dg/ubsan/module-1.C      | 11 +++++
 gcc/tree.cc                                | 51 ++++++++++++++++++++++
 gcc/tree.h                                 |  7 +++
 gcc/ubsan.cc                               | 16 ++-----
 13 files changed, 188 insertions(+), 23 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/modules/src-loc-1.h
 create mode 100644 gcc/testsuite/g++.dg/modules/src-loc-1_a.H
 create mode 100644 gcc/testsuite/g++.dg/modules/src-loc-1_b.C
 create mode 100644 gcc/testsuite/g++.dg/modules/src-loc-1_c.C
 create mode 100644 gcc/testsuite/g++.dg/modules/ubsan-1_a.C
 create mode 100644 gcc/testsuite/g++.dg/modules/ubsan-1_b.C
 create mode 100644 gcc/testsuite/g++.dg/ubsan/module-1-aux.cc
 create mode 100644 gcc/testsuite/g++.dg/ubsan/module-1.C

diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc
index f7bd453bc5e..7414064c68a 100644
--- a/gcc/cp/cp-gimplify.cc
+++ b/gcc/cp/cp-gimplify.cc
@@ -3874,7 +3874,6 @@ struct source_location_table_entry_hash
 
 static GTY(()) hash_table <source_location_table_entry_hash>
   *source_location_table;
-static GTY(()) unsigned int source_location_id;
 
 /* Fold the __builtin_source_location () call T.  */
 
@@ -3907,9 +3906,7 @@ fold_builtin_source_location (const_tree t)
     var = entryp->var;
   else
     {
-      char tmp_name[32];
-      ASM_GENERATE_INTERNAL_LABEL (tmp_name, "Lsrc_loc", source_location_id++);
-      var = build_decl (loc, VAR_DECL, get_identifier (tmp_name),
+      var = build_decl (loc, VAR_DECL, generate_internal_label ("Lsrc_loc"),
                        source_location_impl);
       TREE_STATIC (var) = 1;
       TREE_PUBLIC (var) = 0;
diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 765d17935c5..160b2448311 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -2811,12 +2811,13 @@ enum tree_tag {
   tt_decl,             /* By-value mergeable decl.  */
   tt_tpl_parm,         /* Template parm.  */
 
-  /* The ordering of the following 4 is relied upon in
+  /* The ordering of the following 5 is relied upon in
      trees_out::tree_node.  */
   tt_id,               /* Identifier node.  */
   tt_conv_id,          /* Conversion operator name.  */
   tt_anon_id,          /* Anonymous name.  */
   tt_lambda_id,                /* Lambda name.  */
+  tt_internal_id,       /* Internal name.  */
 
   tt_typedef_type,     /* A (possibly implicit) typedefed type.  */
   tt_derived_type,     /* A type derived from another type.  */
@@ -9662,7 +9663,9 @@ trees_out::tree_value (tree t)
                          && (TREE_CODE (t) != TYPE_DECL
                              || (DECL_ARTIFICIAL (t) && !DECL_CONTEXT (t)))
                          && (TREE_CODE (t) != VAR_DECL
-                             || (!DECL_NAME (t) && !DECL_CONTEXT (t)))
+                             || ((!DECL_NAME (t)
+                                  || IDENTIFIER_INTERNAL_P (DECL_NAME (t)))
+                                 && !DECL_CONTEXT (t)))
                          && TREE_CODE (t) != FUNCTION_DECL));
 
   if (streaming_p ())
@@ -9728,6 +9731,14 @@ trees_out::tree_value (tree t)
          && dump ("Written type:%d %C:%N", type_tag, TREE_CODE (type), type);
     }
 
+  /* For uncontexted VAR_DECLs we need to stream the definition so that
+     importers can recreate their value.  */
+  if (TREE_CODE (t) == VAR_DECL)
+    {
+      gcc_checking_assert (!DECL_NONTRIVIALLY_INITIALIZED_P (t));
+      tree_node (DECL_INITIAL (t));
+    }
+
   if (streaming_p ())
     dump (dumper::TREE) && dump ("Written tree:%d %C:%N", tag, TREE_CODE (t), 
t);
 }
@@ -9817,6 +9828,13 @@ bail:
        && dump ("Read type:%d %C:%N", type_tag, TREE_CODE (type), type);
     }
 
+  if (TREE_CODE (t) == VAR_DECL)
+    {
+      DECL_INITIAL (t) = tree_node ();
+      if (TREE_STATIC (t))
+       varpool_node::finalize_decl (t);
+    }
+
   if (TREE_CODE (t) == LAMBDA_EXPR
       && CLASSTYPE_LAMBDA_EXPR (TREE_TYPE (t)))
     {
@@ -9959,10 +9977,13 @@ trees_out::tree_node (tree t)
 
   if (TREE_CODE (t) == IDENTIFIER_NODE)
     {
-      /* An identifier node -> tt_id, tt_conv_id, tt_anon_id, tt_lambda_id.  */
+      /* An identifier node -> tt_id, tt_conv_id, tt_anon_id, tt_lambda_id,
+        tt_internal_id.  */
       int code = tt_id;
       if (IDENTIFIER_ANON_P (t))
        code = IDENTIFIER_LAMBDA_P (t) ? tt_lambda_id : tt_anon_id;
+      else if (IDENTIFIER_INTERNAL_P (t))
+       code = tt_internal_id;
       else if (IDENTIFIER_CONV_OP_P (t))
        code = tt_conv_id;
 
@@ -9977,13 +9998,15 @@ trees_out::tree_node (tree t)
        }
       else if (code == tt_id && streaming_p ())
        str (IDENTIFIER_POINTER (t), IDENTIFIER_LENGTH (t));
+      else if (code == tt_internal_id && streaming_p ())
+       str (prefix_for_internal_label (t));
 
       int tag = insert (t);
       if (streaming_p ())
        {
-         /* We know the ordering of the 4 id tags.  */
+         /* We know the ordering of the 5 id tags.  */
          static const char *const kinds[] =
-           {"", "conv_op ", "anon ", "lambda "};
+           {"", "conv_op ", "anon ", "lambda ", "internal "};
          dump (dumper::TREE)
            && dump ("Written:%d %sidentifier:%N", tag,
                     kinds[code - tt_id],
@@ -10060,8 +10083,11 @@ trees_out::tree_node (tree t)
              break;
 
            case VAR_DECL:
-             /* AGGR_INIT_EXPRs cons up anonymous uncontexted VAR_DECLs.  */
-             gcc_checking_assert (!DECL_NAME (t)
+             /* AGGR_INIT_EXPRs cons up anonymous uncontexted VAR_DECLs,
+                and internal vars are created by sanitizers and
+                __builtin_source_location.  */
+             gcc_checking_assert ((!DECL_NAME (t)
+                                   || IDENTIFIER_INTERNAL_P (DECL_NAME (t)))
                                   && DECL_ARTIFICIAL (t));
              break;
 
@@ -10217,6 +10243,17 @@ trees_in::tree_node (bool is_use)
       }
       break;
 
+    case tt_internal_id:
+      /* An internal label.  */
+      {
+       const char *prefix = str ();
+       res = generate_internal_label (prefix);
+       int tag = insert (res);
+       dump (dumper::TREE)
+         && dump ("Read internal identifier:%d %N", tag, res);
+      }
+      break;
+
     case tt_typedef_type:
       res = tree_node ();
       if (res)
diff --git a/gcc/testsuite/g++.dg/modules/src-loc-1.h 
b/gcc/testsuite/g++.dg/modules/src-loc-1.h
new file mode 100644
index 00000000000..90c2811fe8b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/src-loc-1.h
@@ -0,0 +1,6 @@
+// PR c++/118904
+
+#include <source_location>
+inline std::source_location foo() {
+  return std::source_location::current();
+}
diff --git a/gcc/testsuite/g++.dg/modules/src-loc-1_a.H 
b/gcc/testsuite/g++.dg/modules/src-loc-1_a.H
new file mode 100644
index 00000000000..21c92ed4f83
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/src-loc-1_a.H
@@ -0,0 +1,7 @@
+// PR c++/118904
+// { dg-additional-options "-fmodule-header -std=c++20 -fdump-lang-module-uid" 
}
+// { dg-module-cmi {} }
+
+#include "src-loc-1.h"
+
+// { dg-final { scan-lang-dump {Written:-[0-9]* internal 
identifier:[^\n\r]*Lsrc_loc} module } }
diff --git a/gcc/testsuite/g++.dg/modules/src-loc-1_b.C 
b/gcc/testsuite/g++.dg/modules/src-loc-1_b.C
new file mode 100644
index 00000000000..44881a2270c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/src-loc-1_b.C
@@ -0,0 +1,5 @@
+// PR c++/118904
+// { dg-additional-options "-fmodules -std=c++20 -fno-module-lazy" }
+
+#include "src-loc-1.h"
+import "src-loc-1_a.H";
diff --git a/gcc/testsuite/g++.dg/modules/src-loc-1_c.C 
b/gcc/testsuite/g++.dg/modules/src-loc-1_c.C
new file mode 100644
index 00000000000..6b62fd96524
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/src-loc-1_c.C
@@ -0,0 +1,16 @@
+// PR c++/118904
+// { dg-module-do run }
+// { dg-additional-options "-fmodules -std=c++20 -fdump-lang-module-uid" }
+
+import "src-loc-1_a.H";
+
+int main() {
+  const char* a = foo().function_name();
+  const char* b = std::source_location::current().function_name();
+  if (__builtin_strcmp(a, "std::source_location foo()"))
+    __builtin_abort();
+  if (__builtin_strcmp(b, "int main()"))
+    __builtin_abort();
+}
+
+// { dg-final { scan-lang-dump {Read internal identifier:-[0-9]* 
[^\n\r]*Lsrc_loc} module } }
diff --git a/gcc/testsuite/g++.dg/modules/ubsan-1_a.C 
b/gcc/testsuite/g++.dg/modules/ubsan-1_a.C
new file mode 100644
index 00000000000..583d20118ba
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/ubsan-1_a.C
@@ -0,0 +1,10 @@
+// PR c++/98735
+// { dg-additional-options "-fmodules -fsanitize=undefined -Wno-return-type" }
+// { dg-module-cmi X }
+
+export module X;
+
+export inline int f(int x) {
+    if (x > 0)
+      return x * 5;
+}
diff --git a/gcc/testsuite/g++.dg/modules/ubsan-1_b.C 
b/gcc/testsuite/g++.dg/modules/ubsan-1_b.C
new file mode 100644
index 00000000000..ae715198f6e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/ubsan-1_b.C
@@ -0,0 +1,14 @@
+// PR c++/98735
+// { dg-additional-options "-fmodules -fsanitize=undefined -Wno-return-type" }
+// Note: can't work out how to do a link test here.
+
+int g(int x) {
+  if (x > 0)
+    return x - 5;
+}
+
+import X;
+
+int main() {
+  f(123);
+}
diff --git a/gcc/testsuite/g++.dg/ubsan/module-1-aux.cc 
b/gcc/testsuite/g++.dg/ubsan/module-1-aux.cc
new file mode 100644
index 00000000000..7930896140f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ubsan/module-1-aux.cc
@@ -0,0 +1,12 @@
+// PR c++/98735
+
+int g(int x) {
+  if (x > 0)
+    return x - 5;
+}
+
+import X;
+
+int main() {
+  f(123);
+}
diff --git a/gcc/testsuite/g++.dg/ubsan/module-1.C 
b/gcc/testsuite/g++.dg/ubsan/module-1.C
new file mode 100644
index 00000000000..566113da514
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ubsan/module-1.C
@@ -0,0 +1,11 @@
+// PR c++/98735
+// { dg-do run { target c++17 } }
+// { dg-options "-fmodules -fsanitize=undefined -Wno-return-type" }
+// { dg-additional-sources module-1-aux.cc }
+
+export module X;
+
+export inline int f(int x) {
+    if (x > 0)
+      return x * 5;
+}
diff --git a/gcc/tree.cc b/gcc/tree.cc
index 98575a51f58..3b599d81d43 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -786,6 +786,57 @@ init_ttree (void)
 }
 
 
+/* Mapping from prefix to label number.  */
+
+struct identifier_hash : ggc_ptr_hash <tree_node>
+{
+  static inline hashval_t hash (tree t)
+  {
+    return IDENTIFIER_HASH_VALUE (t);
+  }
+};
+struct identifier_count_traits
+  : simple_hashmap_traits<identifier_hash, long> {};
+typedef hash_map<tree, long, identifier_count_traits> internal_label_map;
+static GTY(()) internal_label_map *internal_label_nums;
+
+/* Generates an identifier intended to be used internally with the
+   given PREFIX.  This is intended to be used by the frontend so that
+   C++ modules can regenerate appropriate (non-clashing) identifiers on
+   stream-in.  */
+
+tree
+generate_internal_label (const char *prefix)
+{
+  tree prefix_id = get_identifier (prefix);
+  if (!internal_label_nums)
+    internal_label_nums = internal_label_map::create_ggc();
+  long &num = internal_label_nums->get_or_insert (prefix_id);
+
+  char tmp[32];
+  ASM_GENERATE_INTERNAL_LABEL (tmp, prefix, num++);
+
+  tree id = get_identifier (tmp);
+  IDENTIFIER_INTERNAL_P (id) = true;
+
+  /* Cache the prefix on the identifier so we can retrieve it later.  */
+  TREE_CHAIN (id) = prefix_id;
+
+  return id;
+}
+
+/* Get the PREFIX we created the internal identifier LABEL with.  */
+
+const char *
+prefix_for_internal_label (tree label)
+{
+  gcc_assert (IDENTIFIER_INTERNAL_P (label)
+             && !IDENTIFIER_TRANSPARENT_ALIAS (label)
+             && TREE_CHAIN (label)
+             && TREE_CODE (TREE_CHAIN (label)) == IDENTIFIER_NODE);
+  return IDENTIFIER_POINTER (TREE_CHAIN (label));
+}
+
 /* The name of the object as the assembler will see it (but before any
    translations made by ASM_OUTPUT_LABELREF).  Often this is the same
    as DECL_NAME.  It is an IDENTIFIER_NODE.  */
diff --git a/gcc/tree.h b/gcc/tree.h
index 0d876234824..89263b61af6 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1078,6 +1078,11 @@ extern void omp_clause_range_check_failed (const_tree, 
const char *, int,
 #define IDENTIFIER_ANON_P(NODE) \
   (IDENTIFIER_NODE_CHECK (NODE)->base.private_flag)
 
+/* Nonzero indicates an IDENTIFIER_NODE that names an internal label.
+   The prefix used to generate the label can be found on the TREE_CHAIN.  */
+#define IDENTIFIER_INTERNAL_P(NODE) \
+  (IDENTIFIER_NODE_CHECK (NODE)->base.volatile_flag)
+
 /* Nonzero in an IDENTIFIER_NODE if the name is a local alias, whose
    uses are to be substituted for uses of the TREE_CHAINed identifier.  */
 #define IDENTIFIER_TRANSPARENT_ALIAS(NODE) \
@@ -4730,6 +4735,8 @@ vector_cst_encoded_nelts (const_tree t)
   return VECTOR_CST_NPATTERNS (t) * VECTOR_CST_NELTS_PER_PATTERN (t);
 }
 
+extern tree generate_internal_label (const char *);
+extern const char *prefix_for_internal_label (tree label);
 extern tree decl_assembler_name (tree);
 extern void overwrite_decl_assembler_name (tree decl, tree name);
 extern tree decl_comdat_group (const_tree);
diff --git a/gcc/ubsan.cc b/gcc/ubsan.cc
index 35a7dbd011e..3c130a66095 100644
--- a/gcc/ubsan.cc
+++ b/gcc/ubsan.cc
@@ -358,10 +358,6 @@ get_ubsan_type_info_for_type (tree type)
     return 0;
 }
 
-/* Counters for internal labels.  ubsan_ids[0] for Lubsan_type,
-   ubsan_ids[1] for Lubsan_data labels.  */
-static GTY(()) unsigned int ubsan_ids[2];
-
 /* Helper routine that returns ADDR_EXPR of a VAR_DECL of a type
    descriptor.  It first looks into the hash table; if not found,
    create the VAR_DECL, put it into the hash table and return the
@@ -552,10 +548,8 @@ ubsan_type_descriptor (tree type, enum ubsan_print_style 
pstyle)
   TREE_READONLY (str) = 1;
   TREE_STATIC (str) = 1;
 
-  char tmp_name[32];
-  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "Lubsan_type", ubsan_ids[0]++);
-  decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier (tmp_name),
-                    dtype);
+  decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+                    generate_internal_label ("Lubsan_type"), dtype);
   TREE_STATIC (decl) = 1;
   TREE_PUBLIC (decl) = 0;
   DECL_ARTIFICIAL (decl) = 1;
@@ -659,10 +653,8 @@ ubsan_create_data (const char *name, int loccnt, const 
location_t *ploc, ...)
   layout_type (ret);
 
   /* Now, fill in the type.  */
-  char tmp_name[32];
-  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "Lubsan_data", ubsan_ids[1]++);
-  tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier (tmp_name),
-                        ret);
+  tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+                        generate_internal_label ("Lubsan_data"), ret);
   TREE_STATIC (var) = 1;
   TREE_PUBLIC (var) = 0;
   DECL_ARTIFICIAL (var) = 1;
-- 
2.47.0

Reply via email to