Hi,
as discussed also on the autofdo pull request, LLVM solves the same
problem using -funique-internal-linkage-names
https://reviews.llvm.org/D73307

All non-public functions gets theis symbol renamed from
<function_name>.__uniq.<md5 sum of source file name in decadic>
Decadic is used since demanglers special case numerical suffixes.
In addition debug info of all functions get DW_AT_linkage_name
with this linkage name as attribute.

This makes it possible to uniquely match symbol names in the
gcov profile with symbol table at -fauto-profile readback time.

Implementing the gory details I see that this solution is bit
of a hack.  I would say

Pros
 - it is not always good idea to reinvent wheel (but differently)
 - it is transparent to current implemention of create_gcov
 - by being mostly compatible with LLVM we simplify tooling
   of projects that uses both GCC and LLVM as compilers.
   Moreover we keep possibility of both LLVM and GCC consuming
   same profile data eventually which can be good for projects
   that ship them in release bacpages
Cons
 - it does not work with toplevel assembler (since it renames the
   actual symbol).  This may be solved at gas side by syntactic aliases
   if supported.
 - it makes symbol names longer making all other tools, not only
   auto-fdo to process them in symbol tables and debug info

Conceptually it is working around limitation of create_gcov dwarf
consumer for sure (it is also mentioned in the review at LLVM side
linked above).

Learning about details, I think the orignal patchset to extend gcov
format by file names has few issues, too.  In partiuclar
 - it is important to add file names only to function symbols that are
   not public, so having both public and private symbols of the same
   name as necessary
 - As discussed in autofdo pull request
   https://github.com/google/autofdo/pull/244
   we can just combine file name and symbol name into existing name
   entry.  Somehting like filename/symbol name
 - We can not use filename of source contianing the function but instead
   name of the translation unit.
   It is common for inline headers to contain static functions that
   would get mixed up as well.
But those are fixable.

gcc/ChangeLog:

        * auto-profile.cc (string_table::get_index_by_decl):
        If necessary add unique suffix.
        (function_instance::get_cgraph_node): If necessary strip
        unique suffix.
        (match_with_target): If necessary add unique suffix.
        (autofdo_source_profile::offline_external_functions):
        Set uses_unique_names; if unique names are on be more
        strict on mathing
        * common.opt: (funique-internal-linkage-names): Add
        * dwarf2out.cc: Include ipa-utils.h
        (add_linkage_attr): Add unique suffix if needed.
        (add_linkage_name): Output linkage names of non-public
        functions.
        * ipa-utils.h (decl_unique_linkage_name_suffix): Declare.
        (decl_unique_linkage_name): Declare.
        (unique_linkage_name_suffix_start): Declare.
        * ipa-visibility.cc: Include md5.h.
        (decl_unique_linkage_name_suffix): New function.
        (decl_unique_linkage_name): New function.
        (unique_linkage_name_suffix_start): New function.
        (function_and_variable_visibility): Compute suffixes
        if necessary.

diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
index 1700bf8f2cd..f1ef18b8a65 100644
--- a/gcc/auto-profile.cc
+++ b/gcc/auto-profile.cc
@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "auto-profile.h"
 #include "tree-pretty-print.h"
 #include "gimple-pretty-print.h"
+#include "ipa-utils.h"
 
 /* The following routines implements AutoFDO optimization.
 
@@ -508,6 +509,7 @@ public:
   create ()
   {
     autofdo_source_profile *map = new autofdo_source_profile ();
+    map->uses_unique_names_ = false;
 
     if (map->read ())
       return map;
@@ -551,6 +553,18 @@ public:
   void offline_external_functions ();
 
   void offline_unrealized_inlines ();
+
+  bool
+  uses_unique_names ()
+  {
+    return uses_unique_names_;
+  }
+
+  void
+  set_uses_unique_names ()
+  {
+    uses_unique_names_ = 1;
+  }
 private:
   /* Map from function_instance name index (in string_table) to
      function_instance.  */
@@ -569,6 +583,8 @@ private:
   name_function_instance_map map_;
 
   auto_vec <function_instance *> duplicate_functions_;
+
+  bool uses_unique_names_;
 };
 
 /* Store the strings read from the profile data file.  */
@@ -818,6 +834,18 @@ int
 string_table::get_index_by_decl (tree decl) const
 {
   const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+  if (afdo_source_profile->uses_unique_names ()
+      && (!flag_unique_internal_linkage_names
+         || !symtab->function_flags_ready)
+      && !TREE_PUBLIC (decl))
+    {
+      /* TODO: Building unique names for each lookup is not necessary.
+        Build table translating decls and indexes just once.  */
+      char *unique_name = decl_unique_linkage_name (decl);
+      int ret = get_index (unique_name);
+      free (unique_name);
+      return ret;
+    }
   int ret = get_index (name);
   if (ret != -1)
     return ret;
@@ -880,13 +908,41 @@ function_instance::~function_instance ()
 cgraph_node *
 function_instance::get_cgraph_node ()
 {
-  for (symtab_node *n = cgraph_node::get_for_asmname
-                           (get_identifier
-                              (afdo_string_table->get_name (name ())));
-       n; n = n->next_sharing_asm_name)
+  const char *sname = afdo_string_table->get_name (name ());
+  char *nonunique_name = NULL;
+  const char *suffix;
+
+  if ((!symtab->function_flags_ready
+       || !flag_unique_internal_linkage_names)
+      && afdo_source_profile->uses_unique_names ()
+      && (suffix = unique_linkage_name_suffix_start (sname)))
+    {
+      /* Is it local function in this unit?  */
+      if (strncmp (suffix, decl_unique_linkage_name_suffix (),
+                  decl_unique_linkage_name_suffix_len))
+       return NULL;
+      nonunique_name = XNEWVEC (char, suffix - sname + 1);
+      memcpy (nonunique_name, sname, suffix - sname);
+      nonunique_name[suffix - sname] = 0;
+      sname = nonunique_name;
+    }
+  symtab_node *n = cgraph_node::get_for_asmname (get_identifier (sname));
+  if (nonunique_name)
+    free (nonunique_name);
+  for (;n; n = n->next_sharing_asm_name)
     if (cgraph_node *cn = dyn_cast <cgraph_node *> (n))
       if (cn->definition && cn->has_gimple_body_p ())
-       return cn;
+       {
+         /* Do not confuse private symbol clasing with public
+            symbols.  */
+         if (afdo_source_profile->uses_unique_names ()
+             && (!symtab->function_flags_ready
+                 || !flag_unique_internal_linkage_names)
+             && !nonunique_name
+             && !TREE_PUBLIC (cn->decl))
+           return NULL;
+         return cn;
+       }
   return NULL;
 }
 
@@ -1129,9 +1185,15 @@ function_instance::offline_if_in_set (name_index_set 
&seen,
 static int
 match_with_target (gimple *stmt, function_instance *inlined_fn, cgraph_node *n)
 {
+  char *name_to_free = NULL;
   const char *symbol_name = IDENTIFIER_POINTER
                               (DECL_ASSEMBLER_NAME (n->decl));
   const char *name = afdo_string_table->get_name (inlined_fn->name ());
+  if (!TREE_PUBLIC (n->decl)
+      && (!flag_unique_internal_linkage_names
+         || !symtab->function_flags_ready)
+      && afdo_source_profile->uses_unique_names ())
+    symbol_name = name_to_free = decl_unique_linkage_name (n->decl);
   if (strcmp (name, symbol_name))
     {
       int i;
@@ -1144,10 +1206,11 @@ match_with_target (gimple *stmt, function_instance 
*inlined_fn, cgraph_node *n)
            in_suffix = true;
        }
       /* Accept dwarf names and stripped suffixes.  */
-      if (!strcmp (lang_hooks.dwarf_name (n->decl, 0),
-                  afdo_string_table->get_name (inlined_fn->name ()))
-         || (!name[i] && symbol_name[i] == '.')
-         || in_suffix)
+      if (afdo_source_profile->uses_unique_names ()
+         && (!strcmp (lang_hooks.dwarf_name (n->decl, 0),
+                      afdo_string_table->get_name (inlined_fn->name ()))
+             || (!name[i] && symbol_name[i] == '.')
+             || in_suffix))
        {
          int index = afdo_string_table->get_index (symbol_name);
          if (index == -1)
@@ -1163,8 +1226,12 @@ match_with_target (gimple *stmt, function_instance 
*inlined_fn, cgraph_node *n)
                  "auto-profile contains inlined "
                  "function with symbol name %s instead of symbol name %s",
                  name, symbol_name);
+      if (name_to_free)
+       free (name_to_free);
       return 0;
     }
+  if (name_to_free)
+    free (name_to_free);
   return 1;
 }
 
@@ -1892,6 +1986,9 @@ autofdo_source_profile::offline_external_functions ()
     {
       const char *n1 = afdo_string_table->get_name (i);
       char *n2 = get_original_name (n1);
+      if (!afdo_source_profile->uses_unique_names ()
+         && unique_linkage_name_suffix_start (n1))
+       afdo_source_profile->set_uses_unique_names ();
       if (!strcmp (n1, n2))
        {
          free (n2);
@@ -1924,13 +2021,28 @@ autofdo_source_profile::offline_external_functions ()
     {
       const char *name
          = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl));
-      const char *dwarf_name = lang_hooks.dwarf_name (node->decl, 0);
-      int index = afdo_string_table->get_index (name);
+      const char *dwarf_name
+             = afdo_source_profile->uses_unique_names ()
+               ? NULL : lang_hooks.dwarf_name (node->decl, 0);
+      char *unique_name = NULL;
+      int index;
+     
+      if (!afdo_source_profile->uses_unique_names ()
+         || TREE_PUBLIC (node->decl)
+         || (flag_unique_internal_linkage_names
+             && symtab->function_flags_ready))
+       index = afdo_string_table->get_index (name);
+      else
+       {
+         name = unique_name = decl_unique_linkage_name (node->decl);
+         index = afdo_string_table->get_index (unique_name);
+       }
 
       /* Inline function may be identified by its dwarf names;
         rename them to symbol names.  With LTO dwarf names are
         lost in free_lange_data.  */
-      if (strcmp (name, dwarf_name))
+      if (!afdo_source_profile->uses_unique_names ()
+         && strcmp (name, dwarf_name))
        {
          int index2 = afdo_string_table->get_index (dwarf_name);
          if (index2 != -1)
@@ -1960,13 +2072,21 @@ autofdo_source_profile::offline_external_functions ()
        {
          if (dump_file)
            {
-             fprintf (dump_file,
-                      "Node %s not in auto profile (%s neither %s)\n",
-                      node->dump_name (),
-                      name,
-                      dwarf_name);
+             if (dwarf_name)
+               fprintf (dump_file,
+                        "Node %s not in auto profile (%s neither %s)\n",
+                        node->dump_name (),
+                        name,
+                        dwarf_name);
+             else
+               fprintf (dump_file,
+                        "Node %s (symbol %s) not in auto profile\n",
+                        node->dump_name (),
+                        name);
            }
        }
+      if (unique_name)
+       free (unique_name);
     }
 
   for (auto iter : to_symbol_name)
diff --git a/gcc/common.opt b/gcc/common.opt
index 3d65656e658..6bda709f708 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3329,6 +3329,10 @@ Common Var(flag_unconstrained_commons) Optimization
 Assume common declarations may be overridden with ones with a larger
 trailing array.
 
+funique-internal-linkage-names
+Common Var(flag_unique_internal_linkage_names) 
+Output unique names to debug info
+
 funit-at-a-time
 Common Var(flag_unit_at_a_time) Init(1)
 Compile whole compilation unit at a time.
diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index d1a55dbcbcb..004829be666 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -97,6 +97,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "rtl-iter.h"
 #include "stringpool.h"
 #include "attribs.h"
+#include "ipa-utils.h"
 #include "file-prefix-map.h" /* remap_debug_filename()  */
 
 static void dwarf2out_source_line (unsigned int, unsigned int, const char *,
@@ -22165,6 +22166,16 @@ static void
 add_linkage_attr (dw_die_ref die, tree decl)
 {
   const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+  char *name_to_free = NULL;
+
+  /* DWARF is produced early attribute may be added before
+     ipa_visibility pass which adds linkage suffixes.  */
+  if (flag_unique_internal_linkage_names
+      && TREE_CODE (decl) == FUNCTION_DECL
+      && !DECL_PRESERVE_P (decl)
+      && !TREE_PUBLIC (decl)
+      && !unique_linkage_name_suffix_start (name))
+    name = name_to_free = decl_unique_linkage_name (decl);
 
   /* Mimic what assemble_name_raw does with a leading '*'.  */
   if (name[0] == '*')
@@ -22174,6 +22185,8 @@ add_linkage_attr (dw_die_ref die, tree decl)
     add_AT_string (die, DW_AT_linkage_name, name);
   else
     add_AT_string (die, DW_AT_MIPS_linkage_name, name);
+  if (name_to_free)
+    free (name_to_free);
 }
 
 /* Add source coordinate attributes for the given decl.  */
@@ -22219,7 +22232,12 @@ add_linkage_name (dw_die_ref die, tree decl)
 {
   if (debug_info_level > DINFO_LEVEL_NONE
       && VAR_OR_FUNCTION_DECL_P (decl)
-      && TREE_PUBLIC (decl)
+      && (TREE_PUBLIC (decl)
+         /* Add linkage name for auto-profile.  create_gcov then
+            will use these names that we can uniquely identify with
+            symbols.  */
+         || (flag_unique_internal_linkage_names
+             && TREE_CODE (decl) == FUNCTION_DECL))
       && !(VAR_P (decl) && DECL_REGISTER (decl))
       && die->die_tag != DW_TAG_member)
     add_linkage_name_raw (die, decl);
diff --git a/gcc/ipa-utils.h b/gcc/ipa-utils.h
index f33d21e64ce..0c0e98efbfc 100644
--- a/gcc/ipa-utils.h
+++ b/gcc/ipa-utils.h
@@ -291,4 +291,12 @@ lto_streaming_expected_p ()
   return (flag_wpa || flag_incremental_link == INCREMENTAL_LINK_LTO);
 }
 
+/* Length of unique linkage suffixes.
+   ".__uniq." is 8 characters and md5 hash as decimal number is 39.  */
+
+constexpr size_t decl_unique_linkage_name_suffix_len = 8 + 39;
+extern const char *decl_unique_linkage_name_suffix ();
+extern char *decl_unique_linkage_name (tree decl);
+extern const char *unique_linkage_name_suffix_start (const char *);
+
 #endif  /* GCC_IPA_UTILS_H  */
diff --git a/gcc/ipa-visibility.cc b/gcc/ipa-visibility.cc
index 8097a03e240..58389afa837 100644
--- a/gcc/ipa-visibility.cc
+++ b/gcc/ipa-visibility.cc
@@ -74,6 +74,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #include "config.h"
 #include "system.h"
+#include "md5.h"
 #include "coretypes.h"
 #include "tm.h"
 #include "function.h"
@@ -336,6 +337,80 @@ varpool_node::externally_visible_p (void)
   return false;
 }
 
+/* Compute unique linkage suffix of DECL to STR.  */
+
+const char *
+decl_unique_linkage_name_suffix ()
+{
+  static char str[decl_unique_linkage_name_suffix_len + 1];
+  if (str[0])
+    return str;
+  char *to_hash = xstrdup (aux_base_name);
+  if (endswith (to_hash, ".gk"))
+    to_hash[strlen (to_hash) - 3] = '\0';
+  unsigned char checksum[16];
+  struct md5_ctx ctx;
+  md5_init_ctx (&ctx);
+  md5_process_bytes (to_hash, strlen (to_hash), &ctx);
+  md5_finish_ctx (&ctx, checksum);
+  FIXED_WIDE_INT (128) tmp = 0;
+  for (int i = 15; i >= 0; i--)
+    tmp = (tmp << 8) + checksum[i];
+  memcpy (str, ".__uniq.", 8);
+  /* Use decimal encoding of md5 sum to so demanglers treat it is
+     a clone index.  */
+  for (int i = 38; i >= 0; i--)
+    {
+      FIXED_WIDE_INT (128) digit, ten = HOST_WIDE_INT_UC (10);
+      tmp = wi::divmod_trunc (tmp, ten, UNSIGNED, &digit);
+      str[i + 8] = '0' + (digit.to_uhwi ());
+    }
+  str[39 + 8] = 0;
+  free (to_hash);
+  return str;
+}
+
+/* Compute unique name of decl in the form of
+   assembler_name.uniq_<md5sum>
+   Caller is responsible for releasing the string.  */
+
+char *
+decl_unique_linkage_name (tree decl)
+{
+  gcc_checking_assert (!TREE_PUBLIC (decl));
+  const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+  int len = IDENTIFIER_LENGTH (DECL_ASSEMBLER_NAME (decl));
+  char *unique_name = XNEWVEC (char, len + decl_unique_linkage_name_suffix_len 
+ 1);
+  memcpy (unique_name, name, len);
+  memcpy (unique_name + len, decl_unique_linkage_name_suffix (),
+         decl_unique_linkage_name_suffix_len + 1);
+  return unique_name;
+}
+
+/* If NAME has unique linkage suffix return its start
+   otherwise return NULL.  */
+
+const char *
+unique_linkage_name_suffix_start (const char *name)
+{
+  const char *c = strstr (name, ".__uniq.");
+  if (!c)
+    return 0;
+  for (int i = 0; i < 39; i++)
+    if (c[i+8] < '0' || c[i+8] > '9')
+      {
+       fprintf (stderr, "bad %s\n", name);
+       return NULL;
+      }
+  if (c[decl_unique_linkage_name_suffix_len]
+      && c[decl_unique_linkage_name_suffix_len] != '.')
+    {
+      fprintf (stderr, "bad2 %s %c\n", name, 
c[decl_unique_linkage_name_suffix_len]);
+      return NULL;
+    }
+  return c;
+}
+
 /* Return true if reference to NODE can be replaced by a local alias.
    Local aliases save dynamic linking overhead and enable more optimizations.
  */
@@ -752,6 +827,19 @@ function_and_variable_visibility (bool whole_program)
          if (DECL_EXTERNAL (decl_node->decl))
            DECL_EXTERNAL (node->decl) = 1;
        }
+      if (flag_unique_internal_linkage_names
+         && !flag_wpa
+         && !in_lto_p
+         && TREE_CODE (node->decl) == FUNCTION_DECL
+         && !DECL_PRESERVE_P (node->decl)
+         && !TREE_PUBLIC (node->decl)
+         && !unique_linkage_name_suffix_start
+               (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl))))
+       {
+         char *unique_name = decl_unique_linkage_name (node->decl);
+         symtab->change_decl_assembler_name (node->decl, get_identifier 
(unique_name));
+         free (unique_name);
+       }
 
       update_visibility_by_resolution_info (node);
       if (node->weakref)

Reply via email to