WMichal Jires <[email protected]> writes:

>> 
>> I would add a dump file which shows what asm statements made symbol to
>> go global for easier debugging later (I suppose eventually this may get
>> us surprises)
>> > +
>> > +        l = 0;
>> > +      }
>> > +
>> > +    /* Prevent overflow.  */
>> > +    if (l >= ident_buffer_size)
>> > +      {
>> > +        ident_buffer_size *= 2;
>> > +        ident = (char*) xrealloc (ident, ident_buffer_size);
>> 
>> Perhas auto_vec<char> and safe_push would be more readable (and less
>> effecient) way to do this in this millenia?
>> Honza
>
> Changes in v3:
> Applied the suggested changes.
>

This testcase ICEs with v3 of the series (not checked exactly which
patch, I assume this one):

__asm("");
char main_argv;

$ gcc -flto x.ii
lto1: error: type variant has different ‘TREE_TYPE’
 <array_type 0x7ff92de9e0a8
    type <integer_type 0x7ff92de93f18 char public QI
        size <integer_cst 0x7ff92e002eb8 constant 8>
        unit-size <integer_cst 0x7ff92e002ed0 constant 1>
        align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 
0x7ff92e0173f0 precision:8 min <integer_cst 0x7ff92de8b780 -128> max 
<integer_cst 0x7ff92de8b798 127>>
    QI size <integer_cst 0x7ff92e002eb8 8> unit-size <integer_cst 
0x7ff92e002ed0 1>
    align:8 warn_if_not_align:0 symtab:0 alias-set -1 structural-equality
    domain <integer_type 0x7ff92de9e000
        type <integer_type 0x7ff92e017000 sizetype public unsigned DI
            size <integer_cst 0x7ff92e002dc8 constant 64>
            unit-size <integer_cst 0x7ff92e002de0 constant 8>
            align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 
0x7ff92e017738 precision:64 min <integer_cst 0x7ff92e002df8 0> max <integer_cst 
0x7ff92e0034a0 18446744073709551615>>
        DI size <integer_cst 0x7ff92e002dc8 64> unit-size <integer_cst 
0x7ff92e002de0 8>
        align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 
0x7ff92e017738 precision:64 min <integer_cst 0x7ff92e002df8 0> max <integer_cst 
0x7ff92e002df8 0>>>
lto1: error: type variant’s ‘TREE_TYPE’
 <integer_type 0x7ff92de93f18 char public QI
    size <integer_cst 0x7ff92e002eb8 type <integer_type 0x7ff92e0170a8 
bitsizetype> constant 8>
    unit-size <integer_cst 0x7ff92e002ed0 type <integer_type 0x7ff92e017000 
sizetype> constant 1>
    align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 
0x7ff92e0173f0 precision:8 min <integer_cst 0x7ff92de8b780 -128> max 
<integer_cst 0x7ff92de8b798 127>>
lto1: error: type’s ‘TREE_TYPE’
 <integer_type 0x7ff92de9e150 char readonly QI
    size <integer_cst 0x7ff92e002eb8 type <integer_type 0x7ff92e0170a8 
bitsizetype> constant 8>
    unit-size <integer_cst 0x7ff92e002ed0 type <integer_type 0x7ff92e017000 
sizetype> constant 1>
    align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 
0x7ff92e0173f0 precision:8 min <integer_cst 0x7ff92de8b780 -128> max 
<integer_cst 0x7ff92de8b798 127>>
 <array_type 0x7ff92de9e1f8
    type <integer_type 0x7ff92de9e150 char readonly QI
        size <integer_cst 0x7ff92e002eb8 constant 8>
        unit-size <integer_cst 0x7ff92e002ed0 constant 1>
        align:8 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 
0x7ff92e0173f0 precision:8 min <integer_cst 0x7ff92de8b780 -128> max 
<integer_cst 0x7ff92de8b798 127>>
    QI size <integer_cst 0x7ff92e002eb8 8> unit-size <integer_cst 
0x7ff92e002ed0 1>
    align:8 warn_if_not_align:0 symtab:0 alias-set -1 structural-equality
    domain <integer_type 0x7ff92de9e000
        type <integer_type 0x7ff92e017000 sizetype public unsigned DI
            size <integer_cst 0x7ff92e002dc8 constant 64>
            unit-size <integer_cst 0x7ff92e002de0 constant 8>
            align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 
0x7ff92e017738 precision:64 min <integer_cst 0x7ff92e002df8 0> max <integer_cst 
0x7ff92e0034a0 18446744073709551615>>
        DI size <integer_cst 0x7ff92e002dc8 64> unit-size <integer_cst 
0x7ff92e002de0 8>
        align:64 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 
0x7ff92e017738 precision:64 min <integer_cst 0x7ff92e002df8 0> max <integer_cst 
0x7ff92e002df8 0>>>

> ---
>
> This new pass heuristically detects symbols referenced by toplevel
> assembly to prevent their optimization.
>
> Heuristics is done by comparing identifiers in assembly to known
> symbols.
>
> The pass is split into 2 passes, in LGEN and in WPA.
> There must be one pass for WPA to be able to reference any symbol.
> However in WPA there may be multiple symbols with the same name,
> so we handle those local symbols in LGEN.
>
> gcc/ChangeLog:
>
>       * asm-toplevel.cc (mark_fragile_ref_by_asm):
>       Add marked_local to handle symbol as local.
>       (ipa_asm_heuristics): New.
>       (class pass_ipa_asm): New.
>       (make_pass_ipa_asm_lgen): New.
>       (make_pass_ipa_asm_wpa): New.
>       * common.opt: New flto-toplevel-asm-heuristics.
>       * passes.def: New asm passes.
>       * timevar.def (TV_IPA_LTO_ASM): New.
>       * tree-pass.h (make_pass_ipa_asm_lgen): New.
>       (make_pass_ipa_asm_wpa): New.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.dg/lto/toplevel-simple-asm-1_0.c: New test.
>       * gcc.dg/lto/toplevel-simple-asm-1_1.c: New test.
>       * gcc.dg/lto/toplevel-simple-asm-2_0.c: New test.
>       * gcc.dg/lto/toplevel-simple-asm-2_1.c: New test.
> ---
>  gcc/asm-toplevel.cc                           | 145 +++++++++++++++++-
>  gcc/common.opt                                |   4 +
>  gcc/passes.def                                |   2 +
>  .../gcc.dg/lto/toplevel-simple-asm-1_0.c      |  19 +++
>  .../gcc.dg/lto/toplevel-simple-asm-1_1.c      |  12 ++
>  .../gcc.dg/lto/toplevel-simple-asm-2_0.c      |  10 ++
>  .../gcc.dg/lto/toplevel-simple-asm-2_1.c      |  12 ++
>  gcc/timevar.def                               |   1 +
>  gcc/tree-pass.h                               |   2 +
>  9 files changed, 205 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_0.c
>  create mode 100644 gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_1.c
>  create mode 100644 gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_0.c
>  create mode 100644 gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_1.c
>
> diff --git a/gcc/asm-toplevel.cc b/gcc/asm-toplevel.cc
> index 576e4b3ba73..5ab5b668d50 100644
> --- a/gcc/asm-toplevel.cc
> +++ b/gcc/asm-toplevel.cc
> @@ -29,11 +29,11 @@ along with GCC; see the file COPYING3.  If not see
>  /* This symbol must be available and cannot be renamed.
>     Marks the symbol and symbols that reference it.  */
>  static void
> -mark_fragile_ref_by_asm (symtab_node* node)
> +mark_fragile_ref_by_asm (symtab_node* node, bool maybe_local = false)
>  {
>    node->ref_by_asm = true;
>    /* Local symbols must remain in the same partition with their callers.  */
> -  if (!TREE_PUBLIC (node->decl))
> +  if (!TREE_PUBLIC (node->decl) || maybe_local)
>      {
>        unsigned j;
>        ipa_ref *ref;
> @@ -116,3 +116,144 @@ analyze_toplevel_extended_asm ()
>       walk_tree (&l, walk_through_constraints, (void*) &data, NULL);
>      }
>  }
> +
> +
> +/* Checks all toplevel assembly contents and compares them with known 
> symbols.
> +   Marks those symbols with relevant flags.
> +   Heuristics: Detects anything in assembly that looks like an identifer.
> +
> +   This pass must be in WPA, otherwise we will not see all possibly 
> referenced
> +   symbols - if a symbol is only declared, it will not be in the callgraph if
> +   it is only referenced from toplevel assembly.
> +   However in WPA there may be multiple symbols with the same identifier.
> +   The chosen solution is to handle local symbols in LGEN pass first.  */
> +
> +void
> +ipa_asm_heuristics ()
> +{
> +  hash_map<nofree_string_hash, symtab_node *> map;
> +  asm_node *anode = symtab->first_asm_symbol ();
> +  if (!anode)
> +    return;
> +
> +  symtab_node* snode;
> +  if (flag_wpa)
> +    {
> +      FOR_EACH_SYMBOL (snode)
> +     if (TREE_PUBLIC (snode->decl))
> +         map.put (snode->asm_name (), snode);
> +    }
> +  else
> +    {
> +      FOR_EACH_SYMBOL (snode)
> +     map.put (snode->asm_name (), snode);
> +    }
> +
> +  auto_vec<char> ident;
> +
> +  for (; anode; anode = safe_as_a<asm_node*> (anode->next))
> +    {
> +      if (TREE_CODE (anode->asm_str) != STRING_CST)
> +     continue;
> +
> +      const char *asm_str = TREE_STRING_POINTER (anode->asm_str);
> +      int asm_len = TREE_STRING_LENGTH (anode->asm_str);
> +
> +      if (dump_file)
> +     fprintf (dump_file, "Searching for symbols in toplevel asm:\n%s\n---\n",
> +              asm_str);
> +
> +      for (int i = 0; i < asm_len + 1; ++i)
> +     {
> +       char c = 0;
> +       if (i < asm_len)
> +         c = asm_str[i];
> +
> +       if ('0' <= c && c <= '9')
> +         {
> +           if (ident.length ())
> +             ident.safe_push (c);
> +         }
> +       else if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_')
> +         ident.safe_push (c);
> +       else if (ident.length ())
> +         {
> +           ident.safe_push ('\0');
> +           symtab_node **n_ = map.get (ident.begin ());
> +
> +           if (n_)
> +             {
> +               bool is_label = c == ':';
> +
> +               if (dump_file)
> +                 fprintf (dump_file, "Found symbol '%s' (is_label: %d)\n",
> +                          ident.begin (), is_label);
> +
> +               mark_fragile_ref_by_asm (*n_, is_label);
> +             }
> +
> +           ident.truncate (0);
> +         }
> +     }
> +    }
> +}
> +
> +namespace {
> +
> +const pass_data pass_data_ipa_asm =
> +{
> +  IPA_PASS, /* type */
> +  "ipa-asm", /* name */
> +  OPTGROUP_NONE, /* optinfo_flags */
> +  TV_IPA_LTO_ASM, /* tv_id */
> +  0, /* properties_required */
> +  0, /* properties_provided */
> +  0, /* properties_destroyed */
> +  0, /* todo_flags_start */
> +  0, /* todo_flags_finish */
> +};
> +
> +class pass_ipa_asm : public ipa_opt_pass_d
> +{
> +public:
> +  pass_ipa_asm (gcc::context *ctxt)
> +    : ipa_opt_pass_d (pass_data_ipa_asm, ctxt,
> +                   NULL, /* generate_summary */
> +                   NULL, /* write_summary */
> +                   NULL, /* read_summary */
> +                   NULL, /* write_optimization_summary */
> +                   NULL, /* read_optimization_summary */
> +                   NULL, /* stmt_fixup */
> +                   0, /* function_transform_todo_flags_start */
> +                   NULL, /* function_transform */
> +                   NULL) /* variable_transform */
> +  {}
> +
> +  /* opt_pass methods: */
> +
> +  bool gate (function *) final override
> +    {
> +      return (flag_lto || flag_wpa) && flag_toplevel_asm_heuristics;
> +    }
> +
> +  unsigned int execute (function *) final override
> +    {
> +      ipa_asm_heuristics ();
> +      return 0;
> +    }
> +
> +};
> +
> +} // anon namespace
> +
> +ipa_opt_pass_d *
> +make_pass_ipa_asm_lgen (gcc::context *ctxt)
> +{
> +  return new pass_ipa_asm (ctxt);
> +}
> +
> +ipa_opt_pass_d *
> +make_pass_ipa_asm_wpa (gcc::context *ctxt)
> +{
> +  return new pass_ipa_asm (ctxt);
> +}
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 50de980615f..84d1bb5e150 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2360,6 +2360,10 @@ flto-partition=
>  Common Joined RejectNegative Enum(lto_partition_model) 
> Var(flag_lto_partition) Init(LTO_PARTITION_BALANCED)
>  Specify the algorithm to partition symbols and vars at linktime.
>  
> +flto-toplevel-asm-heuristics
> +Common Var(flag_toplevel_asm_heuristics) Init(0)
> +Enable heuristics to recognize symbol identifiers in toplevel (not extended) 
> assembly and prevent their mangling/deletion.
> +
>  ; The initial value of -1 comes from Z_DEFAULT_COMPRESSION in zlib.h.
>  flto-compression-level=
>  Common Joined RejectNegative UInteger Var(flag_lto_compression_level) 
> Init(-1) IntegerRange(0, 19)
> diff --git a/gcc/passes.def b/gcc/passes.def
> index fac04cd86c7..c97eb069bfc 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -157,10 +157,12 @@ along with GCC; see the file COPYING3.  If not see
>    NEXT_PASS (pass_ipa_increase_alignment);
>    NEXT_PASS (pass_ipa_tm);
>    NEXT_PASS (pass_ipa_lower_emutls);
> +  NEXT_PASS (pass_ipa_asm_lgen);
>    TERMINATE_PASS_LIST (all_small_ipa_passes)
>  
>    INSERT_PASSES_AFTER (all_regular_ipa_passes)
>    NEXT_PASS (pass_analyzer);
> +  NEXT_PASS (pass_ipa_asm_wpa);
>    NEXT_PASS (pass_ipa_odr);
>    NEXT_PASS (pass_ipa_whole_program_visibility);
>    NEXT_PASS (pass_ipa_profile);
> diff --git a/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_0.c 
> b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_0.c
> new file mode 100644
> index 00000000000..9d653d4c7a3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_0.c
> @@ -0,0 +1,19 @@
> +/* { dg-lto-do link } */
> +/* { dg-lto-options { {-O2 -flto -flto-toplevel-asm-heuristics 
> -flto-partition=1to1} {-O2 -flto -flto-toplevel-asm-heuristics 
> -flto-partition=max} {-O2 -flto -flto-toplevel-asm-heuristics 
> -flto-partition=cache}} } */
> +
> +void asm_fn();
> +void asm_fn_used();
> +
> +asm(".global asm_fn\nasm_fn:");
> +asm(".global asm_fn_used\nasm_fn_used:");
> +
> +
> +__attribute__((noinline))
> +int privatized_fn(int v) { return v + v;}
> +
> +extern void call_privatized_fn();
> +
> +int main() {
> +  privatized_fn (0);
> +  call_privatized_fn ();
> +}
> diff --git a/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_1.c 
> b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_1.c
> new file mode 100644
> index 00000000000..9544f69f4e5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_1.c
> @@ -0,0 +1,12 @@
> +extern void asm_fn_used();
> +
> +__attribute__((used))
> +void local_caller() {
> +  asm_fn_used();
> +}
> +
> +__attribute__((noipa))
> +static void privatized_fn() { asm volatile ("");}
> +asm(".long privatized_fn");
> +
> +void call_privatized_fn() { privatized_fn();}
> diff --git a/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_0.c 
> b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_0.c
> new file mode 100644
> index 00000000000..315149d7800
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_0.c
> @@ -0,0 +1,10 @@
> +/* { dg-lto-do link } */
> +/* { dg-lto-options { {-O2 -flto -flto-toplevel-asm-heuristics 
> -flto-partition=1to1} {-O2 -flto -flto-toplevel-asm-heuristics 
> -flto-partition=max} {-O2 -flto -flto-toplevel-asm-heuristics 
> -flto-partition=cache}} } */
> +
> +extern int use_statics ();
> +
> +extern int asm_var;
> +
> +int main() {
> +  return asm_var + use_statics ();
> +}
> diff --git a/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_1.c 
> b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_1.c
> new file mode 100644
> index 00000000000..1d7c3798963
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_1.c
> @@ -0,0 +1,12 @@
> +extern void static_asm_fn ();
> +extern int static_asm_var;
> +asm("static_asm_fn:");
> +asm("static_asm_var:");
> +
> +extern int asm_var;
> +asm(".global asm_var\nasm_var:");
> +
> +int use_statics () {
> +  static_asm_fn ();
> +  return static_asm_var;
> +}
> diff --git a/gcc/timevar.def b/gcc/timevar.def
> index 4f60f04baa1..f0f4521894a 100644
> --- a/gcc/timevar.def
> +++ b/gcc/timevar.def
> @@ -81,6 +81,7 @@ DEFTIMEVAR (TV_IPA_INLINING          , "ipa inlining 
> heuristics")
>  DEFTIMEVAR (TV_IPA_FNSPLIT           , "ipa function splitting")
>  DEFTIMEVAR (TV_IPA_COMDATS        , "ipa comdats")
>  DEFTIMEVAR (TV_IPA_OPT                    , "ipa various optimizations")
> +DEFTIMEVAR (TV_IPA_LTO_ASM        , "ipa lto asm heuristics")
>  DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS    , "lto stream decompression")
>  DEFTIMEVAR (TV_IPA_LTO_COMPRESS      , "lto stream compression")
>  DEFTIMEVAR (TV_IPA_LTO_OUTPUT        , "lto stream output")
> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
> index 410341d4711..b0f6ab4cca8 100644
> --- a/gcc/tree-pass.h
> +++ b/gcc/tree-pass.h
> @@ -527,6 +527,8 @@ extern simple_ipa_opt_pass 
> *make_pass_local_optimization_passes (gcc::context *c
>  extern simple_ipa_opt_pass *make_pass_ipa_remove_symbols (gcc::context 
> *ctxt);
>  
>  extern ipa_opt_pass_d *make_pass_analyzer (gcc::context *ctxt);
> +extern ipa_opt_pass_d *make_pass_ipa_asm_lgen (gcc::context *ctxt);
> +extern ipa_opt_pass_d *make_pass_ipa_asm_wpa (gcc::context *ctxt);
>  extern ipa_opt_pass_d *make_pass_ipa_whole_program_visibility (gcc::context
>                                                              *ctxt);
>  extern simple_ipa_opt_pass *make_pass_ipa_increase_alignment (gcc::context

Reply via email to