Hi!

I'd like to ping this patch for stage1.
Bootstrapped/regtested it again last night successfully.

On Fri, Feb 08, 2019 at 08:36:33AM +0100, Jakub Jelinek wrote:
> The following patch uses a simple data flow to find live (addressable)
> variables at certain spots (for tail call discovery at the point of the
> potential tail call, so that we don't reject tail calls because of variables
> that can be known out of scope already so that people don't have to find
> ugly workarounds if they really need tail calls, and for the recently
> added inline EH pad clobber addition so that we don't add too many
> variables).  Bootstrapped/regtested on x86_64-linux and i686-linux.
> 
> Haven't gathered statistics on how often does it trigger yet.
> Shall I use double queue (pending/working sbitmaps) to speed it up (guess I
> could gather statistics if that helps reasonably)?  But if so, perhaps
> add_scope_conflicts should change too.  I haven't shared code with
> what add_scope_conflicts does because it isn't really that big chunk of code
> and would need many modifications to make it generic enough to handle
> add_scope_conflicts needs, plus as I wanted to make it a helper for other
> optimizations, I didn't want to use bb->aux etc.  For tail call, I see
> another option to compute it unconditionally once at the start of the pass
> for all may_be_aliased auto_var_in_fn_p vars and then just walk a single
> bb with each (potential) tail call, instead of computing it for every
> potential tail call again (on the other side with perhaps smaller set of
> variables).  In that case e.g. vars == NULL argument could imply the
> VAR_P && may_be_aliased && auto_var_in_fn_p test instead of bitmap_bit_p
> test, we could drop the stop_after argument and instead export the _1
> function renamed to something to walk a single bb (or wrapper to it that
> would set up the structure) until stop_after.
> 
> Thoughts on this?
> 
> 2019-02-08  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/89060
>       * tree-ssa-live.h (compute_live_vars, destroy_live_vars): Declare.
>       * tree-ssa-live.c: Include gimple-walk.h and cfganal.h.
>       (struct compute_live_vars_data): New type.
>       (compute_live_vars_visit, compute_live_vars_1, compute_live_vars,
>       destroy_live_vars): New functions.
>       * tree-tailcall.c (find_tail_calls): Perform variable life analysis
>       before punting.
>       * tree-inline.h (struct copy_body_data): Add eh_landing_pad_dest
>       member.
>       * tree-inline.c (add_clobbers_to_eh_landing_pad): Remove BB argument.
>       Perform variable life analysis to select variables that really need
>       clobbers added.
>       (copy_edges_for_bb): Don't call add_clobbers_to_eh_landing_pad here,
>       instead set id->eh_landing_pad_dest and assert it is the same.
>       (copy_cfg_body): Call it here if id->eh_landing_pad_dest is non-NULL.
> 
>       * gcc.dg/tree-ssa/pr89060.c: New test.
> 
> --- gcc/tree-ssa-live.h.jj    2019-01-01 12:37:16.967978068 +0100
> +++ gcc/tree-ssa-live.h       2019-02-07 19:02:58.233530924 +0100
> @@ -265,6 +265,8 @@ extern tree_live_info_p calculate_live_r
>  extern void debug (tree_live_info_d &ref);
>  extern void debug (tree_live_info_d *ptr);
>  extern void dump_live_info (FILE *, tree_live_info_p, int);
> +extern vec<bitmap_head> compute_live_vars (struct function *, bitmap, gimple 
> *);
> +extern void destroy_live_vars (vec<bitmap_head> &);
>  
>  
>  /*  Return TRUE if P is marked as a global in LIVE.  */
> --- gcc/tree-ssa-live.c.jj    2019-01-01 12:37:16.055993032 +0100
> +++ gcc/tree-ssa-live.c       2019-02-07 19:34:33.046401259 +0100
> @@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.
>  #include "stringpool.h"
>  #include "attribs.h"
>  #include "optinfo.h"
> +#include "gimple-walk.h"
> +#include "cfganal.h"
>  
>  static void verify_live_on_entry (tree_live_info_p);
>  
> @@ -1194,8 +1196,142 @@ calculate_live_ranges (var_map map, bool
>  
>    return live;
>  }
> +
> +/* Data structure for compute_live_vars* functions.  */
>  
> +struct compute_live_vars_data {
> +  /* Vector of bitmaps for live vars at the end of basic blocks,
> +     indexed by bb->index.  ACTIVE[ENTRY_BLOCK] must be empty bitmap,
> +     ACTIVE[EXIT_BLOCK] is used for STOP_AFTER.  */
> +  vec<bitmap_head> active;
> +  /* Work bitmap of currently live variables.  */
> +  bitmap work;
> +  /* Bitmap of interesting variables.  Variables with uids not in this
> +     bitmap are not tracked.  */
> +  bitmap vars;
> +};
>  
> +/* Callback for walk_stmt_load_store_addr_ops.  If OP is a VAR_DECL with
> +   uid set in DATA->vars, enter its uid into bitmap DATA->work.  */
> +
> +static bool
> +compute_live_vars_visit (gimple *, tree op, tree, void *pdata)
> +{
> +  compute_live_vars_data *data = (compute_live_vars_data *) pdata;
> +  op = get_base_address (op);
> +  if (op && VAR_P (op) && bitmap_bit_p (data->vars, DECL_UID (op)))
> +    bitmap_set_bit (data->work, DECL_UID (op));
> +  return false;
> +}
> +
> +/* Helper routine for compute_live_vars, calculating the sets of live
> +   variables at the end of BB, leaving the result in DATA->work.
> +   If STOP_AFTER is non-NULL, stop processing after that stmt.  */
> +
> +static void
> +compute_live_vars_1 (basic_block bb, compute_live_vars_data *data,
> +                  gimple *stop_after)
> +{
> +  edge e;
> +  edge_iterator ei;
> +  gimple_stmt_iterator gsi;
> +  walk_stmt_load_store_addr_fn visit = compute_live_vars_visit;
> +
> +  bitmap_clear (data->work);
> +  FOR_EACH_EDGE (e, ei, bb->preds)
> +    bitmap_ior_into (data->work, &data->active[e->src->index]);
> +
> +  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +    walk_stmt_load_store_addr_ops (gsi_stmt (gsi), data, NULL, NULL, visit);
> +  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +    {
> +      gimple *stmt = gsi_stmt (gsi);
> +
> +      if (gimple_clobber_p (stmt))
> +     {
> +       tree lhs = gimple_assign_lhs (stmt);
> +       if (VAR_P (lhs) && bitmap_bit_p (data->vars, DECL_UID (lhs)))
> +         bitmap_clear_bit (data->work, DECL_UID (lhs));
> +     }
> +      else if (!is_gimple_debug (stmt))
> +     walk_stmt_load_store_addr_ops (stmt, data, visit, visit, visit);
> +      if (stmt == stop_after)
> +     break;
> +    }
> +}
> +
> +/* For function FN and bitmap of automatic variables VARS, compute which
> +   of those variables are (might be) live at the end of each basic block.
> +   If STOP_AFTER is non-NULL, compute additionally a bitmap of variables
> +   live after the STOP_AFTER statement and store that into element 1
> +   of the returned vector.  */
> +
> +vec<bitmap_head>
> +compute_live_vars (struct function *fn, bitmap vars, gimple *stop_after)
> +{
> +  vec<bitmap_head> active;
> +
> +  /* We approximate the live range of a stack variable by taking the first
> +     mention of its name as starting point(s), and by the end-of-scope
> +     death clobber added by gimplify as ending point(s) of the range.
> +     This overapproximates in the case we for instance moved an address-taken
> +     operation upward, without also moving a dereference to it upwards.
> +     But it's conservatively correct as a variable never can hold values
> +     before its name is mentioned at least once.
> +
> +     We then do a mostly classical bitmap liveness algorithm.  */
> +
> +  active.create (last_basic_block_for_fn (fn));
> +  active.quick_grow (last_basic_block_for_fn (fn));
> +  for (int i = 0; i < last_basic_block_for_fn (fn); i++)
> +    bitmap_initialize (&active[i], &bitmap_default_obstack);
> +
> +  bitmap work = BITMAP_ALLOC (NULL);
> +
> +  int *rpo = XNEWVEC (int, last_basic_block_for_fn (fn));
> +  int n_bbs = pre_and_rev_post_order_compute_fn (fn, NULL, rpo, false);
> +
> +  bool changed = true;
> +  compute_live_vars_data data = { active, work, vars };
> +  while (changed)
> +    {
> +      int i;
> +      changed = false;
> +      for (i = 0; i < n_bbs; i++)
> +     {
> +       basic_block bb = BASIC_BLOCK_FOR_FN (fn, rpo[i]);
> +       compute_live_vars_1 (bb, &data, NULL);
> +       if (bitmap_ior_into (&active[bb->index], work))
> +         changed = true;
> +     }
> +    }
> +
> +  free (rpo);
> +
> +  if (stop_after)
> +    {
> +      basic_block bb = gimple_bb (stop_after);
> +      compute_live_vars_1 (bb, &data, stop_after);
> +      bitmap_copy (&active[EXIT_BLOCK], work);
> +    }
> +
> +  BITMAP_FREE (work);
> +
> +  return active;
> +}
> +
> +/* Destroy what compute_live_vars has returned when it is no longer needed.  
> */
> +
> +void
> +destroy_live_vars (vec<bitmap_head> &active)
> +{
> +  unsigned len = active.length ();
> +  for (unsigned i = 0; i < len; i++)
> +    bitmap_clear (&active[i]);
> +
> +  active.release ();
> +}
> +
>  /* Output partition map MAP to file F.  */
>  
>  void
> --- gcc/tree-tailcall.c.jj    2019-01-01 12:37:21.359906007 +0100
> +++ gcc/tree-tailcall.c       2019-02-07 19:20:15.496501224 +0100
> @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.
>  #include "cfgloop.h"
>  #include "common/common-target.h"
>  #include "ipa-utils.h"
> +#include "tree-ssa-live.h"
>  
>  /* The file implements the tail recursion elimination.  It is also used to
>     analyze the tail calls in general, passing the results to the rtl level
> @@ -515,6 +516,7 @@ find_tail_calls (basic_block bb, struct
>    /* Make sure the tail invocation of this function does not indirectly
>       refer to local variables.  (Passing variables directly by value
>       is OK.)  */
> +  bitmap local_decls = NULL;
>    FOR_EACH_LOCAL_DECL (cfun, idx, var)
>      {
>        if (TREE_CODE (var) != PARM_DECL
> @@ -522,6 +524,28 @@ find_tail_calls (basic_block bb, struct
>         && may_be_aliased (var)
>         && (ref_maybe_used_by_stmt_p (call, var)
>             || call_may_clobber_ref_p (call, var)))
> +     {
> +       if (!VAR_P (var))
> +         {
> +           if (local_decls)
> +             BITMAP_FREE (local_decls);
> +           return;
> +         }
> +       if (local_decls == NULL)
> +         local_decls = BITMAP_ALLOC (NULL);
> +       bitmap_set_bit (local_decls, DECL_UID (var));
> +     }
> +    }
> +
> +  /* If any such variables are found, try harder using variable life
> +     tracking to see if those variables aren't already out of scope.  */
> +  if (local_decls)
> +    {
> +      vec<bitmap_head> live = compute_live_vars (cfun, local_decls, call);
> +      bool any_live_p = !bitmap_empty_p (&live[EXIT_BLOCK]);
> +      destroy_live_vars (live);
> +      BITMAP_FREE (local_decls);
> +      if (any_live_p)
>       return;
>      }
>  
> --- gcc/tree-inline.h.jj      2019-01-18 11:06:53.526717141 +0100
> +++ gcc/tree-inline.h 2019-02-07 19:46:45.090363784 +0100
> @@ -156,6 +156,10 @@ struct copy_body_data
>       when inlining a call within an OpenMP SIMD-on-SIMT loop.  */
>    vec<tree> *dst_simt_vars;
>  
> +  /* Basic block to which clobbers for local variables from the inline
> +     function that need to live in memory should be added.  */
> +  basic_block eh_landing_pad_dest;
> +
>    /* If clobbers for local variables from the inline function
>       that need to live in memory should be added to EH landing pads
>       outside of the inlined function, this should be the number
> --- gcc/tree-inline.c.jj      2019-01-27 12:55:19.639502799 +0100
> +++ gcc/tree-inline.c 2019-02-07 20:54:11.237908304 +0100
> @@ -61,6 +61,7 @@ along with GCC; see the file COPYING3.
>  #include "attribs.h"
>  #include "sreal.h"
>  #include "tree-cfgcleanup.h"
> +#include "tree-ssa-live.h"
>  
>  /* I'm not real happy about this, but we need to handle gimple and
>     non-gimple trees.  */
> @@ -2191,12 +2192,14 @@ update_ssa_across_abnormal_edges (basic_
>  }
>  
>  /* Insert clobbers for automatic variables of inlined ID->src_fn
> -   function at the start of basic block BB.  */
> +   function at the start of basic block ID->eh_landing_pad_dest.  */
>  
>  static void
> -add_clobbers_to_eh_landing_pad (basic_block bb, copy_body_data *id)
> +add_clobbers_to_eh_landing_pad (copy_body_data *id)
>  {
>    tree var;
> +  basic_block bb = id->eh_landing_pad_dest;
> +  bitmap vars = NULL;
>    unsigned int i;
>    FOR_EACH_VEC_SAFE_ELT (id->src_cfun->local_decls, i, var)
>      if (VAR_P (var)
> @@ -2218,12 +2221,44 @@ add_clobbers_to_eh_landing_pad (basic_bl
>           && !is_gimple_reg (new_var)
>           && auto_var_in_fn_p (new_var, id->dst_fn))
>         {
> +         if (vars == NULL)
> +           vars = BITMAP_ALLOC (NULL);
> +         bitmap_set_bit (vars, DECL_UID (var));
> +       }
> +      }
> +  if (vars == NULL)
> +    return;
> +
> +  vec<bitmap_head> live = compute_live_vars (id->src_cfun, vars, NULL);
> +  FOR_EACH_VEC_SAFE_ELT (id->src_cfun->local_decls, i, var)
> +    if (VAR_P (var) && bitmap_bit_p (vars, DECL_UID (var)))
> +      {
> +     edge e;
> +     edge_iterator ei;
> +     bool needed = false;
> +     FOR_EACH_EDGE (e, ei, bb->preds)
> +       if ((e->flags & EDGE_EH) != 0
> +           && e->src->index >= id->add_clobbers_to_eh_landing_pads)
> +         {
> +           basic_block src_bb = (basic_block) e->src->aux;
> +
> +           if (bitmap_bit_p (&live[src_bb->index], DECL_UID (var)))
> +             {
> +               needed = true;
> +               break;
> +             }
> +         }
> +     if (needed)
> +       {
> +         tree new_var = *id->decl_map->get (var);
>           gimple_stmt_iterator gsi = gsi_after_labels (bb);
>           tree clobber = build_clobber (TREE_TYPE (new_var));
>           gimple *clobber_stmt = gimple_build_assign (new_var, clobber);
>           gsi_insert_before (&gsi, clobber_stmt, GSI_NEW_STMT);
>         }
>        }
> +  destroy_live_vars (live);
> +  BITMAP_FREE (vars);
>  }
>  
>  /* Copy edges from BB into its copy constructed earlier, scale profile
> @@ -2358,8 +2393,10 @@ copy_edges_for_bb (basic_block bb, profi
>                 e->probability = profile_probability::never ();
>               if (e->dest->index < id->add_clobbers_to_eh_landing_pads)
>                 {
> -                 add_clobbers_to_eh_landing_pad (e->dest, id);
> -                 id->add_clobbers_to_eh_landing_pads = 0;
> +                 if (id->eh_landing_pad_dest == NULL)
> +                   id->eh_landing_pad_dest = e->dest;
> +                 else
> +                   gcc_assert (id->eh_landing_pad_dest == e->dest);
>                 }
>             }
>          }
> @@ -2802,6 +2839,12 @@ copy_cfg_body (copy_body_data * id,
>        need_debug_cleanup |= copy_edges_for_bb (bb, num, den, exit_block_map,
>                                              abnormal_goto_dest, id);
>  
> +  if (id->eh_landing_pad_dest)
> +    {
> +      add_clobbers_to_eh_landing_pad (id);
> +      id->eh_landing_pad_dest = NULL;
> +    }
> +
>    if (new_entry)
>      {
>        edge e = make_edge (entry_block_map, (basic_block)new_entry->aux,
> --- gcc/testsuite/gcc.dg/tree-ssa/pr89060.c.jj        2019-02-07 
> 19:39:13.481790192 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr89060.c   2019-02-07 19:40:17.532736916 
> +0100
> @@ -0,0 +1,26 @@
> +/* PR tree-optimization/89060 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-tailc" } */
> +/* { dg-final { scan-tree-dump-not "baz \\\(1\\\); \\\[tail call\\\]" 
> "tailc" } } */
> +/* { dg-final { scan-tree-dump "baz \\\(2\\\); \\\[tail call\\\]" "tailc" } 
> } */
> +
> +void qux (char *, int n);
> +int baz (int);
> +
> +int
> +foo (int n)
> +{
> +  char buf[64];
> +  qux (buf, n);
> +  return baz (1);
> +}
> +
> +int
> +bar (int n)
> +{
> +  {
> +    char buf[64];
> +    qux (buf, n);
> +  }
> +  return baz (2);
> +}

        Jakub

Reply via email to