Hi! I'd like to ping this patch for stage1. Bootstrapped/regtested it again last night successfully.
On Fri, Feb 08, 2019 at 08:36:33AM +0100, Jakub Jelinek wrote: > The following patch uses a simple data flow to find live (addressable) > variables at certain spots (for tail call discovery at the point of the > potential tail call, so that we don't reject tail calls because of variables > that can be known out of scope already so that people don't have to find > ugly workarounds if they really need tail calls, and for the recently > added inline EH pad clobber addition so that we don't add too many > variables). Bootstrapped/regtested on x86_64-linux and i686-linux. > > Haven't gathered statistics on how often does it trigger yet. > Shall I use double queue (pending/working sbitmaps) to speed it up (guess I > could gather statistics if that helps reasonably)? But if so, perhaps > add_scope_conflicts should change too. I haven't shared code with > what add_scope_conflicts does because it isn't really that big chunk of code > and would need many modifications to make it generic enough to handle > add_scope_conflicts needs, plus as I wanted to make it a helper for other > optimizations, I didn't want to use bb->aux etc. For tail call, I see > another option to compute it unconditionally once at the start of the pass > for all may_be_aliased auto_var_in_fn_p vars and then just walk a single > bb with each (potential) tail call, instead of computing it for every > potential tail call again (on the other side with perhaps smaller set of > variables). In that case e.g. vars == NULL argument could imply the > VAR_P && may_be_aliased && auto_var_in_fn_p test instead of bitmap_bit_p > test, we could drop the stop_after argument and instead export the _1 > function renamed to something to walk a single bb (or wrapper to it that > would set up the structure) until stop_after. > > Thoughts on this? > > 2019-02-08 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/89060 > * tree-ssa-live.h (compute_live_vars, destroy_live_vars): Declare. > * tree-ssa-live.c: Include gimple-walk.h and cfganal.h. > (struct compute_live_vars_data): New type. > (compute_live_vars_visit, compute_live_vars_1, compute_live_vars, > destroy_live_vars): New functions. > * tree-tailcall.c (find_tail_calls): Perform variable life analysis > before punting. > * tree-inline.h (struct copy_body_data): Add eh_landing_pad_dest > member. > * tree-inline.c (add_clobbers_to_eh_landing_pad): Remove BB argument. > Perform variable life analysis to select variables that really need > clobbers added. > (copy_edges_for_bb): Don't call add_clobbers_to_eh_landing_pad here, > instead set id->eh_landing_pad_dest and assert it is the same. > (copy_cfg_body): Call it here if id->eh_landing_pad_dest is non-NULL. > > * gcc.dg/tree-ssa/pr89060.c: New test. > > --- gcc/tree-ssa-live.h.jj 2019-01-01 12:37:16.967978068 +0100 > +++ gcc/tree-ssa-live.h 2019-02-07 19:02:58.233530924 +0100 > @@ -265,6 +265,8 @@ extern tree_live_info_p calculate_live_r > extern void debug (tree_live_info_d &ref); > extern void debug (tree_live_info_d *ptr); > extern void dump_live_info (FILE *, tree_live_info_p, int); > +extern vec<bitmap_head> compute_live_vars (struct function *, bitmap, gimple > *); > +extern void destroy_live_vars (vec<bitmap_head> &); > > > /* Return TRUE if P is marked as a global in LIVE. */ > --- gcc/tree-ssa-live.c.jj 2019-01-01 12:37:16.055993032 +0100 > +++ gcc/tree-ssa-live.c 2019-02-07 19:34:33.046401259 +0100 > @@ -41,6 +41,8 @@ along with GCC; see the file COPYING3. > #include "stringpool.h" > #include "attribs.h" > #include "optinfo.h" > +#include "gimple-walk.h" > +#include "cfganal.h" > > static void verify_live_on_entry (tree_live_info_p); > > @@ -1194,8 +1196,142 @@ calculate_live_ranges (var_map map, bool > > return live; > } > + > +/* Data structure for compute_live_vars* functions. */ > > +struct compute_live_vars_data { > + /* Vector of bitmaps for live vars at the end of basic blocks, > + indexed by bb->index. ACTIVE[ENTRY_BLOCK] must be empty bitmap, > + ACTIVE[EXIT_BLOCK] is used for STOP_AFTER. */ > + vec<bitmap_head> active; > + /* Work bitmap of currently live variables. */ > + bitmap work; > + /* Bitmap of interesting variables. Variables with uids not in this > + bitmap are not tracked. */ > + bitmap vars; > +}; > > +/* Callback for walk_stmt_load_store_addr_ops. If OP is a VAR_DECL with > + uid set in DATA->vars, enter its uid into bitmap DATA->work. */ > + > +static bool > +compute_live_vars_visit (gimple *, tree op, tree, void *pdata) > +{ > + compute_live_vars_data *data = (compute_live_vars_data *) pdata; > + op = get_base_address (op); > + if (op && VAR_P (op) && bitmap_bit_p (data->vars, DECL_UID (op))) > + bitmap_set_bit (data->work, DECL_UID (op)); > + return false; > +} > + > +/* Helper routine for compute_live_vars, calculating the sets of live > + variables at the end of BB, leaving the result in DATA->work. > + If STOP_AFTER is non-NULL, stop processing after that stmt. */ > + > +static void > +compute_live_vars_1 (basic_block bb, compute_live_vars_data *data, > + gimple *stop_after) > +{ > + edge e; > + edge_iterator ei; > + gimple_stmt_iterator gsi; > + walk_stmt_load_store_addr_fn visit = compute_live_vars_visit; > + > + bitmap_clear (data->work); > + FOR_EACH_EDGE (e, ei, bb->preds) > + bitmap_ior_into (data->work, &data->active[e->src->index]); > + > + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > + walk_stmt_load_store_addr_ops (gsi_stmt (gsi), data, NULL, NULL, visit); > + for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > + { > + gimple *stmt = gsi_stmt (gsi); > + > + if (gimple_clobber_p (stmt)) > + { > + tree lhs = gimple_assign_lhs (stmt); > + if (VAR_P (lhs) && bitmap_bit_p (data->vars, DECL_UID (lhs))) > + bitmap_clear_bit (data->work, DECL_UID (lhs)); > + } > + else if (!is_gimple_debug (stmt)) > + walk_stmt_load_store_addr_ops (stmt, data, visit, visit, visit); > + if (stmt == stop_after) > + break; > + } > +} > + > +/* For function FN and bitmap of automatic variables VARS, compute which > + of those variables are (might be) live at the end of each basic block. > + If STOP_AFTER is non-NULL, compute additionally a bitmap of variables > + live after the STOP_AFTER statement and store that into element 1 > + of the returned vector. */ > + > +vec<bitmap_head> > +compute_live_vars (struct function *fn, bitmap vars, gimple *stop_after) > +{ > + vec<bitmap_head> active; > + > + /* We approximate the live range of a stack variable by taking the first > + mention of its name as starting point(s), and by the end-of-scope > + death clobber added by gimplify as ending point(s) of the range. > + This overapproximates in the case we for instance moved an address-taken > + operation upward, without also moving a dereference to it upwards. > + But it's conservatively correct as a variable never can hold values > + before its name is mentioned at least once. > + > + We then do a mostly classical bitmap liveness algorithm. */ > + > + active.create (last_basic_block_for_fn (fn)); > + active.quick_grow (last_basic_block_for_fn (fn)); > + for (int i = 0; i < last_basic_block_for_fn (fn); i++) > + bitmap_initialize (&active[i], &bitmap_default_obstack); > + > + bitmap work = BITMAP_ALLOC (NULL); > + > + int *rpo = XNEWVEC (int, last_basic_block_for_fn (fn)); > + int n_bbs = pre_and_rev_post_order_compute_fn (fn, NULL, rpo, false); > + > + bool changed = true; > + compute_live_vars_data data = { active, work, vars }; > + while (changed) > + { > + int i; > + changed = false; > + for (i = 0; i < n_bbs; i++) > + { > + basic_block bb = BASIC_BLOCK_FOR_FN (fn, rpo[i]); > + compute_live_vars_1 (bb, &data, NULL); > + if (bitmap_ior_into (&active[bb->index], work)) > + changed = true; > + } > + } > + > + free (rpo); > + > + if (stop_after) > + { > + basic_block bb = gimple_bb (stop_after); > + compute_live_vars_1 (bb, &data, stop_after); > + bitmap_copy (&active[EXIT_BLOCK], work); > + } > + > + BITMAP_FREE (work); > + > + return active; > +} > + > +/* Destroy what compute_live_vars has returned when it is no longer needed. > */ > + > +void > +destroy_live_vars (vec<bitmap_head> &active) > +{ > + unsigned len = active.length (); > + for (unsigned i = 0; i < len; i++) > + bitmap_clear (&active[i]); > + > + active.release (); > +} > + > /* Output partition map MAP to file F. */ > > void > --- gcc/tree-tailcall.c.jj 2019-01-01 12:37:21.359906007 +0100 > +++ gcc/tree-tailcall.c 2019-02-07 19:20:15.496501224 +0100 > @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. > #include "cfgloop.h" > #include "common/common-target.h" > #include "ipa-utils.h" > +#include "tree-ssa-live.h" > > /* The file implements the tail recursion elimination. It is also used to > analyze the tail calls in general, passing the results to the rtl level > @@ -515,6 +516,7 @@ find_tail_calls (basic_block bb, struct > /* Make sure the tail invocation of this function does not indirectly > refer to local variables. (Passing variables directly by value > is OK.) */ > + bitmap local_decls = NULL; > FOR_EACH_LOCAL_DECL (cfun, idx, var) > { > if (TREE_CODE (var) != PARM_DECL > @@ -522,6 +524,28 @@ find_tail_calls (basic_block bb, struct > && may_be_aliased (var) > && (ref_maybe_used_by_stmt_p (call, var) > || call_may_clobber_ref_p (call, var))) > + { > + if (!VAR_P (var)) > + { > + if (local_decls) > + BITMAP_FREE (local_decls); > + return; > + } > + if (local_decls == NULL) > + local_decls = BITMAP_ALLOC (NULL); > + bitmap_set_bit (local_decls, DECL_UID (var)); > + } > + } > + > + /* If any such variables are found, try harder using variable life > + tracking to see if those variables aren't already out of scope. */ > + if (local_decls) > + { > + vec<bitmap_head> live = compute_live_vars (cfun, local_decls, call); > + bool any_live_p = !bitmap_empty_p (&live[EXIT_BLOCK]); > + destroy_live_vars (live); > + BITMAP_FREE (local_decls); > + if (any_live_p) > return; > } > > --- gcc/tree-inline.h.jj 2019-01-18 11:06:53.526717141 +0100 > +++ gcc/tree-inline.h 2019-02-07 19:46:45.090363784 +0100 > @@ -156,6 +156,10 @@ struct copy_body_data > when inlining a call within an OpenMP SIMD-on-SIMT loop. */ > vec<tree> *dst_simt_vars; > > + /* Basic block to which clobbers for local variables from the inline > + function that need to live in memory should be added. */ > + basic_block eh_landing_pad_dest; > + > /* If clobbers for local variables from the inline function > that need to live in memory should be added to EH landing pads > outside of the inlined function, this should be the number > --- gcc/tree-inline.c.jj 2019-01-27 12:55:19.639502799 +0100 > +++ gcc/tree-inline.c 2019-02-07 20:54:11.237908304 +0100 > @@ -61,6 +61,7 @@ along with GCC; see the file COPYING3. > #include "attribs.h" > #include "sreal.h" > #include "tree-cfgcleanup.h" > +#include "tree-ssa-live.h" > > /* I'm not real happy about this, but we need to handle gimple and > non-gimple trees. */ > @@ -2191,12 +2192,14 @@ update_ssa_across_abnormal_edges (basic_ > } > > /* Insert clobbers for automatic variables of inlined ID->src_fn > - function at the start of basic block BB. */ > + function at the start of basic block ID->eh_landing_pad_dest. */ > > static void > -add_clobbers_to_eh_landing_pad (basic_block bb, copy_body_data *id) > +add_clobbers_to_eh_landing_pad (copy_body_data *id) > { > tree var; > + basic_block bb = id->eh_landing_pad_dest; > + bitmap vars = NULL; > unsigned int i; > FOR_EACH_VEC_SAFE_ELT (id->src_cfun->local_decls, i, var) > if (VAR_P (var) > @@ -2218,12 +2221,44 @@ add_clobbers_to_eh_landing_pad (basic_bl > && !is_gimple_reg (new_var) > && auto_var_in_fn_p (new_var, id->dst_fn)) > { > + if (vars == NULL) > + vars = BITMAP_ALLOC (NULL); > + bitmap_set_bit (vars, DECL_UID (var)); > + } > + } > + if (vars == NULL) > + return; > + > + vec<bitmap_head> live = compute_live_vars (id->src_cfun, vars, NULL); > + FOR_EACH_VEC_SAFE_ELT (id->src_cfun->local_decls, i, var) > + if (VAR_P (var) && bitmap_bit_p (vars, DECL_UID (var))) > + { > + edge e; > + edge_iterator ei; > + bool needed = false; > + FOR_EACH_EDGE (e, ei, bb->preds) > + if ((e->flags & EDGE_EH) != 0 > + && e->src->index >= id->add_clobbers_to_eh_landing_pads) > + { > + basic_block src_bb = (basic_block) e->src->aux; > + > + if (bitmap_bit_p (&live[src_bb->index], DECL_UID (var))) > + { > + needed = true; > + break; > + } > + } > + if (needed) > + { > + tree new_var = *id->decl_map->get (var); > gimple_stmt_iterator gsi = gsi_after_labels (bb); > tree clobber = build_clobber (TREE_TYPE (new_var)); > gimple *clobber_stmt = gimple_build_assign (new_var, clobber); > gsi_insert_before (&gsi, clobber_stmt, GSI_NEW_STMT); > } > } > + destroy_live_vars (live); > + BITMAP_FREE (vars); > } > > /* Copy edges from BB into its copy constructed earlier, scale profile > @@ -2358,8 +2393,10 @@ copy_edges_for_bb (basic_block bb, profi > e->probability = profile_probability::never (); > if (e->dest->index < id->add_clobbers_to_eh_landing_pads) > { > - add_clobbers_to_eh_landing_pad (e->dest, id); > - id->add_clobbers_to_eh_landing_pads = 0; > + if (id->eh_landing_pad_dest == NULL) > + id->eh_landing_pad_dest = e->dest; > + else > + gcc_assert (id->eh_landing_pad_dest == e->dest); > } > } > } > @@ -2802,6 +2839,12 @@ copy_cfg_body (copy_body_data * id, > need_debug_cleanup |= copy_edges_for_bb (bb, num, den, exit_block_map, > abnormal_goto_dest, id); > > + if (id->eh_landing_pad_dest) > + { > + add_clobbers_to_eh_landing_pad (id); > + id->eh_landing_pad_dest = NULL; > + } > + > if (new_entry) > { > edge e = make_edge (entry_block_map, (basic_block)new_entry->aux, > --- gcc/testsuite/gcc.dg/tree-ssa/pr89060.c.jj 2019-02-07 > 19:39:13.481790192 +0100 > +++ gcc/testsuite/gcc.dg/tree-ssa/pr89060.c 2019-02-07 19:40:17.532736916 > +0100 > @@ -0,0 +1,26 @@ > +/* PR tree-optimization/89060 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-tailc" } */ > +/* { dg-final { scan-tree-dump-not "baz \\\(1\\\); \\\[tail call\\\]" > "tailc" } } */ > +/* { dg-final { scan-tree-dump "baz \\\(2\\\); \\\[tail call\\\]" "tailc" } > } */ > + > +void qux (char *, int n); > +int baz (int); > + > +int > +foo (int n) > +{ > + char buf[64]; > + qux (buf, n); > + return baz (1); > +} > + > +int > +bar (int n) > +{ > + { > + char buf[64]; > + qux (buf, n); > + } > + return baz (2); > +} Jakub