On Thu, 17 Jan 2019, Jakub Jelinek wrote: > Hi! > > As the following testcases show, we are unable to share stack slots > for (large) variables from inline functions if something in those inline > functions can throw externally. > > The issue is that the clobbers we have even in the EH paths > are usually removed by ehcleanup1 which attempts to make the functions > smaller before inlining; if the only reason to throw internally and then > rethrow externally are clobber stmts, that is too high cost and so we > optimize them away. > > If such functions are inlined into a function that has some EH landing pads > active for the inlined functions though, those EH landing pads are where > the local variables are all considered to conflict between different > inlines, because there is no clobber on that path. > > The following patch fixes it by adding clobbers at the start of the EH > landing pad in the inlining function's caller if it was external throw > before and now is internal throw (i.e. when we add an edge from newly added > bb in the inlining to bb in the function from before inlining). > > If we throw externally from a function (even if it is inline function), all > local variables in that function are out of scope. > > I've bootstrapped/regtested an earlier version of this patch which just had > another basic_block member id->eh_landing_pad_bb used to assert that there > is at most one such landing pad (which survived x86_64-linux and i686-linux > bootstrap/regtest), ok for trunk if this patch passes bootstrap/regtest too? > > 2019-01-17 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/86214 > * tree-inline.h (struct copy_body_data): Add > add_clobbers_to_eh_landing_pads member. > * tree-inline.c (add_clobbers_to_eh_landing_pad): New function. > (copy_edges_for_bb): Call it if EH edge destination is < > id->add_clobbers_to_eh_landing_pads. Fix a comment typo. 
> (expand_call_inline): Set id->add_clobbers_to_eh_landing_pads > if flag_stack_reuse != SR_NONE and clear it afterwards. > > * g++.dg/opt/pr86214-1.C: New test. > * g++.dg/opt/pr86214-2.C: New test. > > --- gcc/tree-inline.h.jj 2019-01-17 13:19:56.678539594 +0100 > +++ gcc/tree-inline.h 2019-01-17 14:22:56.138496295 +0100 > @@ -155,6 +155,12 @@ struct copy_body_data > /* A list of addressable local variables remapped into the caller > when inlining a call within an OpenMP SIMD-on-SIMT loop. */ > vec<tree> *dst_simt_vars; > + > + /* If clobbers for local variables from the inline function > + that need to live in memory should be added to EH landing pads > + outside of the inlined function, this should be the number > + of basic blocks in the caller before inlining. Zero otherwise. */ > + int add_clobbers_to_eh_landing_pads; > }; > > /* Weights of constructions for estimate_num_insns. */ > --- gcc/tree-inline.c.jj 2019-01-17 13:19:56.668539756 +0100 > +++ gcc/tree-inline.c 2019-01-17 14:23:42.364746520 +0100 > @@ -2190,6 +2190,40 @@ update_ssa_across_abnormal_edges (basic_ > } > } > > +/* Insert clobbers for automatic variables of inlined ID->src_fn > + function at the start of basic block BB. 
*/ > + > +static void > +add_clobbers_to_eh_landing_pad (basic_block bb, copy_body_data *id) > +{ > + tree var; > + unsigned int i; > + FOR_EACH_VEC_SAFE_ELT (id->src_cfun->local_decls, i, var) > + if (VAR_P (var) > + && !DECL_HARD_REGISTER (var) > + && !TREE_THIS_VOLATILE (var) > + && !DECL_HAS_VALUE_EXPR_P (var) > + && !is_gimple_reg (var) > + && auto_var_in_fn_p (var, id->src_fn)) > + { > + tree *t = id->decl_map->get (var); > + if (!t) > + continue; > + tree new_var = *t; > + if (VAR_P (new_var) > + && !DECL_HARD_REGISTER (new_var) > + && !TREE_THIS_VOLATILE (new_var) > + && !DECL_HAS_VALUE_EXPR_P (new_var) > + && !is_gimple_reg (new_var) > + && auto_var_in_fn_p (new_var, id->dst_fn)) > + { > + gimple_stmt_iterator gsi = gsi_after_labels (bb); > + tree clobber = build_clobber (TREE_TYPE (new_var)); > + gimple *clobber_stmt = gimple_build_assign (new_var, clobber); > + gsi_insert_before (&gsi, clobber_stmt, GSI_NEW_STMT); > + } > + }
So we do not care to optimize this to only clobber the vars that appear live over the EH edge? > +} > > /* Copy edges from BB into its copy constructed earlier, scale profile > accordingly. Edges will be taken care of later. Assume aux > @@ -2232,7 +2266,7 @@ copy_edges_for_bb (basic_block bb, profi > if (bb->index == ENTRY_BLOCK || bb->index == EXIT_BLOCK) > return false; > > - /* When doing function splitting, we must decreate count of the return > block > + /* When doing function splitting, we must decrease count of the return > block > which was previously reachable by block we did not copy. */ > if (single_succ_p (bb) && single_succ_edge (bb)->dest->index == EXIT_BLOCK) > FOR_EACH_EDGE (old_edge, ei, bb->preds) > @@ -2317,8 +2351,16 @@ copy_edges_for_bb (basic_block bb, profi > e->probability = old_edge->probability; > > FOR_EACH_EDGE (e, ei, copy_stmt_bb->succs) > - if ((e->flags & EDGE_EH) && !e->probability.initialized_p ()) > - e->probability = profile_probability::never (); > + if (e->flags & EDGE_EH) > + { > + if (!e->probability.initialized_p ()) > + e->probability = profile_probability::never (); > + if (e->dest->index < id->add_clobbers_to_eh_landing_pads) > + { Why don't we need to watch out for EDGE_COUNT (e->dest->preds) != 1? Is it that we just don't care? > + add_clobbers_to_eh_landing_pad (e->dest, id); > + id->add_clobbers_to_eh_landing_pads = 0; > + } > + } > } > > > @@ -4565,6 +4607,8 @@ expand_call_inline (basic_block bb, gimp > id->decl_map = new hash_map<tree, tree>; > dst = id->debug_map; > id->debug_map = NULL; > + if (flag_stack_reuse != SR_NONE) > + id->add_clobbers_to_eh_landing_pads = last_basic_block_for_fn (cfun); > > /* Record the function we are about to inline. */ > id->src_fn = fn; > @@ -4872,6 +4916,7 @@ expand_call_inline (basic_block bb, gimp > } > > id->assign_stmts.release (); > + id->add_clobbers_to_eh_landing_pads = 0; > > /* Output the inlining info for this abstract function, since it has been 
If we don't do this now, we can lose the information about the > --- gcc/testsuite/g++.dg/opt/pr86214-1.C.jj 2019-01-17 14:22:38.083789139 > +0100 > +++ gcc/testsuite/g++.dg/opt/pr86214-1.C 2019-01-17 14:22:38.083789139 > +0100 > @@ -0,0 +1,30 @@ > +// PR tree-optimization/86214 > +// { dg-do compile } > +// { dg-options "-O2 -Wstack-usage=15000" } > + > +typedef __SIZE_TYPE__ size_t; > +struct A { A (); ~A (); int a; void qux (const char *); }; > +int bar (char *); > + > +static inline A > +foo () > +{ > + char b[8192]; > + int x = bar (b); > + A s; > + if (x > 0 && (size_t) x < sizeof b) > + s.qux (b); > + return s; > +} > + > +void > +baz () // { dg-bogus "stack usage is" } > +{ > + A a; > + char c[1024]; > + bar (c); > + foo (); foo (); foo (); foo (); foo (); > + foo (); foo (); foo (); foo (); foo (); > + foo (); foo (); foo (); foo (); foo (); > + foo (); foo (); foo (); foo (); foo (); > +} > --- gcc/testsuite/g++.dg/opt/pr86214-2.C.jj 2019-01-17 14:22:38.083789139 > +0100 > +++ gcc/testsuite/g++.dg/opt/pr86214-2.C 2019-01-17 14:22:38.083789139 > +0100 > @@ -0,0 +1,28 @@ > +// PR tree-optimization/86214 > +// { dg-do compile } > +// { dg-options "-O2 -Wstack-usage=15000" } > + > +typedef __SIZE_TYPE__ size_t; > +struct A { A (); ~A (); int a; void qux (const char *); }; > +int bar (char *); > + > +static inline __attribute__((always_inline)) A > +foo () > +{ > + char b[8192]; > + int x = bar (b); > + A s; > + if (x > 0 && (size_t) x < sizeof b) > + s.qux (b); > + return s; > +} > + > +void > +baz () // { dg-bogus "stack usage is" } > +{ > + A a; > + foo (); foo (); foo (); foo (); foo (); > + foo (); foo (); foo (); foo (); foo (); > + foo (); foo (); foo (); foo (); foo (); > + foo (); foo (); foo (); foo (); foo (); > +} > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)