Hi! As the following testcases show, we are unable to share stack slots for (large) variables from inline functions if something in those inline functions can throw externally.
The issue is that the clobbers we have even in the EH paths are usually removed by ehcleanup1 which attempts to make the functions smaller before inlining; if the only reason to throw internally and then rethrow externally is clobber stmts, that is too high a cost and so we optimize them away. If such functions are inlined into a function that has some EH landing pads active for the inlined functions though, those EH landing pads are where the local variables are all considered to conflict between different inlines, because there is no clobber on that path. The following patch fixes it by adding clobbers at the start of the EH landing pad in the inlining function's caller if the throw was external before the inlining and is internal afterwards (i.e. when we add an edge from a newly added bb in the inlining to a bb in the function from before inlining). If we throw externally from a function (even if it is an inline function), all local variables in that function are out of scope. I've bootstrapped/regtested an earlier version of this patch which just had another basic_block member id->eh_landing_pad_bb used to assert that there is at most one such landing pad (which survived x86_64-linux and i686-linux bootstrap/regtest), ok for trunk if this patch passes bootstrap/regtest too? 2019-01-17 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/86214 * tree-inline.h (struct copy_body_data): Add add_clobbers_to_eh_landing_pads member. * tree-inline.c (add_clobbers_to_eh_landing_pad): New function. (copy_edges_for_bb): Call it if EH edge destination is < id->add_clobbers_to_eh_landing_pads. Fix a comment typo. (expand_call_inline): Set id->add_clobbers_to_eh_landing_pads if flag_stack_reuse != SR_NONE and clear it afterwards. * g++.dg/opt/pr86214-1.C: New test. * g++.dg/opt/pr86214-2.C: New test. 
--- gcc/tree-inline.h.jj 2019-01-17 13:19:56.678539594 +0100 +++ gcc/tree-inline.h 2019-01-17 14:22:56.138496295 +0100 @@ -155,6 +155,12 @@ struct copy_body_data /* A list of addressable local variables remapped into the caller when inlining a call within an OpenMP SIMD-on-SIMT loop. */ vec<tree> *dst_simt_vars; + + /* If clobbers for local variables from the inline function + that need to live in memory should be added to EH landing pads + outside of the inlined function, this should be the number + of basic blocks in the caller before inlining. Zero otherwise. */ + int add_clobbers_to_eh_landing_pads; }; /* Weights of constructions for estimate_num_insns. */ --- gcc/tree-inline.c.jj 2019-01-17 13:19:56.668539756 +0100 +++ gcc/tree-inline.c 2019-01-17 14:23:42.364746520 +0100 @@ -2190,6 +2190,40 @@ update_ssa_across_abnormal_edges (basic_ } } +/* Insert clobbers for automatic variables of inlined ID->src_fn + function at the start of basic block BB. */ + +static void +add_clobbers_to_eh_landing_pad (basic_block bb, copy_body_data *id) +{ + tree var; + unsigned int i; + FOR_EACH_VEC_SAFE_ELT (id->src_cfun->local_decls, i, var) + if (VAR_P (var) + && !DECL_HARD_REGISTER (var) + && !TREE_THIS_VOLATILE (var) + && !DECL_HAS_VALUE_EXPR_P (var) + && !is_gimple_reg (var) + && auto_var_in_fn_p (var, id->src_fn)) + { + tree *t = id->decl_map->get (var); + if (!t) + continue; + tree new_var = *t; + if (VAR_P (new_var) + && !DECL_HARD_REGISTER (new_var) + && !TREE_THIS_VOLATILE (new_var) + && !DECL_HAS_VALUE_EXPR_P (new_var) + && !is_gimple_reg (new_var) + && auto_var_in_fn_p (new_var, id->dst_fn)) + { + gimple_stmt_iterator gsi = gsi_after_labels (bb); + tree clobber = build_clobber (TREE_TYPE (new_var)); + gimple *clobber_stmt = gimple_build_assign (new_var, clobber); + gsi_insert_before (&gsi, clobber_stmt, GSI_NEW_STMT); + } + } +} /* Copy edges from BB into its copy constructed earlier, scale profile accordingly. Edges will be taken care of later. 
Assume aux @@ -2232,7 +2266,7 @@ copy_edges_for_bb (basic_block bb, profi if (bb->index == ENTRY_BLOCK || bb->index == EXIT_BLOCK) return false; - /* When doing function splitting, we must decreate count of the return block + /* When doing function splitting, we must decrease count of the return block which was previously reachable by block we did not copy. */ if (single_succ_p (bb) && single_succ_edge (bb)->dest->index == EXIT_BLOCK) FOR_EACH_EDGE (old_edge, ei, bb->preds) @@ -2317,8 +2351,16 @@ copy_edges_for_bb (basic_block bb, profi e->probability = old_edge->probability; FOR_EACH_EDGE (e, ei, copy_stmt_bb->succs) - if ((e->flags & EDGE_EH) && !e->probability.initialized_p ()) - e->probability = profile_probability::never (); + if (e->flags & EDGE_EH) + { + if (!e->probability.initialized_p ()) + e->probability = profile_probability::never (); + if (e->dest->index < id->add_clobbers_to_eh_landing_pads) + { + add_clobbers_to_eh_landing_pad (e->dest, id); + id->add_clobbers_to_eh_landing_pads = 0; + } + } } @@ -4565,6 +4607,8 @@ expand_call_inline (basic_block bb, gimp id->decl_map = new hash_map<tree, tree>; dst = id->debug_map; id->debug_map = NULL; + if (flag_stack_reuse != SR_NONE) + id->add_clobbers_to_eh_landing_pads = last_basic_block_for_fn (cfun); /* Record the function we are about to inline. */ id->src_fn = fn; @@ -4872,6 +4916,7 @@ expand_call_inline (basic_block bb, gimp } id->assign_stmts.release (); + id->add_clobbers_to_eh_landing_pads = 0; /* Output the inlining info for this abstract function, since it has been inlined. 
If we don't do this now, we can lose the information about the --- gcc/testsuite/g++.dg/opt/pr86214-1.C.jj 2019-01-17 14:22:38.083789139 +0100 +++ gcc/testsuite/g++.dg/opt/pr86214-1.C 2019-01-17 14:22:38.083789139 +0100 @@ -0,0 +1,30 @@ +// PR tree-optimization/86214 +// { dg-do compile } +// { dg-options "-O2 -Wstack-usage=15000" } + +typedef __SIZE_TYPE__ size_t; +struct A { A (); ~A (); int a; void qux (const char *); }; +int bar (char *); + +static inline A +foo () +{ + char b[8192]; + int x = bar (b); + A s; + if (x > 0 && (size_t) x < sizeof b) + s.qux (b); + return s; +} + +void +baz () // { dg-bogus "stack usage is" } +{ + A a; + char c[1024]; + bar (c); + foo (); foo (); foo (); foo (); foo (); + foo (); foo (); foo (); foo (); foo (); + foo (); foo (); foo (); foo (); foo (); + foo (); foo (); foo (); foo (); foo (); +} --- gcc/testsuite/g++.dg/opt/pr86214-2.C.jj 2019-01-17 14:22:38.083789139 +0100 +++ gcc/testsuite/g++.dg/opt/pr86214-2.C 2019-01-17 14:22:38.083789139 +0100 @@ -0,0 +1,28 @@ +// PR tree-optimization/86214 +// { dg-do compile } +// { dg-options "-O2 -Wstack-usage=15000" } + +typedef __SIZE_TYPE__ size_t; +struct A { A (); ~A (); int a; void qux (const char *); }; +int bar (char *); + +static inline __attribute__((always_inline)) A +foo () +{ + char b[8192]; + int x = bar (b); + A s; + if (x > 0 && (size_t) x < sizeof b) + s.qux (b); + return s; +} + +void +baz () // { dg-bogus "stack usage is" } +{ + A a; + foo (); foo (); foo (); foo (); foo (); + foo (); foo (); foo (); foo (); foo (); + foo (); foo (); foo (); foo (); foo (); + foo (); foo (); foo (); foo (); foo (); +} Jakub