https://gcc.gnu.org/g:01acd453d89ff5e414fade2dfeeae1f652143376
commit r15-9132-g01acd453d89ff5e414fade2dfeeae1f652143376 Author: Jakub Jelinek <ja...@redhat.com> Date: Tue Apr 1 16:47:37 2025 +0200 tailc: Improve tail recursion handling [PR119493] This is a partial step towards fixing that PR. For musttail recursive calls which have non-is_gimple_reg_type typed parameters, the only case we've handled was if the exact parameter was passed through (perhaps modified, but still the same PARM_DECL). That isn't necessary, we can copy the argument to the parameter as well (just need to watch for the use of the parameter in later arguments, say musttail recursive call which swaps 2 structure arguments). The patch attempts to play safe and punts if any of the parameters are addressable (like we do for all normal tail calls and tail recursions, except for musttail in the posted unreviewed patch). With this patch (at least when early inlining isn't done on not yet optimized body) inlining should see already tail recursion optimized body and will not have problems with SRA breaking musttail. This version of the patch limits this for musttail tail recursions, with intent to enable for all tail recursions in GCC 16. 2025-04-01 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/119493 * tree-tailcall.cc (find_tail_calls): Don't punt on tail recusion if some arguments don't have is_gimple_reg_type, only punt if they have non-POD types, or volatile, or addressable or (for now) it is not a musttail call. Set tailr_arg_needs_copy in those cases too. (eliminate_tail_call): Copy call arguments to params if they don't have is_gimple_reg_type, use temporaries if the argument is used later. (tree_optimize_tail_calls_1): Skip !is_gimple_reg_type tailr_arg_needs_copy parameters. Formatting fix. * gcc.dg/pr119493-1.c: New test. Diff: --- gcc/testsuite/gcc.dg/pr119493-1.c | 55 ++++++++++++++++++++++++++++++++++ gcc/tree-tailcall.cc | 63 ++++++++++++++++++++++++++++++--------- 2 files changed, 104 insertions(+), 14 deletions(-) diff --git a/gcc/testsuite/gcc.dg/pr119493-1.c b/gcc/testsuite/gcc.dg/pr119493-1.c new file mode 100644 index 000000000000..edba61c503c7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr119493-1.c @@ -0,0 +1,55 @@ +/* PR tree-optimization/119493 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fdump-tree-tailr1" } */ +/* { dg-final { scan-tree-dump-times " = foo \\\(\[^\n\r;]*\\\);" 4 "tailr1" } } */ +/* { dg-final { scan-tree-dump-times " = bar \\\(\[^\n\r;]*\\\);" 4 "tailr1" } } */ +/* { dg-final { scan-tree-dump-not " = foo \\\(\[^\n\r;]*\\\); \\\[must tail call\\\]" "tailr1" } } */ +/* { dg-final { scan-tree-dump-not " = bar \\\(\[^\n\r;]*\\\); \\\[must tail call\\\]" "tailr1" } } */ + +struct S { unsigned s; }; +struct T { struct S t[2]; }; + +[[gnu::noinline, gnu::noclone]] struct S +foo (struct S m) +{ + if (m.s == 0 || m.s == 42) + return m; + [[gnu::musttail]] return foo ((struct S) { m.s - 1 }); +} + +[[gnu::noinline, gnu::noclone]] struct S +bar (struct T m, struct S n, int o, int p, int q) +{ + struct T r; + if (m.t[1].s != o || n.s != o) + __builtin_abort (); + if (o == 0 || o == 42) + return n; + r = m; + m.t[1].s -= p; + r.t[1].s -= q; + [[gnu::musttail]] return bar (r, m.t[1], o - 1, p, q); +} + +int +main () +{ + if (foo ((struct S) { 0 }).s != 0) + __builtin_abort (); + if (foo ((struct S) { 4 }).s != 0) + __builtin_abort (); + if (foo ((struct S) { 42 }).s != 42) + __builtin_abort (); + if (foo ((struct S) { 51 }).s != 42) + __builtin_abort (); + if (bar ((struct T) { { { 0 }, { 0 } } }, (struct S) { 0 }, 0, 1, 1).s != 0) + __builtin_abort (); + if (bar ((struct T) { { { 7 }, { 7 } } }, (struct S) { 7 }, 7, 1, 1).s != 0) + __builtin_abort (); + if (bar ((struct T) { { { 42 }, { 42 } } }, + (struct S) { 42 }, 42, 1, 1).s != 42) + __builtin_abort (); + if (bar ((struct T) { { { 48 }, { 48 } } }, + (struct S) { 48 }, 48, 1, 1).s != 42) + __builtin_abort (); +} diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc index bc053aefea95..e71341bfb2bc 100644 --- a/gcc/tree-tailcall.cc +++ b/gcc/tree-tailcall.cc @@ -676,19 +676,17 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, have a copyable type and the two arguments must have reasonably equivalent types. The latter requirement could be relaxed if we emitted a suitable type conversion statement. */ - if (!is_gimple_reg_type (TREE_TYPE (param)) + if (TREE_ADDRESSABLE (TREE_TYPE (param)) || !useless_type_conversion_p (TREE_TYPE (param), TREE_TYPE (arg))) break; - /* The parameter should be a real operand, so that phi node - created for it at the start of the function has the meaning - of copying the value. This test implies is_gimple_reg_type - from the previous condition, however this one could be - relaxed by being more careful with copying the new value - of the parameter (emitting appropriate GIMPLE_ASSIGN and - updating the virtual operands). */ - if (!is_gimple_reg (param)) + if (is_gimple_reg_type (TREE_TYPE (param)) + ? !is_gimple_reg (param) + : (!is_gimple_variable (param) + || TREE_THIS_VOLATILE (param) + || may_be_aliased (param) + || !gimple_call_must_tail_p (call))) break; } } @@ -938,9 +936,9 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, param = DECL_CHAIN (param), idx++) { tree ddef, arg = gimple_call_arg (call, idx); - if (is_gimple_reg (param) - && (ddef = ssa_default_def (cfun, param)) - && (arg != ddef)) + if (!is_gimple_reg (param) + || ((ddef = ssa_default_def (cfun, param)) + && arg != ddef)) bitmap_set_bit (tailr_arg_needs_copy, idx); } } @@ -1216,6 +1214,7 @@ eliminate_tail_call (struct tailcall *t, class loop *&new_loop) /* Add phi node entries for arguments. The ordering of the phi nodes should be the same as the ordering of the arguments. */ + auto_vec<tree> copies; for (param = DECL_ARGUMENTS (current_function_decl), idx = 0, gpi = gsi_start_phis (first); param; @@ -1224,6 +1223,35 @@ eliminate_tail_call (struct tailcall *t, class loop *&new_loop) if (!bitmap_bit_p (tailr_arg_needs_copy, idx)) continue; + if (!is_gimple_reg_type (TREE_TYPE (param))) + { + if (param == gimple_call_arg (stmt, idx)) + continue; + /* First check if param isn't used by any of the following + call arguments. If it is, we need to copy first to + a temporary and only after doing all the assignments copy it + to param. */ + size_t idx2 = idx + 1; + tree param2 = DECL_CHAIN (param); + for (; param2; param2 = DECL_CHAIN (param2), idx2++) + if (!is_gimple_reg_type (TREE_TYPE (param))) + { + tree base = get_base_address (gimple_call_arg (stmt, idx2)); + if (base == param) + break; + } + tree tmp = param; + if (param2) + { + tmp = create_tmp_var (TREE_TYPE (param)); + copies.safe_push (param); + copies.safe_push (tmp); + } + gimple *g = gimple_build_assign (tmp, gimple_call_arg (stmt, idx)); + gsi_insert_before (&t->call_gsi, g, GSI_SAME_STMT); + continue; + } + arg = gimple_call_arg (stmt, idx); phi = gpi.phi (); gcc_assert (param == SSA_NAME_VAR (PHI_RESULT (phi))); @@ -1231,6 +1259,11 @@ eliminate_tail_call (struct tailcall *t, class loop *&new_loop) add_phi_arg (phi, arg, e, gimple_location (stmt)); gsi_next (&gpi); } + for (unsigned i = 0; i < copies.length (); i += 2) + { + gimple *g = gimple_build_assign (copies[i], copies[i + 1]); + gsi_insert_before (&t->call_gsi, g, GSI_SAME_STMT); + } /* Update the values of accumulators. */ adjust_accumulator_values (t->call_gsi, t->mult, t->add, e); @@ -1390,8 +1423,8 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls, bool only_musttail, or if there are existing degenerate PHI nodes. */ if (!single_pred_p (first) || !gimple_seq_empty_p (phi_nodes (first))) - first = - split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); + first + = split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); /* Copy the args if needed. */ unsigned idx; @@ -1400,6 +1433,8 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls, bool only_musttail, param = DECL_CHAIN (param), idx++) if (bitmap_bit_p (tailr_arg_needs_copy, idx)) { + if (!is_gimple_reg_type (TREE_TYPE (param))) + continue; tree name = ssa_default_def (cfun, param); tree new_name = make_ssa_name (param, SSA_NAME_DEF_STMT (name)); gphi *phi;