On Tue, Apr 01, 2025 at 10:59:36AM +0200, Jakub Jelinek wrote: > On Tue, Apr 01, 2025 at 10:46:15AM +0200, Richard Biener wrote: > > This looks OK, but I wonder if ... > > > - /* The parameter should be a real operand, so that phi node > > > - created for it at the start of the function has the meaning > > > - of copying the value. This test implies is_gimple_reg_type > > > - from the previous condition, however this one could be > > > - relaxed by being more careful with copying the new value > > > - of the parameter (emitting appropriate GIMPLE_ASSIGN and > > > - updating the virtual operands). */ > > > - if (!is_gimple_reg (param)) > > > + if (is_gimple_reg_type (TREE_TYPE (param)) > > > + ? !is_gimple_reg (param) > > > > ... we want to restrict this to musttail calls at this point and > > relax for stage1 only? > > I can do that, sure.
Here it is, ok if it passes bootstrap/regtest? I'll queue the interdiff between this patch and the previous one for GCC 16. 2025-04-01 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/119493 * tree-tailcall.cc (find_tail_calls): Don't punt on tail recusion if some arguments don't have is_gimple_reg_type, only punt if they have non-POD types, or volatile, or addressable or (for now) it is not a musttail call. Set tailr_arg_needs_copy in those cases too. (eliminate_tail_call): Copy call arguments to params if they don't have is_gimple_reg_type, use temporaries if the argument is used later. (tree_optimize_tail_calls_1): Skip !is_gimple_reg_type tailr_arg_needs_copy parameters. Formatting fix. * gcc.dg/pr119493-1.c: New test. --- gcc/tree-tailcall.cc.jj 2025-03-28 10:49:30.000000000 +0100 +++ gcc/tree-tailcall.cc 2025-04-01 11:47:31.417637335 +0200 @@ -676,19 +676,17 @@ find_tail_calls (basic_block bb, struct have a copyable type and the two arguments must have reasonably equivalent types. The latter requirement could be relaxed if we emitted a suitable type conversion statement. */ - if (!is_gimple_reg_type (TREE_TYPE (param)) + if (TREE_ADDRESSABLE (TREE_TYPE (param)) || !useless_type_conversion_p (TREE_TYPE (param), TREE_TYPE (arg))) break; - /* The parameter should be a real operand, so that phi node - created for it at the start of the function has the meaning - of copying the value. This test implies is_gimple_reg_type - from the previous condition, however this one could be - relaxed by being more careful with copying the new value - of the parameter (emitting appropriate GIMPLE_ASSIGN and - updating the virtual operands). */ - if (!is_gimple_reg (param)) + if (is_gimple_reg_type (TREE_TYPE (param)) + ? !is_gimple_reg (param) + : (!is_gimple_variable (param) + || TREE_THIS_VOLATILE (param) + || may_be_aliased (param) + || !gimple_call_must_tail_p (call))) break; } } @@ -938,9 +936,9 @@ find_tail_calls (basic_block bb, struct param = DECL_CHAIN (param), idx++) { tree ddef, arg = gimple_call_arg (call, idx); - if (is_gimple_reg (param) - && (ddef = ssa_default_def (cfun, param)) - && (arg != ddef)) + if (!is_gimple_reg (param) + || ((ddef = ssa_default_def (cfun, param)) + && arg != ddef)) bitmap_set_bit (tailr_arg_needs_copy, idx); } } @@ -1216,6 +1214,7 @@ eliminate_tail_call (struct tailcall *t, /* Add phi node entries for arguments. The ordering of the phi nodes should be the same as the ordering of the arguments. */ + auto_vec<tree> copies; for (param = DECL_ARGUMENTS (current_function_decl), idx = 0, gpi = gsi_start_phis (first); param; @@ -1224,6 +1223,35 @@ eliminate_tail_call (struct tailcall *t, if (!bitmap_bit_p (tailr_arg_needs_copy, idx)) continue; + if (!is_gimple_reg_type (TREE_TYPE (param))) + { + if (param == gimple_call_arg (stmt, idx)) + continue; + /* First check if param isn't used by any of the following + call arguments. If it is, we need to copy first to + a temporary and only after doing all the assignments copy it + to param. */ + size_t idx2 = idx + 1; + tree param2 = DECL_CHAIN (param); + for (; param2; param2 = DECL_CHAIN (param2), idx2++) + if (!is_gimple_reg_type (TREE_TYPE (param))) + { + tree base = get_base_address (gimple_call_arg (stmt, idx2)); + if (base == param) + break; + } + tree tmp = param; + if (param2) + { + tmp = create_tmp_var (TREE_TYPE (param)); + copies.safe_push (param); + copies.safe_push (tmp); + } + gimple *g = gimple_build_assign (tmp, gimple_call_arg (stmt, idx)); + gsi_insert_before (&t->call_gsi, g, GSI_SAME_STMT); + continue; + } + arg = gimple_call_arg (stmt, idx); phi = gpi.phi (); gcc_assert (param == SSA_NAME_VAR (PHI_RESULT (phi))); @@ -1231,6 +1259,11 @@ eliminate_tail_call (struct tailcall *t, add_phi_arg (phi, arg, e, gimple_location (stmt)); gsi_next (&gpi); } + for (unsigned i = 0; i < copies.length (); i += 2) + { + gimple *g = gimple_build_assign (copies[i], copies[i + 1]); + gsi_insert_before (&t->call_gsi, g, GSI_SAME_STMT); + } /* Update the values of accumulators. */ adjust_accumulator_values (t->call_gsi, t->mult, t->add, e); @@ -1390,8 +1423,8 @@ tree_optimize_tail_calls_1 (bool opt_tai or if there are existing degenerate PHI nodes. */ if (!single_pred_p (first) || !gimple_seq_empty_p (phi_nodes (first))) - first = - split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); + first + = split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); /* Copy the args if needed. */ unsigned idx; @@ -1400,6 +1433,8 @@ tree_optimize_tail_calls_1 (bool opt_tai param = DECL_CHAIN (param), idx++) if (bitmap_bit_p (tailr_arg_needs_copy, idx)) { + if (!is_gimple_reg_type (TREE_TYPE (param))) + continue; tree name = ssa_default_def (cfun, param); tree new_name = make_ssa_name (param, SSA_NAME_DEF_STMT (name)); gphi *phi; --- gcc/testsuite/gcc.dg/pr119493-1.c.jj 2025-03-31 12:37:08.194529478 +0200 +++ gcc/testsuite/gcc.dg/pr119493-1.c 2025-03-31 12:45:10.583848794 +0200 @@ -0,0 +1,55 @@ +/* PR tree-optimization/119493 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fdump-tree-tailr1" } */ +/* { dg-final { scan-tree-dump-times " = foo \\\(\[^\n\r;]*\\\);" 4 "tailr1" } } */ +/* { dg-final { scan-tree-dump-times " = bar \\\(\[^\n\r;]*\\\);" 4 "tailr1" } } */ +/* { dg-final { scan-tree-dump-not " = foo \\\(\[^\n\r;]*\\\); \\\[must tail call\\\]" "tailr1" } } */ +/* { dg-final { scan-tree-dump-not " = bar \\\(\[^\n\r;]*\\\); \\\[must tail call\\\]" "tailr1" } } */ + +struct S { unsigned s; }; +struct T { struct S t[2]; }; + +[[gnu::noinline, gnu::noclone]] struct S +foo (struct S m) +{ + if (m.s == 0 || m.s == 42) + return m; + [[gnu::musttail]] return foo ((struct S) { m.s - 1 }); +} + +[[gnu::noinline, gnu::noclone]] struct S +bar (struct T m, struct S n, int o, int p, int q) +{ + struct T r; + if (m.t[1].s != o || n.s != o) + __builtin_abort (); + if (o == 0 || o == 42) + return n; + r = m; + m.t[1].s -= p; + r.t[1].s -= q; + [[gnu::musttail]] return bar (r, m.t[1], o - 1, p, q); +} + +int +main () +{ + if (foo ((struct S) { 0 }).s != 0) + __builtin_abort (); + if (foo ((struct S) { 4 }).s != 0) + __builtin_abort (); + if (foo ((struct S) { 42 }).s != 42) + __builtin_abort (); + if (foo ((struct S) { 51 }).s != 42) + __builtin_abort (); + if (bar ((struct T) { { { 0 }, { 0 } } }, (struct S) { 0 }, 0, 1, 1).s != 0) + __builtin_abort (); + if (bar ((struct T) { { { 7 }, { 7 } } }, (struct S) { 7 }, 7, 1, 1).s != 0) + __builtin_abort (); + if (bar ((struct T) { { { 42 }, { 42 } } }, + (struct S) { 42 }, 42, 1, 1).s != 42) + __builtin_abort (); + if (bar ((struct T) { { { 48 }, { 48 } } }, + (struct S) { 48 }, 48, 1, 1).s != 42) + __builtin_abort (); +} Jakub