https://gcc.gnu.org/g:01acd453d89ff5e414fade2dfeeae1f652143376

commit r15-9132-g01acd453d89ff5e414fade2dfeeae1f652143376
Author: Jakub Jelinek <ja...@redhat.com>
Date:   Tue Apr 1 16:47:37 2025 +0200

    tailc: Improve tail recursion handling [PR119493]
    
    This is a partial step towards fixing that PR.
    For musttail recursive calls which have non-is_gimple_reg_type typed
    parameters, the only case we've handled was if the exact parameter
    was passed through (perhaps modified, but still the same PARM_DECL).
    That isn't necessary, we can copy the argument to the parameter as well
    (just need to watch for the use of the parameter in later arguments,
    say musttail recursive call which swaps 2 structure arguments).
    
    The patch attempts to play safe and punts if any of the parameters are
    addressable (like we do for all normal tail calls and tail recursions,
    except for musttail in the posted unreviewed patch).
    
    With this patch (at least when early inlining isn't done on not yet
    optimized body) inlining should see already tail recursion optimized
    body and will not have problems with SRA breaking musttail.
    
    This version of the patch limits this for musttail tail recursions,
    with intent to enable for all tail recursions in GCC 16.
    
    2025-04-01  Jakub Jelinek  <ja...@redhat.com>
    
            PR tree-optimization/119493
            * tree-tailcall.cc (find_tail_calls): Don't punt on tail recusion
            if some arguments don't have is_gimple_reg_type, only punt if they
            have non-POD types, or volatile, or addressable or (for now) it is
            not a musttail call.  Set tailr_arg_needs_copy in those cases too.
            (eliminate_tail_call): Copy call arguments to params if they don't
            have is_gimple_reg_type, use temporaries if the argument is used
            later.
            (tree_optimize_tail_calls_1): Skip !is_gimple_reg_type
            tailr_arg_needs_copy parameters.  Formatting fix.
    
            * gcc.dg/pr119493-1.c: New test.

Diff:
---
 gcc/testsuite/gcc.dg/pr119493-1.c | 55 ++++++++++++++++++++++++++++++++++
 gcc/tree-tailcall.cc              | 63 ++++++++++++++++++++++++++++++---------
 2 files changed, 104 insertions(+), 14 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr119493-1.c 
b/gcc/testsuite/gcc.dg/pr119493-1.c
new file mode 100644
index 000000000000..edba61c503c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr119493-1.c
@@ -0,0 +1,55 @@
+/* PR tree-optimization/119493 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-tailr1" } */
+/* { dg-final { scan-tree-dump-times " = foo \\\(\[^\n\r;]*\\\);" 4 "tailr1" } 
} */
+/* { dg-final { scan-tree-dump-times " = bar \\\(\[^\n\r;]*\\\);" 4 "tailr1" } 
} */
+/* { dg-final { scan-tree-dump-not " = foo \\\(\[^\n\r;]*\\\); \\\[must tail 
call\\\]" "tailr1" } } */
+/* { dg-final { scan-tree-dump-not " = bar \\\(\[^\n\r;]*\\\); \\\[must tail 
call\\\]" "tailr1" } } */
+
+struct S { unsigned s; };
+struct T { struct S t[2]; };
+
+[[gnu::noinline, gnu::noclone]] struct S
+foo (struct S m)
+{
+  if (m.s == 0 || m.s == 42)
+    return m;
+  [[gnu::musttail]] return foo ((struct S) { m.s - 1 });
+}
+
+[[gnu::noinline, gnu::noclone]] struct S
+bar (struct T m, struct S n, int o, int p, int q)
+{
+  struct T r;
+  if (m.t[1].s != o || n.s != o)
+    __builtin_abort ();
+  if (o == 0 || o == 42)
+    return n;
+  r = m;
+  m.t[1].s -= p;
+  r.t[1].s -= q;
+  [[gnu::musttail]] return bar (r, m.t[1], o - 1, p, q);
+}
+
+int
+main ()
+{
+  if (foo ((struct S) { 0 }).s != 0)
+    __builtin_abort ();
+  if (foo ((struct S) { 4 }).s != 0)
+    __builtin_abort ();
+  if (foo ((struct S) { 42 }).s != 42)
+    __builtin_abort ();
+  if (foo ((struct S) { 51 }).s != 42)
+    __builtin_abort ();
+  if (bar ((struct T) { { { 0 }, { 0 } } }, (struct S) { 0 }, 0, 1, 1).s != 0)
+    __builtin_abort ();
+  if (bar ((struct T) { { { 7 }, { 7 } } }, (struct S) { 7 }, 7, 1, 1).s != 0)
+    __builtin_abort ();
+  if (bar ((struct T) { { { 42 }, { 42 } } },
+          (struct S) { 42 }, 42, 1, 1).s != 42)
+    __builtin_abort ();
+  if (bar ((struct T) { { { 48 }, { 48 } } },
+          (struct S) { 48 }, 48, 1, 1).s != 42)
+    __builtin_abort ();
+}
diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc
index bc053aefea95..e71341bfb2bc 100644
--- a/gcc/tree-tailcall.cc
+++ b/gcc/tree-tailcall.cc
@@ -676,19 +676,17 @@ find_tail_calls (basic_block bb, struct tailcall **ret, 
bool only_musttail,
                 have a copyable type and the two arguments must have reasonably
                 equivalent types.  The latter requirement could be relaxed if
                 we emitted a suitable type conversion statement.  */
-             if (!is_gimple_reg_type (TREE_TYPE (param))
+             if (TREE_ADDRESSABLE (TREE_TYPE (param))
                  || !useless_type_conversion_p (TREE_TYPE (param),
                                                 TREE_TYPE (arg)))
                break;
 
-             /* The parameter should be a real operand, so that phi node
-                created for it at the start of the function has the meaning
-                of copying the value.  This test implies is_gimple_reg_type
-                from the previous condition, however this one could be
-                relaxed by being more careful with copying the new value
-                of the parameter (emitting appropriate GIMPLE_ASSIGN and
-                updating the virtual operands).  */
-             if (!is_gimple_reg (param))
+             if (is_gimple_reg_type (TREE_TYPE (param))
+                 ? !is_gimple_reg (param)
+                 : (!is_gimple_variable (param)
+                    || TREE_THIS_VOLATILE (param)
+                    || may_be_aliased (param)
+                    || !gimple_call_must_tail_p (call)))
                break;
            }
        }
@@ -938,9 +936,9 @@ find_tail_calls (basic_block bb, struct tailcall **ret, 
bool only_musttail,
           param = DECL_CHAIN (param), idx++)
        {
          tree ddef, arg = gimple_call_arg (call, idx);
-         if (is_gimple_reg (param)
-             && (ddef = ssa_default_def (cfun, param))
-             && (arg != ddef))
+         if (!is_gimple_reg (param)
+             || ((ddef = ssa_default_def (cfun, param))
+                 && arg != ddef))
            bitmap_set_bit (tailr_arg_needs_copy, idx);
        }
     }
@@ -1216,6 +1214,7 @@ eliminate_tail_call (struct tailcall *t, class loop 
*&new_loop)
 
   /* Add phi node entries for arguments.  The ordering of the phi nodes should
      be the same as the ordering of the arguments.  */
+  auto_vec<tree> copies;
   for (param = DECL_ARGUMENTS (current_function_decl),
         idx = 0, gpi = gsi_start_phis (first);
        param;
@@ -1224,6 +1223,35 @@ eliminate_tail_call (struct tailcall *t, class loop 
*&new_loop)
       if (!bitmap_bit_p (tailr_arg_needs_copy, idx))
        continue;
 
+      if (!is_gimple_reg_type (TREE_TYPE (param)))
+       {
+         if (param == gimple_call_arg (stmt, idx))
+           continue;
+         /* First check if param isn't used by any of the following
+            call arguments.  If it is, we need to copy first to
+            a temporary and only after doing all the assignments copy it
+            to param.  */
+         size_t idx2 = idx + 1;
+         tree param2 = DECL_CHAIN (param);
+         for (; param2; param2 = DECL_CHAIN (param2), idx2++)
+           if (!is_gimple_reg_type (TREE_TYPE (param)))
+             {
+               tree base = get_base_address (gimple_call_arg (stmt, idx2));
+               if (base == param)
+                 break;
+             }
+         tree tmp = param;
+         if (param2)
+           {
+             tmp = create_tmp_var (TREE_TYPE (param));
+             copies.safe_push (param);
+             copies.safe_push (tmp);
+           }
+         gimple *g = gimple_build_assign (tmp, gimple_call_arg (stmt, idx));
+         gsi_insert_before (&t->call_gsi, g, GSI_SAME_STMT);
+         continue;
+       }
+
       arg = gimple_call_arg (stmt, idx);
       phi = gpi.phi ();
       gcc_assert (param == SSA_NAME_VAR (PHI_RESULT (phi)));
@@ -1231,6 +1259,11 @@ eliminate_tail_call (struct tailcall *t, class loop 
*&new_loop)
       add_phi_arg (phi, arg, e, gimple_location (stmt));
       gsi_next (&gpi);
     }
+  for (unsigned i = 0; i < copies.length (); i += 2)
+    {
+      gimple *g = gimple_build_assign (copies[i], copies[i + 1]);
+      gsi_insert_before (&t->call_gsi, g, GSI_SAME_STMT);
+    }
 
   /* Update the values of accumulators.  */
   adjust_accumulator_values (t->call_gsi, t->mult, t->add, e);
@@ -1390,8 +1423,8 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls, bool 
only_musttail,
             or if there are existing degenerate PHI nodes.  */
          if (!single_pred_p (first)
              || !gimple_seq_empty_p (phi_nodes (first)))
-           first =
-             split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+           first
+             = split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
 
          /* Copy the args if needed.  */
          unsigned idx;
@@ -1400,6 +1433,8 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls, bool 
only_musttail,
               param = DECL_CHAIN (param), idx++)
            if (bitmap_bit_p (tailr_arg_needs_copy, idx))
              {
+               if (!is_gimple_reg_type (TREE_TYPE (param)))
+                 continue;
                tree name = ssa_default_def (cfun, param);
                tree new_name = make_ssa_name (param, SSA_NAME_DEF_STMT (name));
                gphi *phi;

Reply via email to