Hi!

This is a partial step towards fixing that PR.
For musttail recursive calls which have non-is_gimple_reg_type typed
parameters, the only case we've handled was if the exact parameter
was passed through (perhaps modified, but still the same PARM_DECL).
That isn't necessary, we can copy the argument to the parameter as well
(just need to watch for the use of the parameter in later arguments,
say musttail recursive call which swaps 2 structure arguments).

The patch attempts to play safe and punts if any of the parameters are
addressable (like we do for all normal tail calls and tail recursions,
except for musttail in the posted unreviewed patch).

With this patch (at least when early inlining isn't done on not yet
optimized body) inlining should see already tail recursion optimized
body and will not have problems with SRA breaking musttail.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2025-04-01  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/119493
        * tree-tailcall.cc (find_tail_calls): Don't punt on tail recusion
        if some arguments don't have is_gimple_reg_type, only punt if they
        have non-POD types, or volatile, or addressable.  Set
        tailr_arg_needs_copy in those cases too.
        (eliminate_tail_call): Copy call arguments to params if they don't
        have is_gimple_reg_type, use temporaries if the argument is used
        later.
        (tree_optimize_tail_calls_1): Skip !is_gimple_reg_type
        tailr_arg_needs_copy parameters.  Formatting fix.

        * gcc.dg/pr119493-1.c: New test.

--- gcc/tree-tailcall.cc.jj     2025-03-28 10:49:30.000000000 +0100
+++ gcc/tree-tailcall.cc        2025-03-31 12:31:01.717603446 +0200
@@ -672,19 +672,16 @@ find_tail_calls (basic_block bb, struct
                 have a copyable type and the two arguments must have reasonably
                 equivalent types.  The latter requirement could be relaxed if
                 we emitted a suitable type conversion statement.  */
-             if (!is_gimple_reg_type (TREE_TYPE (param))
+             if (TREE_ADDRESSABLE (TREE_TYPE (param))
                  || !useless_type_conversion_p (TREE_TYPE (param),
                                                 TREE_TYPE (arg)))
                break;
 
-             /* The parameter should be a real operand, so that phi node
-                created for it at the start of the function has the meaning
-                of copying the value.  This test implies is_gimple_reg_type
-                from the previous condition, however this one could be
-                relaxed by being more careful with copying the new value
-                of the parameter (emitting appropriate GIMPLE_ASSIGN and
-                updating the virtual operands).  */
-             if (!is_gimple_reg (param))
+             if (is_gimple_reg_type (TREE_TYPE (param))
+                 ? !is_gimple_reg (param)
+                 : (!is_gimple_variable (param)
+                    || TREE_THIS_VOLATILE (param)
+                    || may_be_aliased (param)))
                break;
            }
        }
@@ -934,9 +931,9 @@ find_tail_calls (basic_block bb, struct
           param = DECL_CHAIN (param), idx++)
        {
          tree ddef, arg = gimple_call_arg (call, idx);
-         if (is_gimple_reg (param)
-             && (ddef = ssa_default_def (cfun, param))
-             && (arg != ddef))
+         if (!is_gimple_reg (param)
+             || ((ddef = ssa_default_def (cfun, param))
+                 && arg != ddef))
            bitmap_set_bit (tailr_arg_needs_copy, idx);
        }
     }
@@ -1212,6 +1209,7 @@ eliminate_tail_call (struct tailcall *t,
 
   /* Add phi node entries for arguments.  The ordering of the phi nodes should
      be the same as the ordering of the arguments.  */
+  auto_vec<tree> copies;
   for (param = DECL_ARGUMENTS (current_function_decl),
         idx = 0, gpi = gsi_start_phis (first);
        param;
@@ -1220,6 +1218,35 @@ eliminate_tail_call (struct tailcall *t,
       if (!bitmap_bit_p (tailr_arg_needs_copy, idx))
        continue;
 
+      if (!is_gimple_reg_type (TREE_TYPE (param)))
+       {
+         if (param == gimple_call_arg (stmt, idx))
+           continue;
+         /* First check if param isn't used by any of the following
+            call arguments.  If it is, we need to copy first to
+            a temporary and only after doing all the assignments copy it
+            to param.  */
+         size_t idx2 = idx + 1;
+         tree param2 = DECL_CHAIN (param);
+         for (; param2; param2 = DECL_CHAIN (param2), idx2++)
+           if (!is_gimple_reg_type (TREE_TYPE (param)))
+             {
+               tree base = get_base_address (gimple_call_arg (stmt, idx2));
+               if (base == param)
+                 break;
+             }
+         tree tmp = param;
+         if (param2)
+           {
+             tmp = create_tmp_var (TREE_TYPE (param));
+             copies.safe_push (param);
+             copies.safe_push (tmp);
+           }
+         gimple *g = gimple_build_assign (tmp, gimple_call_arg (stmt, idx));
+         gsi_insert_before (&t->call_gsi, g, GSI_SAME_STMT);
+         continue;
+       }
+
       arg = gimple_call_arg (stmt, idx);
       phi = gpi.phi ();
       gcc_assert (param == SSA_NAME_VAR (PHI_RESULT (phi)));
@@ -1227,6 +1254,11 @@ eliminate_tail_call (struct tailcall *t,
       add_phi_arg (phi, arg, e, gimple_location (stmt));
       gsi_next (&gpi);
     }
+  for (unsigned i = 0; i < copies.length (); i += 2)
+    {
+      gimple *g = gimple_build_assign (copies[i], copies[i + 1]);
+      gsi_insert_before (&t->call_gsi, g, GSI_SAME_STMT);
+    }
 
   /* Update the values of accumulators.  */
   adjust_accumulator_values (t->call_gsi, t->mult, t->add, e);
@@ -1354,8 +1386,8 @@ tree_optimize_tail_calls_1 (bool opt_tai
             or if there are existing degenerate PHI nodes.  */
          if (!single_pred_p (first)
              || !gimple_seq_empty_p (phi_nodes (first)))
-           first =
-             split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+           first
+             = split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
 
          /* Copy the args if needed.  */
          unsigned idx;
@@ -1364,6 +1396,8 @@ tree_optimize_tail_calls_1 (bool opt_tai
               param = DECL_CHAIN (param), idx++)
            if (bitmap_bit_p (tailr_arg_needs_copy, idx))
              {
+               if (!is_gimple_reg_type (TREE_TYPE (param)))
+                 continue;
                tree name = ssa_default_def (cfun, param);
                tree new_name = make_ssa_name (param, SSA_NAME_DEF_STMT (name));
                gphi *phi;
--- gcc/testsuite/gcc.dg/pr119493-1.c.jj        2025-03-31 12:37:08.194529478 
+0200
+++ gcc/testsuite/gcc.dg/pr119493-1.c   2025-03-31 12:45:10.583848794 +0200
@@ -0,0 +1,55 @@
+/* PR tree-optimization/119493 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-tailr1" } */
+/* { dg-final { scan-tree-dump-times " = foo \\\(\[^\n\r;]*\\\);" 4 "tailr1" } 
} */
+/* { dg-final { scan-tree-dump-times " = bar \\\(\[^\n\r;]*\\\);" 4 "tailr1" } 
} */
+/* { dg-final { scan-tree-dump-not " = foo \\\(\[^\n\r;]*\\\); \\\[must tail 
call\\\]" "tailr1" } } */
+/* { dg-final { scan-tree-dump-not " = bar \\\(\[^\n\r;]*\\\); \\\[must tail 
call\\\]" "tailr1" } } */
+
+struct S { unsigned s; };
+struct T { struct S t[2]; };
+
+[[gnu::noinline, gnu::noclone]] struct S
+foo (struct S m)
+{
+  if (m.s == 0 || m.s == 42)
+    return m;
+  [[gnu::musttail]] return foo ((struct S) { m.s - 1 });
+}
+
+[[gnu::noinline, gnu::noclone]] struct S
+bar (struct T m, struct S n, int o, int p, int q)
+{
+  struct T r;
+  if (m.t[1].s != o || n.s != o)
+    __builtin_abort ();
+  if (o == 0 || o == 42)
+    return n;
+  r = m;
+  m.t[1].s -= p;
+  r.t[1].s -= q;
+  [[gnu::musttail]] return bar (r, m.t[1], o - 1, p, q);
+}
+
+int
+main ()
+{
+  if (foo ((struct S) { 0 }).s != 0)
+    __builtin_abort ();
+  if (foo ((struct S) { 4 }).s != 0)
+    __builtin_abort ();
+  if (foo ((struct S) { 42 }).s != 42)
+    __builtin_abort ();
+  if (foo ((struct S) { 51 }).s != 42)
+    __builtin_abort ();
+  if (bar ((struct T) { { { 0 }, { 0 } } }, (struct S) { 0 }, 0, 1, 1).s != 0)
+    __builtin_abort ();
+  if (bar ((struct T) { { { 7 }, { 7 } } }, (struct S) { 7 }, 7, 1, 1).s != 0)
+    __builtin_abort ();
+  if (bar ((struct T) { { { 42 }, { 42 } } },
+          (struct S) { 42 }, 42, 1, 1).s != 42)
+    __builtin_abort ();
+  if (bar ((struct T) { { { 48 }, { 48 } } },
+          (struct S) { 48 }, 48, 1, 1).s != 42)
+    __builtin_abort ();
+}

        Jakub

Reply via email to