In the case of a tail call that returns a structure, currently all
large structures are rejected. We can allow the case where the return
of the "tail call" function is setting the return value of the current
function.  This allows for the musttail that is located in pr71761-1.c.

This should be safe for sibcalls here, as RSO is always set for this case,
so the current function no longer owns the space.

There is a small fix needed for targets where targetm.calls.structure_value_rtx
returns non-null. The setting of this register was not being done for sibcalls
(pass == 0), but in this case we need to do that. Also add a testcase which
shows the issue.

Bootstrapped and tested on x86_64-linux-gnu and aarch64-linux-gnu with no 
regressions.

        PR middle-end/71761

gcc/ChangeLog:

        * calls.cc (can_implement_as_sibling_call_p): Don't reject
        "structured" returns if the addr is the same as the current
        result decl's memory.
        (expand_call): For pass 0, also copy into struct_value_rtx
        the correct value.

gcc/testsuite/ChangeLog:

        * c-c++-common/pr71761-1.c: New test.
        * gcc.dg/torture/tail-call-0.c: New test.

Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
---
 gcc/calls.cc                               | 25 +++++++++----
 gcc/testsuite/c-c++-common/pr71761-1.c     | 17 +++++++++
 gcc/testsuite/gcc.dg/torture/tail-call-0.c | 42 ++++++++++++++++++++++
 3 files changed, 77 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/pr71761-1.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/tail-call-0.c

diff --git a/gcc/calls.cc b/gcc/calls.cc
index 1dcda20a06e..6233272d53d 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -2575,14 +2575,25 @@ can_implement_as_sibling_call_p (tree exp,
       return false;
     }
 
-  /* Doing sibling call optimization needs some work, since
-     structure_value_addr can be allocated on the stack.
-     It does not seem worth the effort since few optimizable
-     sibling calls will return a structure.  */
+  /* For sibling call optimization with a structure return,
+     the return value needs to be the same as the current function,
+     otherwise the structure return will be allocated in the stack.  */
   if (structure_value_addr != NULL_RTX)
     {
-      maybe_complain_about_tail_call (exp, _("callee returns a structure"));
-      return false;
+      tree res = DECL_RESULT (current_function_decl);
+      bool ok = false;
+      if (res && DECL_RTL_SET_P (res))
+       {
+         rtx res_rtx = DECL_RTL (res);
+         if (MEM_P (res_rtx)
+             && XEXP (res_rtx, 0) == structure_value_addr)
+           ok = true;
+       }
+      if (!ok)
+       {
+         maybe_complain_about_tail_call (exp, _("callee returns a structure"));
+         return false;
+       }
     }
 
   /* Check whether the target is able to optimize the call
@@ -3700,7 +3711,7 @@ expand_call (tree exp, rtx target, int ignore)
 
       /* Pass the function the address in which to return a
         structure value.  */
-      if (pass != 0 && structure_value_addr && ! structure_value_addr_parm)
+      if (structure_value_addr && ! structure_value_addr_parm)
        {
          structure_value_addr
            = convert_memory_address (Pmode, structure_value_addr);
diff --git a/gcc/testsuite/c-c++-common/pr71761-1.c b/gcc/testsuite/c-c++-common/pr71761-1.c
new file mode 100644
index 00000000000..41f4f834fc7
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pr71761-1.c
@@ -0,0 +1,17 @@
+/* PR middle-end/71761 */
+/* { dg-do compile { target musttail } } */
+/* { dg-options "-O2" } */
+
+typedef struct Foo {
+   int o[16];
+}Foo;
+
+Foo moo();
+
+Foo goo()
+{
+  [[gnu::musttail]]
+  return moo();
+}
+
+/* { dg-final { scan-assembler "jmp    moo" { target { { i?86-*-* x86_64-*-* } && { ! ia32 } } } } } */
diff --git a/gcc/testsuite/gcc.dg/torture/tail-call-0.c b/gcc/testsuite/gcc.dg/torture/tail-call-0.c
new file mode 100644
index 00000000000..cd213a0e54b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/tail-call-0.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* This was being miscompiled due
+   to the sibcall in g to f as gh
+   would clobber the struct value register.
+   This is a reduced testcase from c-parser.cc code.  */
+
+typedef struct token
+{
+    char const* tok_start;
+    char const* tok_end;
+    int tok_type;
+    unsigned identifier_hash;
+}token;
+
+__attribute__((noinline,noipa))
+token f()
+{
+  return (token){};
+}
+__attribute__((noinline,noipa))
+void gh()
+{
+  // Clobber the struct value register
+#if defined(__aarch64__)
+  asm("mov x8, 0":::"x8");
+#endif
+}
+
+__attribute__((noinline,noipa))
+token g(int t)
+{
+  if (t)
+    gh();
+  return f();
+}
+
+int main()
+{
+  token t = g(1);
+  return 0;
+}
+
-- 
2.34.1

Reply via email to