This patch fixes PR63596.

There is no need to push/pop all argument registers. We only need to
push and pop the registers that are actually used. This usage information
is computed by a dedicated varargs optimization tree pass, "tree-stdarg",
and the backend should honor its analysis result.

For a simple testcase where varargs are declared but never actually used:

int
f (int a, ...)
{
  return a;
}

Before this patch, we generate:

f:
        sub     sp, sp, #192
        stp     x1, x2, [sp, 136]
        stp     x3, x4, [sp, 152]
        stp     x5, x6, [sp, 168]
        str     x7, [sp, 184]
        str     q0, [sp]
        str     q1, [sp, 16]
        str     q2, [sp, 32]
        str     q3, [sp, 48]
        str     q4, [sp, 64]
        str     q5, [sp, 80]
        str     q6, [sp, 96]
        str     q7, [sp, 112]
        add     sp, sp, 192
        ret

After this patch, it is optimized into:

f:
        ret

OK for trunk?

2016-05-06  Jiong Wang  <jiong.w...@arm.com>
gcc/
  PR63596
  * config/aarch64/aarch64.c (aarch64_expand_builtin_va_start): Honor
  tree-stdarg analysis results.
  (aarch64_setup_incoming_varargs): Likewise.

gcc/testsuite/
  PR63596
  * gcc.target/aarch64/va_arg_1.c: New testcase.
  * gcc.target/aarch64/va_arg_2.c: Likewise.
  * gcc.target/aarch64/va_arg_3.c: Likewise.

>From dfcfe78511047501ed4b2f323b190c1290314104 Mon Sep 17 00:00:00 2001
From: "Jiong.Wang" <jiong.w...@arm.com>
Date: Fri, 6 May 2016 14:36:42 +0100
Subject: [PATCH 2/4] 2

---
 gcc/config/aarch64/aarch64.c                | 35 ++++++++++++++++++-----------
 gcc/testsuite/gcc.target/aarch64/va_arg_1.c | 11 +++++++++
 gcc/testsuite/gcc.target/aarch64/va_arg_2.c | 18 +++++++++++++++
 gcc/testsuite/gcc.target/aarch64/va_arg_3.c | 26 +++++++++++++++++++++
 4 files changed, 77 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_3.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index aff4a95..b1a0287 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9502,15 +9502,17 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
   tree stack, grtop, vrtop, groff, vroff;
   tree t;
-  int gr_save_area_size;
-  int vr_save_area_size;
+  int gr_save_area_size = cfun->va_list_gpr_size;
+  int vr_save_area_size = cfun->va_list_fpr_size;
   int vr_offset;
 
   cum = &crtl->args.info;
-  gr_save_area_size
-    = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
-  vr_save_area_size
-    = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
+  if (cfun->va_list_gpr_size)
+    gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
+			     cfun->va_list_gpr_size);
+  if (cfun->va_list_fpr_size)
+    vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
+			     * UNITS_PER_VREG, cfun->va_list_fpr_size);
 
   if (!TARGET_FLOAT)
     {
@@ -9844,7 +9846,8 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
 {
   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
   CUMULATIVE_ARGS local_cum;
-  int gr_saved, vr_saved;
+  int gr_saved = cfun->va_list_gpr_size;
+  int vr_saved = cfun->va_list_fpr_size;
 
   /* The caller has advanced CUM up to, but not beyond, the last named
      argument.  Advance a local copy of CUM past the last "real" named
@@ -9852,9 +9855,14 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
   local_cum = *cum;
   aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
 
-  /* Found out how many registers we need to save.  */
-  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
-  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
+  /* Found out how many registers we need to save.
+     Honor tree-stdarg analysis results.  */
+  if (cfun->va_list_gpr_size)
+    gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
+		    cfun->va_list_gpr_size / UNITS_PER_WORD);
+  if (cfun->va_list_fpr_size)
+    vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
+		    cfun->va_list_fpr_size / UNITS_PER_VREG);
 
   if (!TARGET_FLOAT)
     {
@@ -9882,7 +9890,7 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
 	  /* We can't use move_block_from_reg, because it will use
 	     the wrong mode, storing D regs only.  */
 	  machine_mode mode = TImode;
-	  int off, i;
+	  int off, i, vr_start;
 
 	  /* Set OFF to the offset from virtual_incoming_args_rtx of
 	     the first vector register.  The VR save area lies below
@@ -9891,14 +9899,15 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
 			   STACK_BOUNDARY / BITS_PER_UNIT);
 	  off -= vr_saved * UNITS_PER_VREG;
 
-	  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
+	  vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
+	  for (i = 0; i < vr_saved; ++i)
 	    {
 	      rtx ptr, mem;
 
 	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
 	      mem = gen_frame_mem (mode, ptr);
 	      set_mem_alias_set (mem, get_varargs_alias_set ());
-	      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
+	      aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
 	      off += UNITS_PER_VREG;
 	    }
 	}
diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_1.c b/gcc/testsuite/gcc.target/aarch64/va_arg_1.c
new file mode 100644
index 0000000..e8e3cda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/va_arg_1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 --save-temps" } */
+
+int
+f (int a, ...)
+{
+  /* { dg-final { scan-assembler-not "str" } } */
+  return a;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_2.c b/gcc/testsuite/gcc.target/aarch64/va_arg_2.c
new file mode 100644
index 0000000..f5c46cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/va_arg_2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 --save-temps" } */
+
+int
+foo (char *fmt, ...)
+{
+  int d;
+  __builtin_va_list ap;
+
+  __builtin_va_start (ap, fmt);
+  d = __builtin_va_arg (ap, int);
+  __builtin_va_end (ap);
+
+  /* { dg-final { scan-assembler-not "x7" } } */
+  return d;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_3.c b/gcc/testsuite/gcc.target/aarch64/va_arg_3.c
new file mode 100644
index 0000000..7f7601a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/va_arg_3.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 --save-temps" } */
+
+int d2i (double a);
+
+int
+foo (char *fmt, ...)
+{
+  int d, e;
+  double f, g;
+  __builtin_va_list ap;
+
+  __builtin_va_start (ap, fmt);
+  d = __builtin_va_arg (ap, int);
+  f = __builtin_va_arg (ap, double);
+  g = __builtin_va_arg (ap, double);
+  d += d2i (f);
+  d += d2i (g);
+  __builtin_va_end (ap);
+
+  /* { dg-final { scan-assembler-not "x7" } } */
+  /* { dg-final { scan-assembler-not "q7" } } */
+  return d;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
-- 
1.9.1

Reply via email to