This patch fixes PR63596. There is no need to push/pop all argument registers; we only need to push and pop those registers that are actually used. This usage information is calculated by a dedicated varargs optimization tree pass, "tree-stdarg", and the backend should honor its analysis result.
For a simple testcase where varargs are declared but not actually used: int f (int a, ...) { return a; } before this patch, we generate: f: sub sp, sp, #192 stp x1, x2, [sp, 136] stp x3, x4, [sp, 152] stp x5, x6, [sp, 168] str x7, [sp, 184] str q0, [sp] str q1, [sp, 16] str q2, [sp, 32] str q3, [sp, 48] str q4, [sp, 64] str q5, [sp, 80] str q6, [sp, 96] str q7, [sp, 112] add sp, sp, 192 ret after this patch, it is optimized into: f: ret OK for trunk? 2016-05-06 Jiong Wang <jiong.w...@arm.com> gcc/ PR63596 * config/aarch64/aarch64.c (aarch64_expand_builtin_va_start): Honor tree-stdarg analysis results. (aarch64_setup_incoming_varargs): Likewise. gcc/testsuite/ PR63596 * gcc.target/aarch64/va_arg_1.c: New testcase. * gcc.target/aarch64/va_arg_2.c: Likewise. * gcc.target/aarch64/va_arg_3.c: Likewise.
>From dfcfe78511047501ed4b2f323b190c1290314104 Mon Sep 17 00:00:00 2001 From: "Jiong.Wang" <jiong.w...@arm.com> Date: Fri, 6 May 2016 14:36:42 +0100 Subject: [PATCH 2/4] 2 --- gcc/config/aarch64/aarch64.c | 35 ++++++++++++++++++----------- gcc/testsuite/gcc.target/aarch64/va_arg_1.c | 11 +++++++++ gcc/testsuite/gcc.target/aarch64/va_arg_2.c | 18 +++++++++++++++ gcc/testsuite/gcc.target/aarch64/va_arg_3.c | 26 +++++++++++++++++++++ 4 files changed, 77 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_3.c diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index aff4a95..b1a0287 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -9502,15 +9502,17 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; tree stack, grtop, vrtop, groff, vroff; tree t; - int gr_save_area_size; - int vr_save_area_size; + int gr_save_area_size = cfun->va_list_gpr_size; + int vr_save_area_size = cfun->va_list_fpr_size; int vr_offset; cum = &crtl->args.info; - gr_save_area_size - = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD; - vr_save_area_size - = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG; + if (cfun->va_list_gpr_size) + gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD, + cfun->va_list_gpr_size); + if (cfun->va_list_fpr_size) + vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn) + * UNITS_PER_VREG, cfun->va_list_fpr_size); if (!TARGET_FLOAT) { @@ -9844,7 +9846,8 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, { CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); CUMULATIVE_ARGS local_cum; - int gr_saved, vr_saved; + int gr_saved = cfun->va_list_gpr_size; + int vr_saved = cfun->va_list_fpr_size; /* The caller has advanced CUM 
up to, but not beyond, the last named argument. Advance a local copy of CUM past the last "real" named @@ -9852,9 +9855,14 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, local_cum = *cum; aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true); - /* Found out how many registers we need to save. */ - gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn; - vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn; + /* Found out how many registers we need to save. + Honor tree-stdvar analysis results. */ + if (cfun->va_list_gpr_size) + gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn, + cfun->va_list_gpr_size / UNITS_PER_WORD); + if (cfun->va_list_fpr_size) + vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn, + cfun->va_list_fpr_size / UNITS_PER_VREG); if (!TARGET_FLOAT) { @@ -9882,7 +9890,7 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, /* We can't use move_block_from_reg, because it will use the wrong mode, storing D regs only. */ machine_mode mode = TImode; - int off, i; + int off, i, vr_start; /* Set OFF to the offset from virtual_incoming_args_rtx of the first vector register. 
The VR save area lies below @@ -9891,14 +9899,15 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, STACK_BOUNDARY / BITS_PER_UNIT); off -= vr_saved * UNITS_PER_VREG; - for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i) + vr_start = V0_REGNUM + local_cum.aapcs_nvrn; + for (i = 0; i < vr_saved; ++i) { rtx ptr, mem; ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off); mem = gen_frame_mem (mode, ptr); set_mem_alias_set (mem, get_varargs_alias_set ()); - aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i)); + aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i)); off += UNITS_PER_VREG; } } diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_1.c b/gcc/testsuite/gcc.target/aarch64/va_arg_1.c new file mode 100644 index 0000000..e8e3cda --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/va_arg_1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 --save-temps" } */ + +int +f (int a, ...) +{ + /* { dg-final { scan-assembler-not "str" } } */ + return a; +} + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_2.c b/gcc/testsuite/gcc.target/aarch64/va_arg_2.c new file mode 100644 index 0000000..f5c46cb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/va_arg_2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 --save-temps" } */ + +int +foo (char *fmt, ...) +{ + int d; + __builtin_va_list ap; + + __builtin_va_start (ap, fmt); + d = __builtin_va_arg (ap, int); + __builtin_va_end (ap); + + /* { dg-final { scan-assembler-not "x7" } } */ + return d; +} + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_3.c b/gcc/testsuite/gcc.target/aarch64/va_arg_3.c new file mode 100644 index 0000000..7f7601a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/va_arg_3.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 --save-temps" } */ + +int d2i (double a); + +int +foo (char *fmt, ...) 
+{ + int d, e; + double f, g; + __builtin_va_list ap; + + __builtin_va_start (ap, fmt); + d = __builtin_va_arg (ap, int); + f = __builtin_va_arg (ap, double); + g = __builtin_va_arg (ap, double); + d += d2i (f); + d += d2i (g); + __builtin_va_end (ap); + + /* { dg-final { scan-assembler-not "x7" } } */ + /* { dg-final { scan-assembler-not "q7" } } */ + return d; +} + +/* { dg-final { cleanup-saved-temps } } */ -- 1.9.1