"H.J. Lu" <hjl.to...@gmail.com> writes:
> locate_and_pad_parm is called when expanding function call from
> initialize_argument_information and when generating function body
> from assign_parm_find_entry_rtl:
>
>   /* Remember if the outgoing parameter requires extra alignment on the
>      calling function side.  */
>   if (crtl->stack_alignment_needed < boundary)
>     crtl->stack_alignment_needed = boundary;
>   if (crtl->preferred_stack_boundary < boundary)
>     crtl->preferred_stack_boundary = boundary;
>
> stack_alignment_needed and preferred_stack_boundary should be updated
> only when expanding function call, not when generating function body.
> Add an argument, outgoing_p, to locate_and_pad_parm to indicate for
> expanding function call.  Update stack_alignment_needed and
> preferred_stack_boundary if the parameter is passed on stack and only
> when expanding function call.
>
> Tested on Linux/x86-64.
>
> OK for trunk?
>
> Thanks.
>
> -- 
> H.J.
>
> From e91e20ad8e10373db2c6d8f99a3da0bbf46c5c34 Mon Sep 17 00:00:00 2001
> From: "H.J. Lu" <hjl.to...@gmail.com>
> Date: Wed, 5 Jun 2019 12:55:19 -0700
> Subject: [PATCH] Update preferred_stack_boundary only when expanding function
>  call
>
> locate_and_pad_parm is called when expanding function call from
> initialize_argument_information and when generating function body
> from assign_parm_find_entry_rtl:
>
>   /* Remember if the outgoing parameter requires extra alignment on the
>      calling function side.  */
>   if (crtl->stack_alignment_needed < boundary)
>     crtl->stack_alignment_needed = boundary;
>   if (crtl->preferred_stack_boundary < boundary)
>     crtl->preferred_stack_boundary = boundary;
>
> stack_alignment_needed and preferred_stack_boundary should be updated
> only when expanding function call, not when generating function body.
> Add an argument, outgoing_p, to locate_and_pad_parm to indicate for
> expanding function call.  Update stack_alignment_needed and
> preferred_stack_boundary if the parameter is passed on stack and only
> when expanding function call.
>
> Tested on Linux/x86-64.
>
> gcc/
>
>       PR rtl-optimization/90765
>       * function.c (assign_parm_find_entry_rtl): Pass false to
>       locate_and_pad_parm.
>       (locate_and_pad_parm): Add an argument, outgoing_p, to indicate
>       for expanding function call.  Update stack_alignment_needed and
>       preferred_stack_boundary only if outgoing_p is true and the
>       the parameter is passed on stack.
>       * function.h (locate_and_pad_parm): Add an argument, outgoing_p,
>       defaulting to true.
>
> gcc/testsuite/
>
>       PR rtl-optimization/90765
>       * gcc.target/i386/pr90765-1.c: New test.
>       * gcc.target/i386/pr90765-2.c: Likewise.
> ---
>  gcc/function.c                            | 21 +++++++++++++--------
>  gcc/function.h                            |  3 ++-
>  gcc/testsuite/gcc.target/i386/pr90765-1.c | 11 +++++++++++
>  gcc/testsuite/gcc.target/i386/pr90765-2.c | 18 ++++++++++++++++++
>  4 files changed, 44 insertions(+), 9 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-2.c
>
> diff --git a/gcc/function.c b/gcc/function.c
> index e30ee259bec..9b6673f6f0d 100644
> --- a/gcc/function.c
> +++ b/gcc/function.c
> @@ -2601,7 +2601,7 @@ assign_parm_find_entry_rtl (struct assign_parm_data_all 
> *all,
>    locate_and_pad_parm (data->promoted_mode, data->passed_type, in_regs,
>                      all->reg_parm_stack_space,
>                      entry_parm ? data->partial : 0, current_function_decl,
> -                    &all->stack_args_size, &data->locate);
> +                    &all->stack_args_size, &data->locate, false);
>  
>    /* Update parm_stack_boundary if this parameter is passed in the
>       stack.  */
> @@ -3954,7 +3954,8 @@ locate_and_pad_parm (machine_mode passed_mode, tree 
> type, int in_regs,
>                    int reg_parm_stack_space, int partial,
>                    tree fndecl ATTRIBUTE_UNUSED,
>                    struct args_size *initial_offset_ptr,
> -                  struct locate_and_pad_arg_data *locate)
> +                  struct locate_and_pad_arg_data *locate,
> +                  bool outgoing_p)
>  {
>    tree sizetree;
>    pad_direction where_pad;
> @@ -4021,12 +4022,16 @@ locate_and_pad_parm (machine_mode passed_mode, tree 
> type, int in_regs,
>       }
>      }
>  
> -  /* Remember if the outgoing parameter requires extra alignment on the
> -     calling function side.  */
> -  if (crtl->stack_alignment_needed < boundary)
> -    crtl->stack_alignment_needed = boundary;
> -  if (crtl->preferred_stack_boundary < boundary)
> -    crtl->preferred_stack_boundary = boundary;
> +  if (outgoing_p && !in_regs)
> +    {
> +      /* Remember if the outgoing parameter requires extra alignment on
> +      the calling function side if this parameter is passed in the
> +      stack.  */
> +      if (crtl->stack_alignment_needed < boundary)
> +     crtl->stack_alignment_needed = boundary;
> +      if (crtl->preferred_stack_boundary < boundary)
> +     crtl->preferred_stack_boundary = boundary;
> +    }

Just my opinion, but I think this shows that the code isn't really
in the right place.  IMO locate_and_pad_parm should just describe
the ABI and is too low-level to be modifying global state like this.

I think we could instead do this by walking over the argument vectors
in a subroutine of expand_call and emit_library_call_value_1.
expand_call is also where we do:

  /* Ensure current function's preferred stack boundary is at least
     what we need.  Stack alignment may also increase preferred stack
     boundary.  */
  if (crtl->preferred_stack_boundary < preferred_stack_boundary)
    crtl->preferred_stack_boundary = preferred_stack_boundary;
  else
    preferred_stack_boundary = crtl->preferred_stack_boundary;

Thanks,
Richard

>  
>    if (ARGS_GROW_DOWNWARD)
>      {
> diff --git a/gcc/function.h b/gcc/function.h
> index bfe9919a760..5ad7a33fd39 100644
> --- a/gcc/function.h
> +++ b/gcc/function.h
> @@ -613,7 +613,8 @@ extern bool use_register_for_decl (const_tree);
>  extern gimple_seq gimplify_parameters (gimple_seq *);
>  extern void locate_and_pad_parm (machine_mode, tree, int, int, int,
>                                tree, struct args_size *,
> -                              struct locate_and_pad_arg_data *);
> +                              struct locate_and_pad_arg_data *,
> +                              bool = true);
>  extern void generate_setjmp_warnings (void);
>  
>  /* Identify BLOCKs referenced by more than one NOTE_INSN_BLOCK_{BEG,END},
> diff --git a/gcc/testsuite/gcc.target/i386/pr90765-1.c 
> b/gcc/testsuite/gcc.target/i386/pr90765-1.c
> new file mode 100644
> index 00000000000..178c3ff8054
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr90765-1.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512f" } */
> +/* { dg-final { scan-assembler-not "and\[lq\]?\[\\t \]*\\$-64,\[\\t 
> \]*%\[re\]?sp" } } */
> +
> +typedef int __v16si __attribute__ ((__vector_size__ (64)));
> +
> +void
> +foo (__v16si x, int i0, int i1, int i2, int i3, int i4, int i5, __v16si *p)
> +{
> +  *p = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr90765-2.c 
> b/gcc/testsuite/gcc.target/i386/pr90765-2.c
> new file mode 100644
> index 00000000000..45cf1f03747
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr90765-2.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512f" } */
> +/* { dg-final { scan-assembler "and\[lq\]?\[\\t \]*\\$-64,\[\\t 
> \]*%\[re\]?sp" } } */
> +/* { dg-skip-if "" { x86_64-*-mingw* } } */
> +
> +typedef int __v16si __attribute__ ((__vector_size__ (64)));
> +
> +extern void foo (__v16si, __v16si, __v16si, __v16si, __v16si, __v16si,
> +              __v16si, __v16si, __v16si, int, int, int, int, int,
> +              int, __v16si *);
> +
> +extern __v16si x, y;
> +
> +void
> +bar (void)
> +{
> +  foo (x, x, x, x, x, x, x, x, x, 0, 1, 2, 3, 4, 5, &y);
> +}

Reply via email to