Hi Richard,

> -----Original Message-----
> From: Gcc-patches <gcc-patches-boun...@gcc.gnu.org> On Behalf Of
> Richard Biener
> Sent: 01 October 2020 14:15
> To: gcc-patches@gcc.gnu.org
> Subject: [PATCH] tree-optimization/97236 - fix bad use of
> VMAT_CONTIGUOUS
> 
> This avoids using VMAT_CONTIGUOUS with single-element interleaving
> when using V1mode vectors.  Instead keep VMAT_ELEMENTWISE but
> continue to avoid load-lanes and gathers.
> 
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

I've checked that this patch also fixes the recently reported PR 98949 on
aarch64 on the GCC 9 branch.
I've bootstrapped and tested it on the branch on aarch64-none-linux.
Is it okay to backport to the branch?

Thanks,
Kyrill

> 
> Richard.
> 
> 2020-10-01  Richard Biener  <rguent...@suse.de>
> 
>       PR tree-optimization/97236
>       * tree-vect-stmts.c (get_group_load_store_type): Keep
>       VMAT_ELEMENTWISE for single-element vectors.
> 
>       * gcc.dg/vect/pr97236.c: New testcase.
> ---
>  gcc/testsuite/gcc.dg/vect/pr97236.c | 43
> +++++++++++++++++++++++++++++
>  gcc/tree-vect-stmts.c               | 20 ++++++--------
>  2 files changed, 52 insertions(+), 11 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr97236.c
> 
> diff --git a/gcc/testsuite/gcc.dg/vect/pr97236.c
> b/gcc/testsuite/gcc.dg/vect/pr97236.c
> new file mode 100644
> index 00000000000..03e0cc38984
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr97236.c
> @@ -0,0 +1,43 @@
> +typedef unsigned char __uint8_t;
> +typedef __uint8_t uint8_t;
> +typedef struct plane_t {
> +  uint8_t *p_pixels;
> +  int i_lines;
> +  int i_pitch;
> +} plane_t;
> +
> +typedef struct {
> +  plane_t p[5];
> +} picture_t;
> +
> +#define N 4
> +
> +void __attribute__((noipa))
> +picture_Clone(picture_t *picture, picture_t *res)
> +{
> +  for (int i = 0; i < N; i++) {
> +    res->p[i].p_pixels = picture->p[i].p_pixels;
> +    res->p[i].i_lines = picture->p[i].i_lines;
> +    res->p[i].i_pitch = picture->p[i].i_pitch;
> +  }
> +}
> +
> +int
> +main()
> +{
> +  picture_t aaa, bbb;
> +  uint8_t pixels[10] = {1, 1, 1, 1, 1, 1, 1, 1};
> +
> +  for (unsigned i = 0; i < N; i++)
> +    aaa.p[i].p_pixels = pixels;
> +
> +  picture_Clone (&aaa, &bbb);
> +
> +  uint8_t c;
> +  for (unsigned i = 0; i < N; i++)
> +    c += bbb.p[i].p_pixels[0];
> +
> +  if (c != N)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 191957c3543..3575f25241f 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -2235,25 +2235,23 @@ get_group_load_store_type (vec_info *vinfo,
> stmt_vec_info stmt_info,
>         /* First cope with the degenerate case of a single-element
>            vector.  */
>         if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
> -         *memory_access_type = VMAT_CONTIGUOUS;
> +         ;
> 
>         /* Otherwise try using LOAD/STORE_LANES.  */
> -       if (*memory_access_type == VMAT_ELEMENTWISE
> -           && (vls_type == VLS_LOAD
> -               ? vect_load_lanes_supported (vectype, group_size,
> masked_p)
> -               : vect_store_lanes_supported (vectype, group_size,
> -                                             masked_p)))
> +       else if (vls_type == VLS_LOAD
> +                ? vect_load_lanes_supported (vectype, group_size,
> masked_p)
> +                : vect_store_lanes_supported (vectype, group_size,
> +                                              masked_p))
>           {
>             *memory_access_type = VMAT_LOAD_STORE_LANES;
>             overrun_p = would_overrun_p;
>           }
> 
>         /* If that fails, try using permuting loads.  */
> -       if (*memory_access_type == VMAT_ELEMENTWISE
> -           && (vls_type == VLS_LOAD
> -               ? vect_grouped_load_supported (vectype,
> single_element_p,
> -                                              group_size)
> -               : vect_grouped_store_supported (vectype, group_size)))
> +       else if (vls_type == VLS_LOAD
> +                ? vect_grouped_load_supported (vectype,
> single_element_p,
> +                                               group_size)
> +                : vect_grouped_store_supported (vectype, group_size))
>           {
>             *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
>             overrun_p = would_overrun_p;
> --
> 2.26.2

Reply via email to