On Tue, 7 Jul 2020, Richard Sandiford wrote:

> Richard Biener <rguent...@suse.de> writes:
> > This fixes a condition that caused all negative step DR groups to
> > be detected as single element interleaving.  Such groups are
> > rejected by interleaving vectorization but miscompiled by SLP
> > which is fixed by forcing VMAT_STRIDED_SLP for now.
> >
> > Bootstrap & regtest running on x86_64-unknown-linux-gnu.
> 
> Where does the link between VMAT_CONTIGUOUS_REVERSE and single-element
> interleaving happen?  (Sorry, should probably spend more time figuring
> that out for myself.)

You go via get_group_load_store_type to get_negative_load_store_type
but only if (slp) - as said the non-SLP path doesn't seem to support
grouped accesses (interleaving) with negative step.

> In principle VMAT_CONTIGUOUS_REVERSE seems like the right classification
> for the testcases.

Yes, it does.  It's just the existing implementation is wrong
for SLP at least - it works for single-elements only since
the permute mask for reverse is a fixed {n-1, n-2,...0} rather
than reversing the groups and also the offsetting done
by vector-size - 1 is not taking into account group size either.

I ran into this (and the other alignment issue) because I thought
representing non-grouped loads as single element interleaving
groups would make those easy to support for SLP but a naiive
implementation runs into all kind of interesting things ;)

Richard.

> Thanks,
> Richard
> 
> 
> 
> >
> >     * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Fix
> >     group overlap condition to allow negative step DR groups.
> >     * tree-vect-stmts.c (get_group_load_store_type): For
> >     multi element SLP groups force VMAT_STRIDED_SLP when the step
> >     is negative.
> >
> >     * gcc.dg/vect/slp-47.c: New testcase.
> >     * gcc.dg/vect/slp-48.c: Likewise.
> > ---
> >  gcc/testsuite/gcc.dg/vect/slp-47.c | 56 ++++++++++++++++++++++++++++++
> >  gcc/testsuite/gcc.dg/vect/slp-48.c | 56 ++++++++++++++++++++++++++++++
> >  gcc/tree-vect-data-refs.c          |  8 +++--
> >  gcc/tree-vect-stmts.c              | 11 ++++--
> >  4 files changed, 126 insertions(+), 5 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/vect/slp-47.c
> >  create mode 100644 gcc/testsuite/gcc.dg/vect/slp-48.c
> >
> > diff --git a/gcc/testsuite/gcc.dg/vect/slp-47.c 
> > b/gcc/testsuite/gcc.dg/vect/slp-47.c
> > new file mode 100644
> > index 00000000000..7b2ddf664df
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/vect/slp-47.c
> > @@ -0,0 +1,56 @@
> > +/* { dg-require-effective-target vect_int } */
> > +
> > +#include "tree-vect.h"
> > +
> > +int x[1024], y[1024];
> > +
> > +void __attribute__((noipa)) foo()
> > +{
> > +  for (int i = 0; i < 512; ++i)
> > +    {
> > +      x[2*i] = y[1023 - (2*i)];
> > +      x[2*i+1] = y[1023 - (2*i+1)];
> > +    }
> > +}
> > +
> > +void __attribute__((noipa)) bar()
> > +{
> > +  for (int i = 0; i < 512; ++i)
> > +    {
> > +      x[2*i] = y[1023 - (2*i+1)];
> > +      x[2*i+1] = y[1023 - (2*i)];
> > +    }
> > +}
> > +
> > +int 
> > +main ()
> > +{
> > +  check_vect ();
> > +
> > +  for (int i = 0; i < 1024; ++i)
> > +    {
> > +      x[i] = 0;
> > +      y[i] = i;
> > +      __asm__ volatile ("");
> > +    }
> > +
> > +  foo ();
> > +  for (int i = 0; i < 1024; ++i)
> > +    if (x[i] != y[1023 - i])
> > +      abort ();
> > +
> > +  for (int i = 0; i < 1024; ++i)
> > +    {
> > +      x[i] = 0;
> > +      __asm__ volatile ("");
> > +    }
> > +
> > +  bar ();
> > +  for (int i = 0; i < 1024; ++i)
> > +    if (x[i] != y[1023 - i^1])
> > +      abort ();
> > +
> > +  return 0;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 
> > "vect" } } */
> > diff --git a/gcc/testsuite/gcc.dg/vect/slp-48.c 
> > b/gcc/testsuite/gcc.dg/vect/slp-48.c
> > new file mode 100644
> > index 00000000000..0b327aede8e
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/vect/slp-48.c
> > @@ -0,0 +1,56 @@
> > +/* { dg-require-effective-target vect_int } */
> > +
> > +#include "tree-vect.h"
> > +
> > +int x[1024], y[1024];
> > +
> > +void __attribute__((noipa)) foo()
> > +{
> > +  for (int i = 0; i < 512; ++i)
> > +    {
> > +      x[1023 - (2*i+1)] = y[2*i];
> > +      x[1023 - (2*i)] = y[2*i+1];
> > +    }
> > +}
> > +
> > +void __attribute__((noipa)) bar()
> > +{
> > +  for (int i = 0; i < 512; ++i)
> > +    {
> > +      x[1023 - (2*i+1)] = y[2*i+1];
> > +      x[1023 - (2*i)] = y[2*i];
> > +    }
> > +}
> > +
> > +int 
> > +main ()
> > +{
> > +  check_vect ();
> > +
> > +  for (int i = 0; i < 1024; ++i)
> > +    {
> > +      x[i] = 0;
> > +      y[i] = i;
> > +      __asm__ volatile ("");
> > +    }
> > +
> > +  foo ();
> > +  for (int i = 0; i < 1024; ++i)
> > +    if (x[i] != y[1023 - i^1])
> > +      abort ();
> > +
> > +  for (int i = 0; i < 1024; ++i)
> > +    {
> > +      x[i] = 0;
> > +      __asm__ volatile ("");
> > +    }
> > +
> > +  bar ();
> > +  for (int i = 0; i < 1024; ++i)
> > +    if (x[i] != y[1023 - i])
> > +      abort ();
> > +
> > +  return 0;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 
> > "vect" } } */
> > diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
> > index 959c2d3378f..2b4421b5fb4 100644
> > --- a/gcc/tree-vect-data-refs.c
> > +++ b/gcc/tree-vect-data-refs.c
> > @@ -3074,13 +3074,15 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
> >           if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
> >             break;
> >  
> > -         /* If the step (if not zero or non-constant) is greater than the
> > +         /* If the step (if not zero or non-constant) is smaller than the
> >              difference between data-refs' inits this splits groups into
> >              suitable sizes.  */
> >           if (tree_fits_shwi_p (DR_STEP (dra)))
> >             {
> > -             HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
> > -             if (step != 0 && step <= (init_b - init_a))
> > +             unsigned HOST_WIDE_INT step
> > +               = absu_hwi (tree_to_shwi (DR_STEP (dra)));
> > +             if (step != 0
> > +                 && step <= (unsigned HOST_WIDE_INT)(init_b - init_a))
> >                 break;
> >             }
> >         }
> > diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> > index f66c5f5b367..fcae3ef5f35 100644
> > --- a/gcc/tree-vect-stmts.c
> > +++ b/gcc/tree-vect-stmts.c
> > @@ -2150,8 +2150,15 @@ get_group_load_store_type (vec_info *vinfo, 
> > stmt_vec_info stmt_info,
> >         }
> >       int cmp = compare_step_with_zero (vinfo, stmt_info);
> >       if (cmp < 0)
> > -       *memory_access_type = get_negative_load_store_type
> > -         (vinfo, stmt_info, vectype, vls_type, 1);
> > +       {
> > +         if (single_element_p)
> > +           /* ???  The VMAT_CONTIGUOUS_REVERSE code generation is
> > +              only correct for single element "interleaving" SLP.  */
> > +           *memory_access_type = get_negative_load_store_type
> > +                                  (vinfo, stmt_info, vectype, vls_type, 1);
> > +         else
> > +           *memory_access_type = VMAT_STRIDED_SLP;
> > +       }
> >       else
> >         {
> >           gcc_assert (!loop_vinfo || cmp > 0);
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to