Richard Biener <rguent...@suse.de> writes: > This fixes a condition that caused all negative step DR groups to > be detected as single element interleaving. Such groups are > rejected by interleaving vectorization but miscompiled by SLP > which is fixed by forcing VMAT_STRIDED_SLP for now. > > Bootstrap & regtest running on x86_64-unknown-linux-gnu.
Where does the link between VMAT_CONTIGUOUS_REVERSE and single-element interleaving happen? (Sorry, should probably spend more time figuring that out for myself.) In principle VMAT_CONTIGUOUS_REVERSE seems like the right classification for the testcases. Thanks, Richard > > * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Fix > group overlap condition to allow negative step DR groups. > * tree-vect-stmts.c (get_group_load_store_type): For > multi element SLP groups force VMAT_STRIDED_SLP when the step > is negative. > > * gcc.dg/vect/slp-47.c: New testcase. > * gcc.dg/vect/slp-48.c: Likewise. > --- > gcc/testsuite/gcc.dg/vect/slp-47.c | 56 ++++++++++++++++++++++++++++++ > gcc/testsuite/gcc.dg/vect/slp-48.c | 56 ++++++++++++++++++++++++++++++ > gcc/tree-vect-data-refs.c | 8 +++-- > gcc/tree-vect-stmts.c | 11 ++++-- > 4 files changed, 126 insertions(+), 5 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/vect/slp-47.c > create mode 100644 gcc/testsuite/gcc.dg/vect/slp-48.c > > diff --git a/gcc/testsuite/gcc.dg/vect/slp-47.c > b/gcc/testsuite/gcc.dg/vect/slp-47.c > new file mode 100644 > index 00000000000..7b2ddf664df > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/slp-47.c > @@ -0,0 +1,56 @@ > +/* { dg-require-effective-target vect_int } */ > + > +#include "tree-vect.h" > + > +int x[1024], y[1024]; > + > +void __attribute__((noipa)) foo() > +{ > + for (int i = 0; i < 512; ++i) > + { > + x[2*i] = y[1023 - (2*i)]; > + x[2*i+1] = y[1023 - (2*i+1)]; > + } > +} > + > +void __attribute__((noipa)) bar() > +{ > + for (int i = 0; i < 512; ++i) > + { > + x[2*i] = y[1023 - (2*i+1)]; > + x[2*i+1] = y[1023 - (2*i)]; > + } > +} > + > +int > +main () > +{ > + check_vect (); > + > + for (int i = 0; i < 1024; ++i) > + { > + x[i] = 0; > + y[i] = i; > + __asm__ volatile (""); > + } > + > + foo (); > + for (int i = 0; i < 1024; ++i) > + if (x[i] != y[1023 - i]) > + abort (); > + > + for (int i = 0; i < 1024; ++i) > + { > + x[i] = 0; > + __asm__ volatile (""); > + } > + > + bar (); > + for (int i = 0; i < 1024; ++i) > + if (x[i] != y[1023 - i^1]) > + abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" > } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/slp-48.c > b/gcc/testsuite/gcc.dg/vect/slp-48.c > new file mode 100644 > index 00000000000..0b327aede8e > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/slp-48.c > @@ -0,0 +1,56 @@ > +/* { dg-require-effective-target vect_int } */ > + > +#include "tree-vect.h" > + > +int x[1024], y[1024]; > + > +void __attribute__((noipa)) foo() > +{ > + for (int i = 0; i < 512; ++i) > + { > + x[1023 - (2*i+1)] = y[2*i]; > + x[1023 - (2*i)] = y[2*i+1]; > + } > +} > + > +void __attribute__((noipa)) bar() > +{ > + for (int i = 0; i < 512; ++i) > + { > + x[1023 - (2*i+1)] = y[2*i+1]; > + x[1023 - (2*i)] = y[2*i]; > + } > +} > + > +int > +main () > +{ > + check_vect (); > + > + for (int i = 0; i < 1024; ++i) > + { > + x[i] = 0; > + y[i] = i; > + __asm__ volatile (""); > + } > + > + foo (); > + for (int i = 0; i < 1024; ++i) > + if (x[i] != y[1023 - i^1]) > + abort (); > + > + for (int i = 0; i < 1024; ++i) > + { > + x[i] = 0; > + __asm__ volatile (""); > + } > + > + bar (); > + for (int i = 0; i < 1024; ++i) > + if (x[i] != y[1023 - i]) > + abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" > } } */ > diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c > index 959c2d3378f..2b4421b5fb4 100644 > --- a/gcc/tree-vect-data-refs.c > +++ b/gcc/tree-vect-data-refs.c > @@ -3074,13 +3074,15 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) > if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a) > break; > > - /* If the step (if not zero or non-constant) is greater than the > + /* If the step (if not zero or non-constant) is smaller than the > difference between data-refs' inits this splits groups into > suitable sizes. */ > if (tree_fits_shwi_p (DR_STEP (dra))) > { > - HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra)); > - if (step != 0 && step <= (init_b - init_a)) > + unsigned HOST_WIDE_INT step > + = absu_hwi (tree_to_shwi (DR_STEP (dra))); > + if (step != 0 > + && step <= (unsigned HOST_WIDE_INT)(init_b - init_a)) > break; > } > } > diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c > index f66c5f5b367..fcae3ef5f35 100644 > --- a/gcc/tree-vect-stmts.c > +++ b/gcc/tree-vect-stmts.c > @@ -2150,8 +2150,15 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > } > int cmp = compare_step_with_zero (vinfo, stmt_info); > if (cmp < 0) > - *memory_access_type = get_negative_load_store_type > - (vinfo, stmt_info, vectype, vls_type, 1); > + { > + if (single_element_p) > + /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is > + only correct for single element "interleaving" SLP. */ > + *memory_access_type = get_negative_load_store_type > + (vinfo, stmt_info, vectype, vls_type, 1); > + else > + *memory_access_type = VMAT_STRIDED_SLP; > + } > else > { > gcc_assert (!loop_vinfo || cmp > 0);