On Thu, Apr 28, 2016 at 3:26 PM, Ilya Enkovich <enkovich....@gmail.com> wrote: > On 27 Apr 16:05, Richard Biener wrote: >> >> >> >> I'd like to see testcases covering the corner-cases - have them have >> >> upper bound estimates by adjusting known array sizes and also cover >> >> the case of peeling for gaps. >> > >> > OK, I'll make more tests. >> > Thanks, >> > Ilya >> > >> >> >> >> Richard. >> >> > > Could you please look at new tests? I added one simple case with > known array size and similar tests with peeling for gaps w/ and > w/o vector iteration peeled. > > Checked new tests with RUNTESTFLAGS="vect.exp=vect-nb-iter-ub-* > --target_board=unix{-m32,}" > on x86_64-pc-linux-gnu. OK for trunk?
Can you make the new testcases runtime ones, thus check that the vectorized outcome is ok (so we don't forget any trailing iterations)? Ok with that change. Richard. > Thanks, > Ilya > -- > gcc/ > > 2016-04-28 Ilya Enkovich <ilya.enkov...@intel.com> > > * tree-vect-loop.c (vect_transform_loop): Fix > nb_iterations_upper_bound computation for vectorized loop. > > gcc/testsuite/ > > 2016-04-28 Ilya Enkovich <ilya.enkov...@intel.com> > > * gcc.target/i386/vect-unpack-2.c (avx512bw_test): Avoid > optimization of vector loop. > * gcc.target/i386/vect-unpack-3.c: New test. > * gcc.dg/vect/vect-nb-iter-ub-1.c: New test. > * gcc.dg/vect/vect-nb-iter-ub-2.c: New test. > * gcc.dg/vect/vect-nb-iter-ub-3.c: New test. > > > diff --git a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-1.c > b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-1.c > new file mode 100644 > index 0000000..b7504a8 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-1.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-mavx512bw -fdump-tree-cunroll-details" { target > { i?86-*-* x86_64-*-* } } } */ > + > +int ii[127]; > +char cc[127]; > + > +void > +foo (int s) > +{ > + int i; > + for (i = 0; i < s; i++) > + ii[i] = (int) cc[i]; > +} > + > +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { > i?86-*-* x86_64-*-* } } } } */ > +/* { dg-final { scan-tree-dump "loop turned into non-loop; it never loops" > "cunroll" { target { i?86-*-* x86_64-*-* } } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c > b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c > new file mode 100644 > index 0000000..5332636 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-mavx512bw -fdump-tree-cunroll-details" { target > { i?86-*-* x86_64-*-* } } } */ > + > +int ii[128]; > +char cc[256]; > + > +void > +foo (int s) > +{ > + int i; > + for (i = 0; i < s; i++) > + ii[i] = 
(int) cc[i*2]; > +} > + > +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { > i?86-*-* x86_64-*-* } } } } */ > +/* { dg-final { scan-tree-dump "loop turned into non-loop; it never loops" > "cunroll" { target { i?86-*-* x86_64-*-* } } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c > b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c > new file mode 100644 > index 0000000..5610f6a > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-3.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-mavx512bw -fdump-tree-cunroll-details" { target > { i?86-*-* x86_64-*-* } } } */ > + > +int ii[130]; > +char cc[258]; > + > +void > +foo (int s) > +{ > + int i; > + for (i = 0; i < s; i++) > + ii[i] = (int) cc[i*2]; > +} > + > +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { > i?86-*-* x86_64-*-* } } } } */ > +/* { dg-final { scan-tree-dump-not "loop turned into non-loop; it never > loops" "cunroll" { target { i?86-*-* x86_64-*-* } } } } */ > diff --git a/gcc/testsuite/gcc.target/i386/vect-unpack-2.c > b/gcc/testsuite/gcc.target/i386/vect-unpack-2.c > index 4825248..51c518e 100644 > --- a/gcc/testsuite/gcc.target/i386/vect-unpack-2.c > +++ b/gcc/testsuite/gcc.target/i386/vect-unpack-2.c > @@ -6,19 +6,22 @@ > > #define N 120 > signed int yy[10000]; > +signed char zz[10000]; > > void > -__attribute__ ((noinline)) foo (signed char s) > +__attribute__ ((noinline,noclone)) foo (int s) > { > - signed char i; > + int i; > for (i = 0; i < s; i++) > - yy[i] = (signed int) i; > + yy[i] = zz[i]; > } > > void > avx512bw_test () > { > signed char i; > + for (i = 0; i < N; i++) > + zz[i] = i; > foo (N); > for (i = 0; i < N; i++) > if ( (signed int)i != yy [i] ) > diff --git a/gcc/testsuite/gcc.target/i386/vect-unpack-3.c > b/gcc/testsuite/gcc.target/i386/vect-unpack-3.c > new file mode 100644 > index 0000000..eb8a93e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/vect-unpack-3.c > @@ -0,0 
+1,29 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fdump-tree-vect-details -ftree-vectorize -ffast-math > -mavx512bw -save-temps" } */ > +/* { dg-require-effective-target avx512bw } */ > + > +#include "avx512bw-check.h" > + > +#define N 120 > +signed int yy[10000]; > + > +void > +__attribute__ ((noinline)) foo (signed char s) > +{ > + signed char i; > + for (i = 0; i < s; i++) > + yy[i] = (signed int) i; > +} > + > +void > +avx512bw_test () > +{ > + signed char i; > + foo (N); > + for (i = 0; i < N; i++) > + if ( (signed int)i != yy [i] ) > + abort (); > +} > + > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ > +/* { dg-final { scan-assembler-not "vpmovsxbw\[ \\t\]+\[^\n\]*%zmm" } } */ > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c > index d813b86..da98211 100644 > --- a/gcc/tree-vect-loop.c > +++ b/gcc/tree-vect-loop.c > @@ -6921,11 +6921,13 @@ vect_transform_loop (loop_vec_info loop_vinfo) > /* Reduce loop iterations by the vectorization factor. */ > scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor), > expected_iterations / vectorization_factor); > - loop->nb_iterations_upper_bound > - = wi::udiv_floor (loop->nb_iterations_upper_bound, vectorization_factor); > if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) > && loop->nb_iterations_upper_bound != 0) > loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - 1; > + loop->nb_iterations_upper_bound > + = wi::udiv_floor (loop->nb_iterations_upper_bound + 1, > + vectorization_factor) - 1; > + > if (loop->any_estimate) > { > loop->nb_iterations_estimate