On Thu, 14 Nov 2019 at 19:10, Richard Sandiford <richard.sandif...@arm.com> wrote: > > gcc.dg/vect/bb-slp-40.c was failing on some targets because the > explicit dg-options overrode things like -maltivec. This patch > uses dg-additional-options instead. > > Also, it seems safer not to require exactly 1 instance of each message, > since that depends on the target vector length. > > gcc.dg/vect/bb-slp-41.c contained invariant constructors that are > vectorised on AArch64 (foo) and constructors that aren't (bar). > This meant that the number of times we print "Found vectorizable > constructor" depended on how many vector sizes we try, since we'd > print it for each failed attempt. > > In foo, we create invariant { b[0], ... } and { b[1], ... }, > and the test is making sure that the two separate invariant vectors > can be fed from the same vector load at b. This is a different case > from bb-slp-40.c, where the constructors are naturally separate. > (The expected count is 4 rather than 2 because we can vectorise the > epilogue too.) > > However, due to limitations in the loop vectoriser, we still do the > addition of { b[0], ... } and { b[1], ... } in the loop. Hopefully > that'll be fixed at some point, so this patch adds an alternative test > that directly needs 4 separate invariant constructors. E.g. with Joel's > SLP optimisation, the new test generates: > > ldr q4, [x1] > dup v7.4s, v4.s[0] > dup v6.4s, v4.s[1] > dup v5.4s, v4.s[2] > dup v4.4s, v4.s[3] > > instead of the somewhat bizarre: > > ldp s6, s5, [x1, 4] > ldr s4, [x1, 12] > ld1r {v7.4s}, [x1] > dup v6.4s, v6.s[0] > dup v5.4s, v5.s[0] > dup v4.4s, v4.s[0] > > The patch then disables vectorisation of the original foo in > bb-slp-41.c, so that we get the same correctness testing > for bar but don't need to test for specific counts. > > Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64-linux-gnu. > OK to install? 
> > Richard > > > 2019-11-14 Richard Sandiford <richard.sandif...@arm.com> > > gcc/testsuite/ > PR testsuite/92366 > * gcc.dg/vect/bb-slp-40.c: Use dg-additional-options instead > of dg-options. Remove expected counts. > * gcc.dg/vect/bb-slp-41.c: Remove dg-options and explicit > dg-do run. Suppress vectorization of foo. > * gcc.dg/vect/bb-slp-42.c: New test. >
Hi Richard, I've noticed that gcc.dg/vect/bb-slp-42.c fails on armeb-linux-gnueabihf when GCC is configured --with-cpu cortex-a9 --with-fpu neon-fp16. FAIL: gcc.dg/vect/bb-slp-42.c -flto -ffat-lto-objects scan-tree-dump slp1 "Found vectorizable constructor" FAIL: gcc.dg/vect/bb-slp-42.c -flto -ffat-lto-objects scan-tree-dump-times slp1 "vectorizing stmts using SLP" 4 FAIL: gcc.dg/vect/bb-slp-42.c scan-tree-dump slp1 "Found vectorizable constructor" FAIL: gcc.dg/vect/bb-slp-42.c scan-tree-dump-times slp1 "vectorizing stmts using SLP" 4 This test is UNSUPPORTED when GCC is configured --with-fpu vfpv3-d16-fp16. Not sure we want to bother since quite a few vectorization tests already fail on armeb... Christophe > Index: gcc/testsuite/gcc.dg/vect/bb-slp-40.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/bb-slp-40.c 2019-11-04 21:13:57.363758109 > +0000 > +++ gcc/testsuite/gcc.dg/vect/bb-slp-40.c 2019-11-14 18:08:36.323546916 > +0000 > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O3 -fdump-tree-slp-all" } */ > +/* { dg-additional-options "-fvect-cost-model=dynamic" } */ > /* { dg-require-effective-target vect_int } */ > > char g_d[1024], g_s1[1024], g_s2[1024]; > @@ -30,5 +30,5 @@ void foo(void) > } > > /* See that we vectorize an SLP instance. 
*/ > -/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 1 > "slp1" } } */ > -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp1" > } } */ > +/* { dg-final { scan-tree-dump "Found vectorizable constructor" "slp1" } } */ > +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */ > Index: gcc/testsuite/gcc.dg/vect/bb-slp-41.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/bb-slp-41.c 2019-11-04 21:13:57.363758109 > +0000 > +++ gcc/testsuite/gcc.dg/vect/bb-slp-41.c 2019-11-14 18:08:36.323546916 > +0000 > @@ -1,10 +1,9 @@ > -/* { dg-do run } */ > -/* { dg-options "-O3 -fdump-tree-slp-all -fno-vect-cost-model" } */ > /* { dg-require-effective-target vect_int } */ > > #define ARR_SIZE 1000 > > -void foo (int *a, int *b) > +void __attribute__((optimize (0))) > +foo (int *a, int *b) > { > int i; > for (i = 0; i < (ARR_SIZE - 2); ++i) > @@ -56,6 +55,4 @@ int main () > return 0; > > } > -/* See that we vectorize an SLP instance. 
*/ > -/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 12 > "slp1" } } */ > -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "slp1" > } } */ > +/* { dg-final { scan-tree-dump-not "vectorizing stmts using SLP" "slp1" } } > */ > Index: gcc/testsuite/gcc.dg/vect/bb-slp-42.c > =================================================================== > --- /dev/null 2019-09-17 11:41:18.176664108 +0100 > +++ gcc/testsuite/gcc.dg/vect/bb-slp-42.c 2019-11-14 18:08:36.323546916 > +0000 > @@ -0,0 +1,49 @@ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-require-effective-target vect_perm } */ > + > +#include "tree-vect.h" > + > +#define ARR_SIZE 1024 > + > +void __attribute__((noipa)) > +foo (int a[][ARR_SIZE], int *b) > +{ > + int i; > + for (i = 0; i < ARR_SIZE; ++i) > + { > + a[0][i] += b[0]; > + a[1][i] += b[1]; > + a[2][i] += b[2]; > + a[3][i] += b[3]; > + } > +} > + > +int > +main () > +{ > + int a[4][ARR_SIZE]; > + int b[4]; > + > + check_vect (); > + > + for (int i = 0; i < 4; ++i) > + { > + b[i] = 20 * i; > + for (int j = 0; j < ARR_SIZE; ++j) > + a[i][j] = (i + 1) * ARR_SIZE - j; > + } > + > + foo (a, b); > + > + for (int i = 0; i < 4; ++i) > + for (int j = 0; j < ARR_SIZE; ++j) > + if (a[i][j] != (i + 1) * ARR_SIZE - j + 20 * i) > + __builtin_abort (); > + > + return 0; > + > +} > + > +/* See that we vectorize an SLP instance. */ > +/* { dg-final { scan-tree-dump "Found vectorizable constructor" "slp1" { > target { ! vect_fully_masked } } } } */ > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "slp1" > { target { ! vect_fully_masked } } } } */