On November 14, 2019 7:10:10 PM GMT+01:00, Richard Sandiford <richard.sandif...@arm.com> wrote:
>gcc.dg/vect/bb-slp-40.c was failing on some targets because the
>explicit dg-options overrode things like -maltivec. This patch
>uses dg-additional-options instead.
>
>Also, it seems safer not to require exactly 1 instance of each message,
>since that depends on the target vector length.
>
>gcc.dg/vect/bb-slp-41.c contained invariant constructors that are
>vectorised on AArch64 (foo) and constructors that aren't (bar).
>This meant that the number of times we print "Found vectorizable
>constructor" depended on how many vector sizes we try, since we'd
>print it for each failed attempt.
>
>In foo, we create invariant { b[0], ... } and { b[1], ... },
>and the test is making sure that the two separate invariant vectors
>can be fed from the same vector load at b. This is a different case
>from bb-slp-40.c, where the constructors are naturally separate.
>(The expected count is 4 rather than 2 because we can vectorise the
>epilogue too.)
>
>However, due to limitations in the loop vectoriser, we still do the
>addition of { b[0], ... } and { b[1], ... } in the loop. Hopefully
>that'll be fixed at some point, so this patch adds an alternative test
>that directly needs 4 separate invariant constructors. E.g. with
>Joel's SLP optimisation, the new test generates:
>
>        ldr     q4, [x1]
>        dup     v7.4s, v4.s[0]
>        dup     v6.4s, v4.s[1]
>        dup     v5.4s, v4.s[2]
>        dup     v4.4s, v4.s[3]
>
>instead of the somewhat bizarre:
>
>        ldp     s6, s5, [x1, 4]
>        ldr     s4, [x1, 12]
>        ld1r    {v7.4s}, [x1]
>        dup     v6.4s, v6.s[0]
>        dup     v5.4s, v5.s[0]
>        dup     v4.4s, v4.s[0]
>
>The patch then disables vectorisation of the original foo in
>bb-slp-41.c, so that we get the same correctness testing
>for bar but don't need to test for specific counts.
>
>Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64-linux-gnu.
>OK to install?
Ok. Richard. >Richard > > >2019-11-14 Richard Sandiford <richard.sandif...@arm.com> > >gcc/testsuite/ > PR testsuite/92366 > * gcc.dg/vect/bb-slp-40.c: Use dg-additional-options instead > of dg-options. Remove expected counts. > * gcc.dg/vect/bb-slp-41.c: Remove dg-options and explicit > dg-do run. Suppress vectorization of foo. > * gcc.dg/vect/bb-slp-42.c: New test. > >Index: gcc/testsuite/gcc.dg/vect/bb-slp-40.c >=================================================================== >--- gcc/testsuite/gcc.dg/vect/bb-slp-40.c 2019-11-04 21:13:57.363758109 >+0000 >+++ gcc/testsuite/gcc.dg/vect/bb-slp-40.c 2019-11-14 18:08:36.323546916 >+0000 >@@ -1,5 +1,5 @@ > /* { dg-do compile } */ >-/* { dg-options "-O3 -fdump-tree-slp-all" } */ >+/* { dg-additional-options "-fvect-cost-model=dynamic" } */ > /* { dg-require-effective-target vect_int } */ > > char g_d[1024], g_s1[1024], g_s2[1024]; >@@ -30,5 +30,5 @@ void foo(void) > } > > /* See that we vectorize an SLP instance. */ >-/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" >1 "slp1" } } */ >-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 >"slp1" } } */ >+/* { dg-final { scan-tree-dump "Found vectorizable constructor" "slp1" >} } */ >+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } >} */ >Index: gcc/testsuite/gcc.dg/vect/bb-slp-41.c >=================================================================== >--- gcc/testsuite/gcc.dg/vect/bb-slp-41.c 2019-11-04 21:13:57.363758109 >+0000 >+++ gcc/testsuite/gcc.dg/vect/bb-slp-41.c 2019-11-14 18:08:36.323546916 >+0000 >@@ -1,10 +1,9 @@ >-/* { dg-do run } */ >-/* { dg-options "-O3 -fdump-tree-slp-all -fno-vect-cost-model" } */ > /* { dg-require-effective-target vect_int } */ > > #define ARR_SIZE 1000 > >-void foo (int *a, int *b) >+void __attribute__((optimize (0))) >+foo (int *a, int *b) > { > int i; > for (i = 0; i < (ARR_SIZE - 2); ++i) >@@ -56,6 +55,4 @@ int main () > return 0; > > } >-/* See that we vectorize an SLP 
instance. */ >-/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" >12 "slp1" } } */ >-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 >"slp1" } } */ >+/* { dg-final { scan-tree-dump-not "vectorizing stmts using SLP" >"slp1" } } */ >Index: gcc/testsuite/gcc.dg/vect/bb-slp-42.c >=================================================================== >--- /dev/null 2019-09-17 11:41:18.176664108 +0100 >+++ gcc/testsuite/gcc.dg/vect/bb-slp-42.c 2019-11-14 18:08:36.323546916 >+0000 >@@ -0,0 +1,49 @@ >+/* { dg-require-effective-target vect_int } */ >+/* { dg-require-effective-target vect_perm } */ >+ >+#include "tree-vect.h" >+ >+#define ARR_SIZE 1024 >+ >+void __attribute__((noipa)) >+foo (int a[][ARR_SIZE], int *b) >+{ >+ int i; >+ for (i = 0; i < ARR_SIZE; ++i) >+ { >+ a[0][i] += b[0]; >+ a[1][i] += b[1]; >+ a[2][i] += b[2]; >+ a[3][i] += b[3]; >+ } >+} >+ >+int >+main () >+{ >+ int a[4][ARR_SIZE]; >+ int b[4]; >+ >+ check_vect (); >+ >+ for (int i = 0; i < 4; ++i) >+ { >+ b[i] = 20 * i; >+ for (int j = 0; j < ARR_SIZE; ++j) >+ a[i][j] = (i + 1) * ARR_SIZE - j; >+ } >+ >+ foo (a, b); >+ >+ for (int i = 0; i < 4; ++i) >+ for (int j = 0; j < ARR_SIZE; ++j) >+ if (a[i][j] != (i + 1) * ARR_SIZE - j + 20 * i) >+ __builtin_abort (); >+ >+ return 0; >+ >+} >+ >+/* See that we vectorize an SLP instance. */ >+/* { dg-final { scan-tree-dump "Found vectorizable constructor" "slp1" >{ target { ! vect_fully_masked } } } } */ >+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 >"slp1" { target { ! vect_fully_masked } } } } */