Hi, This patch adds several tests that check the loop tail (epilogue) vectorization functionality.
Thanks,
Ilya
--
gcc/testsuite/

2016-07-05  Ilya Enkovich  <ilya.enkov...@intel.com>

	* lib/target-supports.exp (check_avx2_hw_available): New.
	(check_effective_target_avx2_runtime): New.
	* gcc.dg/vect/vect-tail-combine-1.c: New test.
	* gcc.dg/vect/vect-tail-combine-2.c: New test.
	* gcc.dg/vect/vect-tail-combine-3.c: New test.
	* gcc.dg/vect/vect-tail-combine-4.c: New test.
	* gcc.dg/vect/vect-tail-combine-5.c: New test.
	* gcc.dg/vect/vect-tail-combine-6.c: New test.
	* gcc.dg/vect/vect-tail-combine-7.c: New test.
	* gcc.dg/vect/vect-tail-combine-9.c: New test.
	* gcc.dg/vect/vect-tail-mask-1.c: New test.
	* gcc.dg/vect/vect-tail-mask-2.c: New test.
	* gcc.dg/vect/vect-tail-mask-3.c: New test.
	* gcc.dg/vect/vect-tail-mask-4.c: New test.
	* gcc.dg/vect/vect-tail-mask-5.c: New test.
	* gcc.dg/vect/vect-tail-mask-6.c: New test.
	* gcc.dg/vect/vect-tail-mask-7.c: New test.
	* gcc.dg/vect/vect-tail-mask-8.c: New test.
	* gcc.dg/vect/vect-tail-mask-9.c: New test.
	* gcc.dg/vect/vect-tail-nomask-1.c: New test.
	* gcc.dg/vect/vect-tail-nomask-2.c: New test.
	* gcc.dg/vect/vect-tail-nomask-3.c: New test.
	* gcc.dg/vect/vect-tail-nomask-4.c: New test.
	* gcc.dg/vect/vect-tail-nomask-5.c: New test.
	* gcc.dg/vect/vect-tail-nomask-6.c: New test.
	* gcc.dg/vect/vect-tail-nomask-7.c: New test.

diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-1.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-1.c new file mode 100644 index 0000000..134d789 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-1.c @@ -0,0 +1,106 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#define SIZE 1023 +#define ALIGN 64 + +extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak)); +extern void free (void *); + +void __attribute__((noinline)) +test_citer (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c) +{ + int i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (int *)__builtin_assume_aligned (b, ALIGN); + c = (int *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < SIZE; i++) + c[i] = a[i] + b[i]; +} + +void __attribute__((noinline)) +test_viter (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c, + int size) +{ + int i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (int *)__builtin_assume_aligned (b, ALIGN); + c = (int *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < size; i++) + c[i] = a[i] + b[i]; +} + +void __attribute__((noinline)) +init_data (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c, + int size) +{ + for (int i = 0; i < size; i++) + { + a[i] = i; + b[i] = -i; + c[i] = 0; + asm volatile("": : :"memory"); + } + a[size] = b[size] = c[size] = size; +} + + +void __attribute__((noinline)) +run_test () +{ + int *a; + int *b; + int *c; + int i; + + if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + + init_data (a, b, c, SIZE); + test_citer (a, b, c); + for (i = 0; i < 
SIZE; i++) + if (c[i] != a[i] + b[i]) + __builtin_abort (); + if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE) + __builtin_abort (); + + init_data (a, b, c, SIZE); + test_viter (a, b, c, SIZE); + for (i = 0; i < SIZE; i++) + if (c[i] != a[i] + b[i]) + __builtin_abort (); + if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE) + __builtin_abort (); + + free (a); + free (b); + free (c); +} + +int +main (int argc, const char **argv) +{ + if (!posix_memalign) + return 0; + + run_test (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-2.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-2.c new file mode 100644 index 0000000..c513c5c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-2.c @@ -0,0 +1,134 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#define SIZE 1023 +#define ALIGN 64 + +extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak)); +extern void free (void *); + +void __attribute__((noinline)) +test_citer (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c) +{ + int i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (int *)__builtin_assume_aligned (b, ALIGN); + c = (int *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < SIZE; i++) + if (a[i] > 0) + b[i] = a[i] + c[i]; +} + +void __attribute__((noinline)) +test_viter (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c, + int size) +{ + int i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (int *)__builtin_assume_aligned (b, ALIGN); + c = (int *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < 
size; i++) + if (a[i] > 0) + b[i] = a[i] + c[i]; +} + +void __attribute__((noinline)) +init_data (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c, + int size) +{ + int i; + for (i = 0; i < size; i++) + { + if (i % 2) + { + a[i] = i; + b[i] = 0; + c[i] = 2 * i; + } + else + { + a[i] = -i; + b[i] = i; + c[i] = 0; + } + asm volatile("": : :"memory"); + } + a[size] = b[size] = c[size] = size; +} + + +void __attribute__((noinline)) +run_test () +{ + int *a; + int *b; + int *c; + int i; + + if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + + init_data (a, b, c, SIZE); + test_citer (a, b, c); + for (i = 0; i < SIZE; i++) + if (a[i] > 0) + { + if (b[i] != a[i] + c[i]) + __builtin_abort (); + } + else + { + if (b[i] != i) + __builtin_abort (); + } + if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE) + __builtin_abort (); + + init_data (a, b, c, SIZE); + test_viter (a, b, c, SIZE); + for (i = 0; i < SIZE; i++) + if (a[i] > 0) + { + if (b[i] != a[i] + c[i]) + __builtin_abort (); + } + else + { + if (b[i] != i) + __builtin_abort (); + } + if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE) + __builtin_abort (); + + free (a); + free (b); + free (c); +} + +int +main (int argc, const char **argv) +{ + if (!posix_memalign) + return 0; + + run_test (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-3.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-3.c new file mode 100644 index 0000000..17c5e95 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-3.c @@ -0,0 +1,111 @@ +/* { dg-do run } */ 
+/* { dg-require-weak "" } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#define SIZE 1023 +#define ALIGN 64 + +extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak)); +extern void free (void *); + +int __attribute__((noinline)) +test_citer (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c) +{ + int res = 0; + int i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (int *)__builtin_assume_aligned (b, ALIGN); + c = (int *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < SIZE; i++) + res += a[i] + b[i] * c[i]; + + return res; +} + +int __attribute__((noinline)) +test_viter (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c, + int size) +{ + int res = 0; + int i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (int *)__builtin_assume_aligned (b, ALIGN); + c = (int *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < size; i++) + res += a[i] + b[i] * c[i]; + + return res; +} + +void __attribute__((noinline)) +init_data (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c, + int size) +{ + int i; + for (i = 0; i < size; i++) + { + a[i] = i; + b[i] = -i; + c[i] = 1; + asm volatile("": : :"memory"); + } + a[size] = b[size] = c[size] = size; +} + + +void __attribute__((noinline)) +run_test () +{ + int *a; + int *b; + int *c; + int res; + + if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + + init_data (a, b, c, SIZE); + res = test_citer (a, b, c); + if (res != 0) + __builtin_abort (); + if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE) + __builtin_abort (); + + init_data (a, b, c, SIZE); + res = test_viter (a, b, c, SIZE); + if (res != 0) + 
__builtin_abort (); + if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE) + __builtin_abort (); + + free (a); + free (b); + free (c); +} + +int +main (int argc, const char **argv) +{ + if (!posix_memalign) + return 0; + + run_test (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-4.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-4.c new file mode 100644 index 0000000..854c1ab --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-4.c @@ -0,0 +1,122 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#define SIZE 1023 +#define ALIGN 64 + +extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak)); +extern void free (void *); + +int __attribute__((noinline)) +test_citer (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c) +{ + int res = 0; + int i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (int *)__builtin_assume_aligned (b, ALIGN); + c = (int *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < SIZE; i++) + if (a[i] > 0) + res += b[i] + c[i]; + + return res; +} + +int __attribute__((noinline)) +test_viter (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c, + int size) +{ + int res = 0; + int i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (int *)__builtin_assume_aligned (b, ALIGN); + c = (int *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < size; i++) + if (a[i] > 0) + res += b[i] + c[i]; + + return res; +} + +void __attribute__((noinline)) +init_data (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c, + int size) +{ + int i; + for (i = 
0; i < size; i++) + { + if (i % 2) + { + a[i] = i; + b[i] = -i*2; + c[i] = i*2; + } + else + { + a[i] = -i; + b[i] = i; + c[i] = 10; + } + asm volatile("": : :"memory"); + } + a[size] = b[size] = c[size] = size; +} + + +void __attribute__((noinline)) +run_test () +{ + int *a; + int *b; + int *c; + int res; + + if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + + init_data (a, b, c, SIZE); + res = test_citer (a, b, c); + if (res != 0) + __builtin_abort (); + if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE) + __builtin_abort (); + + init_data (a, b, c, SIZE); + res = test_viter (a, b, c, SIZE); + if (res != 0) + __builtin_abort (); + if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE) + __builtin_abort (); + + free (a); + free (b); + free (c); +} + +int +main (int argc, const char **argv) +{ + if (!posix_memalign) + return 0; + + run_test (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-5.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-5.c new file mode 100644 index 0000000..9589715 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-5.c @@ -0,0 +1,107 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#define SIZE 1023 +#define ALIGN 64 + +extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak)); +extern void free (void *); + +void __attribute__((noinline)) +test_citer (int * __restrict__ a, + int * __restrict__ b, + int 
* __restrict__ c) +{ + long long i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (int *)__builtin_assume_aligned (b, ALIGN); + c = (int *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < SIZE; i++) + c[i] = a[i] + b[i]; +} + +void __attribute__((noinline)) +test_viter (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c, + int size) +{ + long long i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (int *)__builtin_assume_aligned (b, ALIGN); + c = (int *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < size; i++) + c[i] = a[i] + b[i]; +} + +void __attribute__((noinline)) +init_data (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c, + int size) +{ + int i; + for (i = 0; i < size; i++) + { + a[i] = i; + b[i] = -i; + c[i] = 0; + asm volatile("": : :"memory"); + } + a[size] = b[size] = c[size] = size; +} + + +void __attribute__((noinline)) +run_test () +{ + int *a; + int *b; + int *c; + long long i; + + if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + + init_data (a, b, c, SIZE); + test_citer (a, b, c); + for (i = 0; i < SIZE; i++) + if (c[i] != a[i] + b[i]) + __builtin_abort (); + if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE) + __builtin_abort (); + + init_data (a, b, c, SIZE); + test_viter (a, b, c, SIZE); + for (i = 0; i < SIZE; i++) + if (c[i] != a[i] + b[i]) + __builtin_abort (); + if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE) + __builtin_abort (); + + free (a); + free (b); + free (c); +} + +int +main (int argc, const char **argv) +{ + if (!posix_memalign) + return 0; + + run_test (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE 
COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-6.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-6.c new file mode 100644 index 0000000..284b2aa --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-6.c @@ -0,0 +1,107 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#define SIZE 1023 +#define ALIGN 64 + +extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak)); +extern void free (void *); + +void __attribute__((noinline)) +test_citer (long long * __restrict__ a, + long long * __restrict__ b, + long long * __restrict__ c) +{ + int i; + + a = (long long *)__builtin_assume_aligned (a, ALIGN); + b = (long long *)__builtin_assume_aligned (b, ALIGN); + c = (long long *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < SIZE; i++) + c[i] = a[i] + b[i]; +} + +void __attribute__((noinline)) +test_viter (long long * __restrict__ a, + long long * __restrict__ b, + long long * __restrict__ c, + int size) +{ + int i; + + a = (long long *)__builtin_assume_aligned (a, ALIGN); + b = (long long *)__builtin_assume_aligned (b, ALIGN); + c = (long long *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < size; i++) + c[i] = a[i] + b[i]; +} + +void __attribute__((noinline)) +init_data (long long * __restrict__ a, + long long * __restrict__ b, + long long * __restrict__ c, + int size) +{ + int i; + for (i = 0; i < size; i++) + { + a[i] = i; + b[i] = -i; + c[i] = 0; + asm volatile("": : :"memory"); + } + a[size] = b[size] = c[size] = size; +} + + +void __attribute__((noinline)) +run_test () +{ + long long *a; + long long *b; + long long *c; + int i; + + if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (long long)) != 0) + return; + if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (long long)) != 0) + 
return; + if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (long long)) != 0) + return; + + init_data (a, b, c, SIZE); + test_citer (a, b, c); + for (i = 0; i < SIZE; i++) + if (c[i] != a[i] + b[i]) + __builtin_abort (); + if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE) + __builtin_abort (); + + init_data (a, b, c, SIZE); + test_viter (a, b, c, SIZE); + for (i = 0; i < SIZE; i++) + if (c[i] != a[i] + b[i]) + __builtin_abort (); + if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE) + __builtin_abort (); + + free (a); + free (b); + free (c); +} + +int +main (int argc, const char **argv) +{ + if (!posix_memalign) + return 0; + + run_test (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-7.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-7.c new file mode 100644 index 0000000..b328285 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-7.c @@ -0,0 +1,156 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ffast-math -ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#define SIZE 1023 +#define ALIGN 64 + +extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak)); +extern void free (void *); + +double __attribute__((noinline)) +test_citer (int * __restrict__ a, + long long * __restrict__ b, + float * __restrict__ c, + double * __restrict__ d) +{ + double res = 0; + int i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (long long *)__builtin_assume_aligned (b, ALIGN); + c = (float *)__builtin_assume_aligned (c, ALIGN); + d = (double *)__builtin_assume_aligned (d, ALIGN); + + for (i = 0; i < SIZE; i++) + { + a[i] = c[i] + 1; + if (b[i] < 0) + res += d[i]; + 
} + + return res; +} + +double __attribute__((noinline)) +test_viter (int * __restrict__ a, + long long * __restrict__ b, + float * __restrict__ c, + double * __restrict__ d, + int size) +{ + double res = 0; + int i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (long long *)__builtin_assume_aligned (b, ALIGN); + c = (float *)__builtin_assume_aligned (c, ALIGN); + d = (double *)__builtin_assume_aligned (d, ALIGN); + + for (i = 0; i < size; i++) + { + a[i] = c[i] + 1; + if (b[i] < 0) + res += d[i]; + } + + return res; +} + +void __attribute__((noinline)) +init_data (int * __restrict__ a, + long long * __restrict__ b, + float * __restrict__ c, + double * __restrict__ d, + int size) +{ + int i; + for (i = 0; i < size; i++) + { + if (i % 2) + { + a[i] = 0; + b[i] = i; + c[i] = 2.5; + d[i] = 1; + } + else + { + a[i] = 0; + b[i] = -i; + c[i] = 2.5; + d[i] = -1; + } + asm volatile("": : :"memory"); + } + a[size] = (int)size; + b[size] = (long long)size; + c[size] = (float)size; + d[size] = (double)size; +} + +void __attribute__((noinline)) +run_test () +{ + int *a; + long long *b; + float *c; + double *d; + double res; + int i; + + if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0) + return; + if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (long long)) != 0) + return; + if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (float)) != 0) + return; + if (posix_memalign ((void **)&d, ALIGN, (SIZE + 1) * sizeof (double)) != 0) + return; + + init_data (a, b, c, d, SIZE); + res = test_citer (a, b, c, d); + res += SIZE / 2; + if (res > 0.01 || res < -0.01) + __builtin_abort (); + for (i = 0; i < SIZE; i++) + if (a[i] != 3) + __builtin_abort (); + if (a[SIZE] != (int)SIZE + || b[SIZE] != (long long)SIZE + || c[SIZE] != (float)SIZE + || d[SIZE] != (double)SIZE) + __builtin_abort (); + + init_data (a, b, c, d, SIZE); + res = test_viter (a, b, c, d, SIZE); + res += SIZE / 2; + if (res > 0.01 || res < -0.01) + __builtin_abort (); 
+ for (i = 0; i < SIZE; i++) + if (a[i] != 3) + __builtin_abort (); + if (a[SIZE] != (int)SIZE + || b[SIZE] != (long long)SIZE + || c[SIZE] != (float)SIZE + || d[SIZE] != (double)SIZE) + __builtin_abort (); + + free (a); + free (b); + free (c); + free (d); +} + +int +main (int argc, const char **argv) +{ + if (!posix_memalign) + return 0; + + run_test (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-9.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-9.c new file mode 100644 index 0000000..221835a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-9.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=dynamic -fvect-cost-model=dynamic" } */ +/* { dg-additional-options "-march=knl" { target { i?86-*-* x86_64-*-* } } } */ + +#define SIZE 33 +#define ALIGN 64 + +void +test (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c) +{ + int i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (int *)__builtin_assume_aligned (b, ALIGN); + c = (int *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < SIZE; i++) + c[i] = a[i] + b[i]; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=64\\)" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */ +/* { dg-final { scan-tree-dump-not "LOOP EPILOGUE COMBINED \\(VS=64\\)" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-1.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-1.c new file mode 100644 index 0000000..7f50a17 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-1.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" 
{ target avx2_runtime } } */ + +#include "vect-tail-combine-1.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-2.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-2.c new file mode 100644 index 0000000..995631c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-2.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#include "vect-tail-combine-2.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-3.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-3.c new file mode 100644 index 0000000..fe405bf --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-3.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#include "vect-tail-combine-3.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-4.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-4.c new file mode 100644 index 0000000..3bbd054 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-4.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options 
"-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#include "vect-tail-combine-4.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-5.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-5.c new file mode 100644 index 0000000..7541061 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-5.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#include "vect-tail-combine-5.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-6.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-6.c new file mode 100644 index 0000000..2af1c6a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-6.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#include "vect-tail-combine-6.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-7.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-7.c new file mode 100644 index 0000000..72f3119 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-7.c @@ -0,0 +1,8 @@ +/* { 
dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ffast-math -ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#include "vect-tail-combine-7.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-8.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-8.c new file mode 100644 index 0000000..552e974 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-8.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=dynamic -fvect-cost-model=dynamic" } */ +/* { dg-additional-options "-march=knl" { target { i?86-*-* x86_64-*-* } } } */ + +#define SIZE 31 +#define ALIGN 64 + +void +test (int * __restrict__ a, + int * __restrict__ b, + int * __restrict__ c) +{ + int i; + + a = (int *)__builtin_assume_aligned (a, ALIGN); + b = (int *)__builtin_assume_aligned (b, ALIGN); + c = (int *)__builtin_assume_aligned (c, ALIGN); + + for (i = 0; i < SIZE; i++) + c[i] = a[i] + b[i]; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=64\\)" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=64\\)" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-9.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-9.c new file mode 100644 index 0000000..61c0f80 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-9.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=dynamic -fvect-cost-model=dynamic" } */ +/* { dg-additional-options "-march=knl" { target { i?86-*-* x86_64-*-* } } } */ + 
+#include "vect-tail-combine-9.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=64\\)" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */ +/* { dg-final { scan-tree-dump-not "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=64\\)" "vect" { target { i?86-*-* x86_64-*-* } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c new file mode 100644 index 0000000..e3c40f7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#include "vect-tail-combine-1.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-2.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-2.c new file mode 100644 index 0000000..cea2c1d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-2.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#include "vect-tail-combine-2.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-3.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-3.c new file mode 100644 index 0000000..18bbbc4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-3.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options 
"-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#include "vect-tail-combine-3.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-4.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-4.c new file mode 100644 index 0000000..beb9e0f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-4.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#include "vect-tail-combine-4.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-5.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-5.c new file mode 100644 index 0000000..329652f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-5.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-weak "" } */ +/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */ + +#include "vect-tail-combine-5.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-6.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-6.c new file mode 100644 index 0000000..3e9a405 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-6.c @@ -0,0 +1,8 @@ +/* { dg-do run } 
*/
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-6.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-7.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-7.c
new file mode 100644
index 0000000..a229414
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-7.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ffast-math -ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-7.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 04ca176..8b54710 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1596,6 +1596,36 @@ proc check_avx_hw_available { } {
     }]
 }
 
+# Return 1 if the target supports executing AVX2 instructions, 0
+# otherwise. Cache the result.
+
+proc check_avx2_hw_available { } {
+    return [check_cached_effective_target avx2_hw_available {
+        # If this is not the right target then we can skip the test.
+        if { !([istarget x86_64-*-*] || [istarget i?86-*-*]) } {
+            expr 0
+        } else {
+            check_runtime_nocache avx2_hw_available {
+                #include "cpuid.h"
+                int main ()
+                {
+                  unsigned int eax, ebx, ecx, edx;
+                  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+                      || ((ecx & bit_OSXSAVE) != bit_OSXSAVE))
+                    return 1;
+
+                  if (__get_cpuid_max (0, 0) < 7) /* 0, not NULL: nothing included here declares NULL.  */
+                    return 1;
+
+                  __cpuid_count (7, 0, eax, ebx, ecx, edx);
+
+                  return (ebx & bit_AVX2) != bit_AVX2;
+                }
+            } ""
+        }
+    }]
+}
+
 # Return 1 if the target supports running SSE executables, 0 otherwise.
 
 proc check_effective_target_sse_runtime { } {
@@ -1629,6 +1659,17 @@ proc check_effective_target_avx_runtime { } {
     return 0
 }
 
+# Return 1 if the target supports running AVX2 executables, 0 otherwise.
+
+proc check_effective_target_avx2_runtime { } {
+    if { [check_effective_target_avx2]
+         && [check_avx2_hw_available]
+         && [check_avx_os_support_available] } {
+        return 1
+    }
+    return 0
+}
+
 # Return 1 if we are compiling for 64-bit PowerPC but we do not use direct
 # move instructions for moves from GPR to FPR.