On 09/11/15 16:35, Tom de Vries wrote:
Hi,this patch series for stage1 trunk adds support to: - parallelize oacc kernels regions using parloops, and - map the loops onto the oacc gang dimension. The patch series contains these patches: 1 Insert new exit block only when needed in transform_to_exit_first_loop_alt 2 Make create_parallel_loop return void 3 Ignore reduction clause on kernels directive 4 Implement -foffload-alias 5 Add in_oacc_kernels_region in struct loop 6 Add pass_oacc_kernels 7 Add pass_dominator_oacc_kernels 8 Add pass_ch_oacc_kernels 9 Add pass_parallelize_loops_oacc_kernels 10 Add pass_oacc_kernels pass group in passes.def 11 Update testcases after adding kernels pass group 12 Handle acc loop directive 13 Add c-c++-common/goacc/kernels-*.c 14 Add gfortran.dg/goacc/kernels-*.f95 15 Add libgomp.oacc-c-c++-common/kernels-*.c 16 Add libgomp.oacc-fortran/kernels-*.f95 The first 9 patches are more or less independent, but patches 10-16 are intended to be committed at the same time. Bootstrapped and reg-tested on x86_64. Build and reg-tested with nvidia accelerator, in combination with a patch that enables accelerator testing (which is submitted at https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ). I'll post the individual patches in reply to this message.
This patch adds C/C++ oacc kernels execution tests. Thanks, - Tom
Add libgomp.oacc-c-c++-common/kernels-*.c 2015-11-09 Tom de Vries <[email protected]> * testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-g.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-mod-not-zero.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-loop.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c: Same. * testsuite/libgomp.oacc-c-c++-common/kernels-reduction.c: Same. --- .../libgomp.oacc-c-c++-common/kernels-loop-2.c | 47 ++++++++++++++++++ .../libgomp.oacc-c-c++-common/kernels-loop-3.c | 34 +++++++++++++ .../kernels-loop-and-seq-2.c | 36 ++++++++++++++ .../kernels-loop-and-seq-3.c | 37 ++++++++++++++ .../kernels-loop-and-seq-4.c | 36 ++++++++++++++ .../kernels-loop-and-seq-5.c | 37 ++++++++++++++ .../kernels-loop-and-seq-6.c | 36 ++++++++++++++ .../kernels-loop-and-seq.c | 37 ++++++++++++++ .../kernels-loop-collapse.c | 40 ++++++++++++++++ .../kernels-loop-data-2.c | 56 ++++++++++++++++++++++ .../kernels-loop-data-enter-exit-2.c | 54 +++++++++++++++++++++ .../kernels-loop-data-enter-exit.c | 51 ++++++++++++++++++++ .../kernels-loop-data-update.c | 53 ++++++++++++++++++++ .../libgomp.oacc-c-c++-common/kernels-loop-data.c | 50 +++++++++++++++++++ .../libgomp.oacc-c-c++-common/kernels-loop-g.c | 5 ++ .../kernels-loop-mod-not-zero.c | 41 ++++++++++++++++ .../libgomp.oacc-c-c++-common/kernels-loop-n.c | 47 ++++++++++++++++++ .../libgomp.oacc-c-c++-common/kernels-loop-nest.c | 26 ++++++++++ .../libgomp.oacc-c-c++-common/kernels-loop.c | 41 ++++++++++++++++ .../kernels-parallel-loop-data-enter-exit.c | 52 ++++++++++++++++++++ .../libgomp.oacc-c-c++-common/kernels-reduction.c | 37 ++++++++++++++ 21 files changed, 853 insertions(+) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-g.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-mod-not-zero.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction.c diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c new file mode 100644 index 0000000..13e57bd --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c @@ -0,0 +1,47 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + +#pragma acc kernels copyout (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + +#pragma acc kernels copyout (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c new file mode 100644 index 0000000..f61a74a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c @@ -0,0 +1,34 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int i; + + unsigned int *__restrict c; + + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + + for (COUNTERTYPE i = 0; i < N; i++) + c[i] = i * 2; + +#pragma acc kernels copy (c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = c[ii] + ii + 1; + } + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != i * 2 + i + 1) + abort (); + + free (c); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c new file mode 100644 index 0000000..2e4100f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N 32 + +unsigned int +foo (int n, unsigned int *a) +{ +#pragma acc kernels copy (a[0:N]) + { + a[0] = a[0] + 1; + + for (int i = 0; i < n; i++) + a[i] = 1; + } + + return a[0]; +} + +int +main (void) +{ + unsigned int a[N]; + unsigned res, i; + + for (i = 0; i < N; ++i) + a[i] = i % 4; + + res = foo (N, a); + if (res != 1) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c new file mode 100644 index 0000000..b3e736b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N 32 + +unsigned int +foo (int n, unsigned int *a) +{ + +#pragma acc kernels copy (a[0:N]) + { + for (int i = 0; i < n; i++) + a[i] = 1; + + a[0] = 2; + } + + return a[0]; +} + +int +main (void) +{ + unsigned int a[N]; + unsigned res, i; + + for (i = 0; i < N; ++i) + a[i] = i % 4; + + res = foo (N, a); + if (res != 2) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c new file mode 100644 index 0000000..8b9affa --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N 32 + +unsigned int +foo (int n, unsigned int *a) +{ +#pragma acc kernels copy (a[0:N]) + { + a[0] = 2; + + for (int i = 0; i < n; i++) + a[i] = 1; + } + + return a[0]; +} + +int +main (void) +{ + unsigned int a[N]; + unsigned res, i; + + for (i = 0; i < N; ++i) + a[i] = i % 4; + + res = foo (N, a); + if (res != 1) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c new file mode 100644 index 0000000..83d4e7f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N 32 + +unsigned int +foo (int n, unsigned int *a) +{ + int r; +#pragma acc kernels copyout(r) copy (a[0:N]) + { + r = a[0]; + + for (int i = 0; i < n; i++) + a[i] = 1; + } + + return r; +} + +int +main (void) +{ + unsigned int a[N]; + unsigned res, i; + + for (i = 0; i < N; ++i) + a[i] = i % 4; + + res = foo (N, a); + if (res != 0) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c new file mode 100644 index 0000000..01d5e5e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N 32 + +unsigned int +foo (int n, unsigned int *a) +{ +#pragma acc kernels copy (a[0:N]) + { + int r = a[0]; + + for (int i = 0; i < n; i++) + a[i] = 1 + r; + } + + return a[0]; +} + +int +main (void) +{ + unsigned int a[N]; + unsigned res, i; + + for (i = 0; i < N; ++i) + a[i] = i % 4; + + res = foo (N, a); + if (res != 1) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c new file mode 100644 index 0000000..61d1283 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N 32 + +unsigned int +foo (int n, unsigned int *a) +{ + +#pragma acc kernels copy (a[0:N]) + { + for (int i = 0; i < n; i++) + a[i] = 1; + + a[0] = a[0] + 1; + } + + return a[0]; +} + +int +main (void) +{ + unsigned int a[N]; + unsigned res, i; + + for (i = 0; i < N; ++i) + a[i] = i % 4; + + res = foo (N, a); + if (res != 2) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c new file mode 100644 index 0000000..f7f04cb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N 100 + +int a[N][N]; + +void __attribute__((noinline, noclone)) +foo (int m, int n) +{ + int i, j; + #pragma acc kernels + { +#pragma acc loop collapse(2) + for (i = 0; i < m; i++) + for (j = 0; j < n; j++) + a[i][j] = 1; + } +} + +int +main (void) +{ + int i, j; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + a[i][j] = 0; + + foo (N, N); + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + if (a[i][j] != 1) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c new file mode 100644 index 0000000..b889ef9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c @@ -0,0 +1,56 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + +#pragma acc data copyout (a[0:N]) + { +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + } + +#pragma acc data copyout (b[0:N]) + { +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + } + +#pragma acc data copyin (a[0:N], b[0:N]) copyout (c[0:N]) + { +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + } + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c new file mode 100644 index 0000000..d508a44 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + +#pragma acc enter data create (a[0:N]) +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } +#pragma acc exit data copyout (a[0:N]) + +#pragma acc enter data create (b[0:N]) +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } +#pragma acc exit data copyout (b[0:N]) + + +#pragma acc enter data copyin (a[0:N], b[0:N]) create (c[0:N]) +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } +#pragma acc exit data copyout (c[0:N]) + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c new file mode 100644 index 0000000..11d82f7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c @@ -0,0 +1,51 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + +#pragma acc enter data create (a[0:N], b[0:N], c[0:N]) + +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + +#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N]) + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c new file mode 100644 index 0000000..a7d4e84 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + +#pragma acc enter data create (a[0:N], b[0:N], c[0:N]) + +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc update device (b[0:N]) + +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + +#pragma acc exit data copyout (a[0:N], c[0:N]) + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c new file mode 100644 index 0000000..607d7de --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c @@ -0,0 +1,50 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + +#pragma acc data copyout (a[0:N], b[0:N], c[0:N]) + { +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + } + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-g.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-g.c new file mode 100644 index 0000000..96b6e4e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-g.c @@ -0,0 +1,5 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ +/* { dg-additional-options "-g" } */ + +#include "kernels-loop.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-mod-not-zero.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-mod-not-zero.c new file mode 100644 index 0000000..1433cb2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-mod-not-zero.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N ((1024 * 512) + 1) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c new file mode 100644 index 0000000..fd0d5b1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c @@ -0,0 +1,47 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N ((1024 * 512) + 1) +#define COUNTERTYPE unsigned int + +static int __attribute__((noinline,noclone)) +foo (COUNTERTYPE n) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (n * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (n * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (n * sizeof (unsigned int)); + + for (COUNTERTYPE i = 0; i < n; i++) + a[i] = i * 2; + + for (COUNTERTYPE i = 0; i < n; i++) + b[i] = i * 4; + +#pragma acc kernels copyin (a[0:n], b[0:n]) copyout (c[0:n]) + { + for (COUNTERTYPE ii = 0; ii < n; ii++) + c[ii] = a[ii] + b[ii]; + } + + for (COUNTERTYPE i = 0; i < n; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + +int +main (void) +{ + return foo (N); +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c new file mode 100644 index 0000000..21d2599 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N 1000 + +int +main (void) +{ + int x[N][N]; + +#pragma acc kernels copyout (x) + { + for (int ii = 0; ii < N; ii++) + for (int jj = 0; jj < N; jj++) + x[ii][jj] = ii + jj + 3; + } + + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) + if (x[i][j] != i + j + 3) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop.c new file mode 100644 index 0000000..3762e5a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c new file mode 100644 index 0000000..767f6c8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c @@ -0,0 +1,52 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + +#pragma acc enter data create (a[0:N], b[0:N], c[0:N]) + +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + +#pragma acc parallel present (b[0:N]) + { +#pragma acc loop + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + +#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N]) + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction.c new file mode 100644 index 0000000..511e25f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include <stdlib.h> + +#define n 10000 + +unsigned int a[n]; + +void __attribute__((noinline,noclone)) +foo (void) +{ + int i; + unsigned int sum = 1; + +#pragma acc kernels copyin (a[0:n]) copy (sum) + { + for (i = 0; i < n; ++i) + sum += a[i]; + } + + if (sum != 5001) + abort (); +} + +int +main () +{ + int i; + + for (i = 0; i < n; ++i) + a[i] = i % 2; + + foo (); + + return 0; +} -- 1.9.1
