This patch updates the libgomp OpenACC reduction test cases to check worker, vector and combined gang-worker-vector reductions. I tried to use some macros to simplify the C test cases a bit. I probably could have made them more generic with an additional header file/macro, but that would make them too confusing to debug. The Fortran tests are a bit of a lost cause, unless someone knows how to use the preprocessor with !$acc loops.
Cesar
2015-07-17 Cesar Philippidis <ce...@codesourcery.com> libgomp/ * testsuite/libgomp.oacc-c-c++-common/reduction.h: New file. * testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Update tests with worker, vector and combined reductions. * testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-6.c: Likewise. * testsuite/libgomp.oacc-fortran/reduction-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-4.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise. diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c index bb81759..8738927 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c @@ -3,44 +3,54 @@ /* Integer reductions. 
*/ #include <stdlib.h> -#include <stdbool.h> - -#define ng 32 - -#define DO_PRAGMA(x) _Pragma (#x) - -#define check_reduction_op(type, op, init, b) \ - { \ - type res, vres; \ - res = (init); \ -DO_PRAGMA (acc parallel num_gangs (ng) copy (res)) \ -DO_PRAGMA (acc loop gang reduction (op:res)) \ - for (i = 0; i < n; i++) \ - res = res op (b); \ - \ - vres = (init); \ - for (i = 0; i < n; i++) \ - vres = vres op (b); \ - \ - if (res != vres) \ - abort (); \ - } +#include "reduction.h" + +const int ng = 8; +const int nw = 4; +const int vl = 32; static void -test_reductions_int (void) +test_reductions (void) { - const int n = 1000; + const int n = 100; int i; int array[n]; for (i = 0; i < n; i++) - array[i] = i; - - check_reduction_op (int, +, 0, array[i]); - check_reduction_op (int, *, 1, array[i]); - check_reduction_op (int, &, -1, array[i]); - check_reduction_op (int, |, 0, array[i]); - check_reduction_op (int, ^, 0, array[i]); + array[i] = i+1; + + /* Gang reductions. */ + check_reduction_op (int, +, 0, array[i], num_gangs (ng), gang); + check_reduction_op (int, *, 1, array[i], num_gangs (ng), gang); + check_reduction_op (int, &, -1, array[i], num_gangs (ng), gang); + check_reduction_op (int, |, 0, array[i], num_gangs (ng), gang); + check_reduction_op (int, ^, 0, array[i], num_gangs (ng), gang); + + /* Worker reductions. */ + check_reduction_op (int, +, 0, array[i], num_workers (nw), worker); + check_reduction_op (int, *, 1, array[i], num_workers (nw), worker); + check_reduction_op (int, &, -1, array[i], num_workers (nw), worker); + check_reduction_op (int, |, 0, array[i], num_workers (nw), worker); + check_reduction_op (int, ^, 0, array[i], num_workers (nw), worker); + + /* Vector reductions. 
*/ + check_reduction_op (int, +, 0, array[i], vector_length (vl), vector); + check_reduction_op (int, *, 1, array[i], vector_length (vl), vector); + check_reduction_op (int, &, -1, array[i], vector_length (vl), vector); + check_reduction_op (int, |, 0, array[i], vector_length (vl), vector); + check_reduction_op (int, ^, 0, array[i], vector_length (vl), vector); + + /* Combined reductions. */ + check_reduction_op (int, +, 0, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); + check_reduction_op (int, *, 1, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); + check_reduction_op (int, &, -1, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); + check_reduction_op (int, |, 0, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); + check_reduction_op (int, ^, 0, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); } static void @@ -55,32 +65,31 @@ test_reductions_bool (void) array[i] = i; cmp_val = 5; -#if 0 - // TODO - check_reduction_op (bool, &&, true, (cmp_val > array[i])); - check_reduction_op (bool, ||, false, (cmp_val > array[i])); -#endif -} -#define check_reduction_macro(type, op, init, b) \ - { \ - type res, vres; \ - res = (init); \ -DO_PRAGMA (acc parallel num_gangs (ng) copy(res)) \ -DO_PRAGMA (acc loop gang reduction (op:res)) \ - for (i = 0; i < n; i++) \ - res = op (res, (b)); \ - \ - vres = (init); \ - for (i = 0; i < n; i++) \ - vres = op (vres, (b)); \ - \ - if (res != vres) \ - abort (); \ - } - -#define max(a, b) (((a) > (b)) ? (a) : (b)) -#define min(a, b) (((a) < (b)) ? (a) : (b)) + /* Gang reductions. */ + check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_gangs (ng), + gang); + check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_gangs (ng), + gang); + + /* Worker reductions. 
*/ + check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_workers (nw), + worker); + check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_workers (nw), + worker); + + /* Vector reductions. */ + check_reduction_op (int, &&, 1, (cmp_val > array[i]), vector_length (vl), + vector); + check_reduction_op (int, ||, 0, (cmp_val > array[i]), vector_length (vl), + vector); + + /* Combined reductions. */ + check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker vector); + check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker vector); +} static void test_reductions_minmax (void) @@ -92,14 +101,32 @@ test_reductions_minmax (void) for (i = 0; i < n; i++) array[i] = i; - check_reduction_macro (int, min, n + 1, array[i]); - check_reduction_macro (int, max, -1, array[i]); + /* Gang reductions. */ + check_reduction_macro (int, min, n + 1, array[i], num_gangs (ng), gang); + check_reduction_macro (int, max, -1, array[i], num_gangs (ng), gang); + + /* Worker reductions. */ + check_reduction_macro (int, min, n + 1, array[i], num_workers (nw), worker); + check_reduction_macro (int, max, -1, array[i], num_workers (nw), worker); + + /* Vector reductions. */ + check_reduction_macro (int, min, n + 1, array[i], vector_length (vl), + vector); + check_reduction_macro (int, max, -1, array[i], vector_length (vl), vector); + + /* Combined reductions. 
*/ + check_reduction_macro (int, min, n + 1, array[i], num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker + vector); + check_reduction_macro (int, max, -1, array[i], num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker + vector); } int main (void) { - test_reductions_int (); + test_reductions (); test_reductions_bool (); test_reductions_minmax (); return 0; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c index ba6eb27..2465ddd 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c @@ -3,123 +3,78 @@ /* float reductions. */ #include <stdlib.h> -#include <stdbool.h> -#include <math.h> +#include "reduction.h" -#define ng 32 +const int ng = 8; +const int nw = 4; +const int vl = 32; -int -main(void) +static void +test_reductions (void) { - const int n = 1000; + const int n = 100; int i; - float vresult, result, array[n]; - int lvresult, lresult; + float array[n]; for (i = 0; i < n; i++) - array[i] = i; + array[i] = i+1; - result = 0; - vresult = 0; + /* Gang reductions. */ + check_reduction_op (float, +, 0, array[i], num_gangs (ng), gang); + check_reduction_op (float, *, 1, array[i], num_gangs (ng), gang); - /* '+' reductions. */ -#pragma acc parallel num_gangs (ng) copy (result) -#pragma acc loop reduction (+:result) gang - for (i = 0; i < n; i++) - result += array[i]; - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - vresult += array[i]; + /* Worker reductions. */ + check_reduction_op (float, +, 0, array[i], num_workers (nw), worker); + check_reduction_op (float, *, 1, array[i], num_workers (nw), worker); - if (result != vresult) - abort (); - - result = 0; - vresult = 0; - - /* '*' reductions. */ -#pragma acc parallel num_gangs (ng) copy (result) -#pragma acc loop reduction (*:result) gang - for (i = 0; i < n; i++) - result *= array[i]; - - /* Verify the reduction. 
*/ - for (i = 0; i < n; i++) - vresult *= array[i]; - - if (fabs(result - vresult) > .0001) - abort (); - - result = 0; - vresult = 0; - - /* 'max' reductions. */ -#pragma acc parallel num_gangs (ng) copy (result) -#pragma acc loop reduction (max:result) gang - for (i = 0; i < n; i++) - result = result > array[i] ? result : array[i]; - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - vresult = vresult > array[i] ? vresult : array[i]; - - if (result != vresult) - abort (); - - result = 0; - vresult = 0; - - /* 'min' reductions. */ -#pragma acc parallel num_gangs (ng) copy (result) -#pragma acc loop reduction (min:result) gang - for (i = 0; i < n; i++) - result = result < array[i] ? result : array[i]; + /* Vector reductions. */ + check_reduction_op (float, +, 0, array[i], vector_length (vl), vector); + check_reduction_op (float, *, 1, array[i], vector_length (vl), vector); - /* Verify the reduction. */ - for (i = 0; i < n; i++) - vresult = vresult < array[i] ? vresult : array[i]; - - if (result != vresult) - abort (); - - result = 5; - vresult = 5; - - lresult = 0; - lvresult = 0; - - /* '&&' reductions. */ -#pragma acc parallel num_gangs (ng) copy (result) -#pragma acc loop reduction (&&:lresult) gang - for (i = 0; i < n; i++) - lresult = lresult && (result > array[i]); - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - lvresult = lresult && (result > array[i]); - - if (lresult != lvresult) - abort (); - - result = 5; - vresult = 5; - - lresult = 0; - lvresult = 0; + /* Combined reductions. */ + check_reduction_op (float, +, 0, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); + check_reduction_op (float, *, 1, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); +} - /* '||' reductions. 
*/ -#pragma acc parallel num_gangs (ng) copy (result) -#pragma acc loop reduction (||:lresult) gang - for (i = 0; i < n; i++) - lresult = lresult || (result > array[i]); +static void +test_reductions_minmax (void) +{ + const int n = 1000; + int i; + float array[n]; - /* Verify the reduction. */ for (i = 0; i < n; i++) - lvresult = lresult || (result > array[i]); + array[i] = i; - if (lresult != lvresult) - abort (); + /* Gang reductions. */ + check_reduction_macro (float, min, n + 1, array[i], num_gangs (ng), gang); + check_reduction_macro (float, max, -1, array[i], num_gangs (ng), gang); + + /* Worker reductions. */ + check_reduction_macro (float, min, n + 1, array[i], num_workers (nw), + worker); + check_reduction_macro (float, max, -1, array[i], num_workers (nw), worker); + + /* Vector reductions. */ + check_reduction_macro (float, min, n + 1, array[i], vector_length (vl), + vector); + check_reduction_macro (float, max, -1, array[i], vector_length (vl), vector); + + /* Combined reductions. */ + check_reduction_macro (float, min, n + 1, array[i], num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker + vector); + check_reduction_macro (float, max, -1, array[i], num_gangs (ng) + num_workers (nw)vector_length (vl), gang worker + vector); +} +int +main (void) +{ + test_reductions (); + test_reductions_minmax (); return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c index 5ecc651..091421f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c @@ -3,123 +3,79 @@ /* double reductions. 
*/ #include <stdlib.h> -#include <stdbool.h> -#include <math.h> +#include "reduction.h" -#define ng 32 +const int ng = 8; +const int nw = 4; +const int vl = 32; -int -main(void) +static void +test_reductions (void) { - const int n = 1000; + const int n = 10; int i; - double vresult, result, array[n]; - int lvresult, lresult; + double array[n]; for (i = 0; i < n; i++) - array[i] = i; + array[i] = i+1; - result = 0; - vresult = 0; + /* Gang reductions. */ + check_reduction_op (double, +, 0, array[i], num_gangs (ng), gang); + check_reduction_op (double, *, 1, array[i], num_gangs (ng), gang); - /* '+' reductions. */ -#pragma acc parallel num_gangs (ng) copy (result) -#pragma acc loop reduction (+:result) gang - for (i = 0; i < n; i++) - result += array[i]; - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - vresult += array[i]; + /* Worker reductions. */ + check_reduction_op (double, +, 0, array[i], num_workers (nw), worker); + check_reduction_op (double, *, 1, array[i], num_workers (nw), worker); - if (result != vresult) - abort (); - - result = 0; - vresult = 0; - - /* '*' reductions. */ -#pragma acc parallel num_gangs (ng) copy (result) -#pragma acc loop reduction (*:result) gang - for (i = 0; i < n; i++) - result *= array[i]; - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - vresult *= array[i]; - - if (fabs(result - vresult) > .0001) - abort (); - - result = 0; - vresult = 0; - - /* 'max' reductions. */ -#pragma acc parallel num_gangs (ng) copy (result) -#pragma acc loop reduction (max:result) gang - for (i = 0; i < n; i++) - result = result > array[i] ? result : array[i]; - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - vresult = vresult > array[i] ? vresult : array[i]; - - if (result != vresult) - abort (); - - result = 0; - vresult = 0; - - /* 'min' reductions. */ -#pragma acc parallel num_gangs (ng) copy (result) -#pragma acc loop reduction (min:result) gang - for (i = 0; i < n; i++) - result = result < array[i] ? 
result : array[i]; + /* Vector reductions. */ + check_reduction_op (double, +, 0, array[i], vector_length (vl), vector); + check_reduction_op (double, *, 1, array[i], vector_length (vl), vector); - /* Verify the reduction. */ - for (i = 0; i < n; i++) - vresult = vresult < array[i] ? vresult : array[i]; - - if (result != vresult) - abort (); - - result = 5; - vresult = 5; - - lresult = 0; - lvresult = 0; - - /* '&&' reductions. */ -#pragma acc parallel num_gangs (ng) copy (result) -#pragma acc loop reduction (&&:lresult) gang - for (i = 0; i < n; i++) - lresult = lresult && (result > array[i]); - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - lvresult = lresult && (result > array[i]); - - if (lresult != lvresult) - abort (); - - result = 5; - vresult = 5; - - lresult = 0; - lvresult = 0; + /* Combined reductions. */ + check_reduction_op (double, +, 0, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); + check_reduction_op (double, *, 1, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); +} - /* '||' reductions. */ -#pragma acc parallel num_gangs (ng) copy (result) -#pragma acc loop reduction (||:lresult) gang - for (i = 0; i < n; i++) - lresult = lresult || (result > array[i]); +static void +test_reductions_minmax (void) +{ + const int n = 1000; + int i; + double array[n]; - /* Verify the reduction. */ for (i = 0; i < n; i++) - lvresult = lresult || (result > array[i]); + array[i] = i; - if (lresult != lvresult) - abort (); + /* Gang reductions. */ + check_reduction_macro (double, min, n + 1, array[i], num_gangs (ng), gang); + check_reduction_macro (double, max, -1, array[i], num_gangs (ng), gang); + + /* Worker reductions. */ + check_reduction_macro (double, min, n + 1, array[i], num_workers (nw), + worker); + check_reduction_macro (double, max, -1, array[i], num_workers (nw), worker); + + /* Vector reductions. 
*/ + check_reduction_macro (double, min, n + 1, array[i], vector_length (vl), + vector); + check_reduction_macro (double, max, -1, array[i], vector_length (vl), + vector); + + /* Combined reductions. */ + check_reduction_macro (double, min, n + 1, array[i], num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker + vector); + check_reduction_macro (double, max, -1, array[i], num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker + vector); +} +int +main (void) +{ + test_reductions (); + test_reductions_minmax (); return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c index c7069e9..816b09f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c @@ -1,95 +1,53 @@ /* { dg-do run { target { ! { hppa*-*-hpux* } } } } */ -/* { dg-xfail-run-if "libgomp: cuStreamSynchronize error: launch timeout" { openacc_nvidia_accel_selected } } */ /* complex reductions. */ #include <stdlib.h> -#include <stdbool.h> -#include <math.h> #include <complex.h> +#include "reduction.h" -#define ng 32 +const int ng = 8; +const int nw = 4; +const int vl = 32; -int -main(void) +static void +test_reductions (void) { - const int n = 1000; + const int n = 10; int i; - double _Complex vresult, result, array[n]; - bool lvresult, lresult; - - for (i = 0; i < n; i++) - array[i] = i; - - result = 0; - vresult = 0; - - /* '+' reductions. */ -#pragma acc parallel num_gangs (ng) copy (result) -#pragma acc loop reduction (+:result) gang - for (i = 0; i < n; i++) - result += array[i]; - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - vresult += array[i]; - - if (result != vresult) - abort (); - - result = 0; - vresult = 0; - - /* '*' reductions. 
*/ -#pragma acc parallel num_gangs (ng) copy (result) -#pragma acc loop reduction (*:result) gang - for (i = 0; i < n; i++) - result *= array[i]; - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - vresult *= array[i]; - - if (cabsf (result - vresult) > .0001) - abort (); - - result = 5; - vresult = 5; - - lresult = false; - lvresult = false; - - /* '&&' reductions. */ -#pragma acc parallel num_gangs (ng) copy (lresult) -#pragma acc loop reduction (&&:lresult) gang - for (i = 0; i < n; i++) - lresult = lresult && (creal(result) > creal(array[i])); - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - lvresult = lvresult && (creal(result) > creal(array[i])); - - if (lresult != lvresult) - abort (); - - result = 5; - vresult = 5; - - lresult = false; - lvresult = false; - - /* '||' reductions. */ -#pragma acc parallel num_gangs (ng) copy (lresult) -#pragma acc loop reduction (||:lresult) gang - for (i = 0; i < n; i++) - lresult = lresult || (creal(result) > creal(array[i])); - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - lvresult = lvresult || (creal(result) > creal(array[i])); - - if (lresult != lvresult) - abort (); + double _Complex array[n]; + + for (i = 0; i < n; i++) + array[i] = i+1; + + /* Gang reductions. */ + check_reduction_op (double, +, 0, creal (array[i]), num_gangs (ng), gang); + check_reduction_op (double, *, 1, creal (array[i]), num_gangs (ng), gang); + + /* Worker reductions. */ + check_reduction_op (double, +, 0, creal (array[i]), num_workers (nw), + worker); + check_reduction_op (double, *, 1, creal (array[i]), num_workers (nw), + worker); + + /* Vector reductions. */ + check_reduction_op (double, +, 0, creal (array[i]), vector_length (vl), + vector); + check_reduction_op (double, *, 1, creal (array[i]), vector_length (vl), + vector); + + /* Combined reductions. 
*/ + check_reduction_op (double, +, 0, creal (array[i]), num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker + vector); + check_reduction_op (double, *, 1, creal (array[i]), num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker + vector); +} +int +main (void) +{ + test_reductions (); return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c index 23a194c..e979ab6 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c @@ -1,32 +1,53 @@ +/* { dg-do run } */ + +/* Multiple reductions. */ + #include <stdio.h> #include <stdlib.h> +const int ng = 8; +const int nw = 4; +const int vl = 32; + +const int n = 100; + +#define DO_PRAGMA(x) _Pragma (#x) + +#define check_reduction(gwv_par, gwv_loop) \ + { \ + s1 = 2; s2 = 5; \ +DO_PRAGMA (acc parallel gwv_par copy (s1, s2)) \ +DO_PRAGMA (acc loop gwv_loop reduction (+:s1, s2)) \ + for (i = 0; i < n; i++) \ + { \ + s1 = s1 + 3; \ + s2 = s2 + 2; \ + } \ + \ + if (s1 != v1 || s2 != v2) \ + abort (); \ + } + int main (void) { int s1 = 2, s2 = 5, v1 = 2, v2 = 5; - int n = 100; int i; -#pragma acc parallel num_gangs (1000) copy (s1, s2) -#pragma acc loop reduction (+:s1, s2) gang - for (i = 0; i < n; i++) - { - s1 = s1 + 3; - s2 = s2 + 2; - } - for (i = 0; i < n; i++) { v1 = v1 + 3; v2 = v2 + 2; } - - if (s1 != v1) - abort (); - - if (s2 != v2) - abort (); - + + check_reduction (num_gangs (ng), gang); + + /* Nvptx targets require a vector_length of 32 in order to allow spinlocks with + gangs. 
*/ + check_reduction (num_workers (nw) vector_length (vl), worker); + check_reduction (vector_length (vl), vector); + check_reduction (num_gangs (ng) num_workers (nw) vector_length (vl), gang + worker vector); + return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c new file mode 100644 index 0000000..17fa951 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +/* Check nested reductions. */ + +#include <assert.h> + +#define n 1000 + +int +main () +{ + int i, j, red = 0, vred = 0; + int chunksize = 10; + +#pragma acc parallel num_gangs (10) vector_length (32) copy (red) +#pragma acc loop reduction (+:red) gang + for (i = 0; i < n/chunksize; i++) +#pragma acc loop reduction (+:red) vector + for (j = 0; j < chunksize; j++) + red += j; + + for (i = 0; i < n/chunksize; i++) + for (j = 0; j < chunksize; j++) + vred += j; + + assert (red == vred); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction.h new file mode 100644 index 0000000..1b3f8d4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction.h @@ -0,0 +1,43 @@ +#ifndef REDUCTION_H +#define REDUCTION_H + +#define DO_PRAGMA(x) _Pragma (#x) + +#define check_reduction_op(type, op, init, b, gwv_par, gwv_loop) \ + { \ + type res, vres; \ + res = (init); \ +DO_PRAGMA (acc parallel gwv_par copy (res)) \ +DO_PRAGMA (acc loop gwv_loop reduction (op:res)) \ + for (i = 0; i < n; i++) \ + res = res op (b); \ + \ + vres = (init); \ + for (i = 0; i < n; i++) \ + vres = vres op (b); \ + \ + if (res != vres) \ + abort (); \ + } + +#define check_reduction_macro(type, op, init, b, gwv_par, gwv_loop) \ + { \ + type res, vres; \ + res = (init); \ + DO_PRAGMA (acc parallel gwv_par copy(res)) \ +DO_PRAGMA (acc loop gwv_loop reduction (op:res)) \ + for (i = 0; i < n; i++) \ + res 
= op (res, (b)); \ + \ + vres = (init); \ + for (i = 0; i < n; i++) \ + vres = op (vres, (b)); \ + \ + if (res != vres) \ + abort (); \ + } + +#define max(a, b) (((a) > (b)) ? (a) : (b)) +#define min(a, b) (((a) < (b)) ? (a) : (b)) + +#endif diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 index 3419ffd..03cca04 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 @@ -5,24 +5,50 @@ program reduction_1 implicit none - integer, parameter :: n = 10, gangs = 20 - integer :: i, vresult, result - logical :: lresult, lvresult + integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32 + integer :: i, vresult, rg, rw, rv, rc + logical :: lrg, lrw, lrv, lrc, lvresult integer, dimension (n) :: array do i = 1, n array(i) = i end do - result = 0 + ! + ! '+' reductions + ! + + rg = 0 + rw = 0 + rv = 0 + rc = 0 vresult = 0 - ! '+' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(+:rg) gang + do i = 1, n + rg = rg + array(i) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(+:result) gang + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(+:rw) worker do i = 1, n - result = result + array(i) + rw = rw + array(i) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(+:rv) vector + do i = 1, n + rv = rv + array(i) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(+:rc) gang worker vector + do i = 1, n + rc = rc + array(i) end do !$acc end parallel @@ -31,17 +57,46 @@ program reduction_1 vresult = vresult + array(i) end do - if (result.ne.vresult) call abort - - result = 0 - vresult = 0 + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort + ! ! 
'*' reductions + ! - !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(*:result) gang + rg = 1 + rw = 1 + rv = 1 + rc = 1 + vresult = 1 + + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(*:rg) gang do i = 1, n - result = result * array(i) + rg = rg * array(i) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(*:rw) worker + do i = 1, n + rw = rw * array(i) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(*:rv) vector + do i = 1, n + rv = rv * array(i) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(*:rc) gang worker vector + do i = 1, n + rc = rc * array(i) end do !$acc end parallel @@ -50,17 +105,46 @@ program reduction_1 vresult = vresult * array(i) end do - if (result.ne.vresult) call abort + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort + + ! + ! 'max' reductions + ! - result = 0 + rg = 0 + rw = 0 + rv = 0 + rc = 0 vresult = 0 - ! 
'max' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(max:rg) gang + do i = 1, n + rg = max (rg, array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(max:rw) worker + do i = 1, n + rw = max (rw, array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(max:rv) vector + do i = 1, n + rv = max (rv, array(i)) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(max:result) gang + !$acc parallel num_gangs(ng) Num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(max:rc) gang worker vector do i = 1, n - result = max (result, array(i)) + rc = max (rc, array(i)) end do !$acc end parallel @@ -69,17 +153,46 @@ program reduction_1 vresult = max (vresult, array(i)) end do - if (result.ne.vresult) call abort - - result = 1 - vresult = 1 + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort + ! ! 'min' reductions + ! 
+ + rg = 0 + rw = 0 + rv = 0 + rc = 0 + vresult = 0 + + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(min:rg) gang + do i = 1, n + rg = min (rg, array(i)) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(min:result) gang + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(min:rw) worker do i = 1, n - result = min (result, array(i)) + rw = min (rw, array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(min:rv) vector + do i = 1, n + rv = min (rv, array(i)) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(min:rc) gang worker vector + do i = 1, n + rc = min (rc, array(i)) end do !$acc end parallel @@ -88,17 +201,46 @@ program reduction_1 vresult = min (vresult, array(i)) end do - if (result.ne.vresult) call abort + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort + + ! + ! 'iand' reductions + ! - result = 1 + rg = 1 + rw = 1 + rv = 1 + rc = 1 vresult = 1 - ! 
'iand' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(iand:rg) gang + do i = 1, n + rg = iand (rg, array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(iand:rw) worker + do i = 1, n + rw = iand (rw, array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(iand:rv) vector + do i = 1, n + rv = iand (rv, array(i)) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(iand:result) gang + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(iand:rc) gang worker vector do i = 1, n - result = iand (result, array(i)) + rc = iand (rc, array(i)) end do !$acc end parallel @@ -107,17 +249,46 @@ program reduction_1 vresult = iand (vresult, array(i)) end do - if (result.ne.vresult) call abort - - result = 1 - vresult = 1 + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort + ! ! 'ior' reductions + ! 
+ + rg = 0 + rw = 0 + rv = 0 + rc = 0 + vresult = 0 + + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(ior:rg) gang + do i = 1, n + rg = ior (rg, array(i)) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(ior:result) gang + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(ior:rw) worker do i = 1, n - result = ior (result, array(i)) + rw = ior (rw, array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(ior:rv) vector + do i = 1, n + rv = ior (rv, array(i)) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(ior:rc) gang worker vector + do i = 1, n + rc = ior (rc, array(i)) end do !$acc end parallel @@ -126,17 +297,46 @@ program reduction_1 vresult = ior (vresult, array(i)) end do - if (result.ne.vresult) call abort + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort - result = 0 + ! + ! 'ieor' reductions + ! + + rg = 0 + rw = 0 + rv = 0 + rc = 0 vresult = 0 - ! 
'ieor' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(ieor:rg) gang + do i = 1, n + rg = ieor (rg, array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(ieor:rw) worker + do i = 1, n + rw = ieor (rw, array(i)) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(ieor:result) gang + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(ieor:rv) vector do i = 1, n - result = ieor (result, array(i)) + rv = ieor (rv, array(i)) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(ieor:rc) gang worker vector + do i = 1, n + rc = ieor (rc, array(i)) end do !$acc end parallel @@ -145,17 +345,46 @@ program reduction_1 vresult = ieor (vresult, array(i)) end do - if (result.ne.vresult) call abort - - lresult = .false. - lvresult = .false. + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort + ! ! '.and.' reductions + ! + + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. + + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.and.:lrg) gang + do i = 1, n + lrg = lrg .and. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.and.:lrw) worker + do i = 1, n + lrw = lrw .and. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.and.:lrv) vector + do i = 1, n + lrv = lrv .and. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(lresult) - !$acc loop reduction(.and.:lresult) gang + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.and.:lrc) gang worker vector do i = 1, n - lresult = lresult .and. (array(i) .ge. 5) + lrc = lrc .and. 
(array(i) .ge. 5) end do !$acc end parallel @@ -164,17 +393,46 @@ program reduction_1 lvresult = lvresult .and. (array(i) .ge. 5) end do - if (lresult .neqv. lvresult) call abort + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + + ! + ! '.or.' reductions + ! - lresult = .false. + lrg = .false. + lrw = .false. + lrv = .false. + lrc = .false. lvresult = .false. - ! '.or.' reductions + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.or.:lrg) gang + do i = 1, n + lrg = lrg .or. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.or.:lrw) worker + do i = 1, n + lrw = lrw .or. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.or.:lrv) vector + do i = 1, n + lrv = lrv .or. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(lresult) - !$acc loop reduction(.or.:lresult) gang + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.or.:lrc) gang worker vector do i = 1, n - lresult = lresult .or. (array(i) .ge. 5) + lrc = lrc .or. (array(i) .ge. 5) end do !$acc end parallel @@ -183,17 +441,46 @@ program reduction_1 lvresult = lvresult .or. (array(i) .ge. 5) end do - if (lresult .neqv. lvresult) call abort - - lresult = .false. - lvresult = .false. + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + ! ! '.eqv.' reductions + ! + + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. + + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.eqv.:lrg) gang + do i = 1, n + lrg = lrg .eqv. (array(i) .ge. 
5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.eqv.:lrw) worker + do i = 1, n + lrw = lrw .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.eqv.:lrv) vector + do i = 1, n + lrv = lrv .eqv. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(lresult) - !$acc loop reduction(.eqv.:lresult) gang + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.eqv.:lrc) gang worker vector do i = 1, n - lresult = lresult .eqv. (array(i) .ge. 5) + lrc = lrc .eqv. (array(i) .ge. 5) end do !$acc end parallel @@ -202,17 +489,46 @@ program reduction_1 lvresult = lvresult .eqv. (array(i) .ge. 5) end do - if (lresult .neqv. lvresult) call abort - - lresult = .false. - lvresult = .false. + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + ! ! '.neqv.' reductions + ! + + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. + + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.neqv.:lrg) gang + do i = 1, n + lrg = lrg .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.neqv.:lrw) worker + do i = 1, n + lrw = lrw .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.neqv.:lrv) vector + do i = 1, n + lrv = lrv .neqv. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(lresult) - !$acc loop reduction(.neqv.:lresult) gang + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.neqv.:lrc) gang worker vector do i = 1, n - lresult = lresult .neqv. (array(i) .ge. 5) + lrc = lrc .neqv. (array(i) .ge. 
5) end do !$acc end parallel @@ -221,5 +537,8 @@ program reduction_1 lvresult = lvresult .neqv. (array(i) .ge. 5) end do - if (lresult .neqv. lvresult) call abort + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort end program reduction_1 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 index fe6a9c3..cd09099 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 @@ -5,26 +5,52 @@ program reduction_2 implicit none - integer, parameter :: n = 10, gangs = 20 + integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32 integer :: i - real, parameter :: e = .001 - real :: vresult, result - logical :: lresult, lvresult - real, dimension (n) :: array + real :: vresult, rg, rw, rv, rc + real, parameter :: e = 0.001 + logical :: lrg, lrw, lrv, lrc, lvresult + real, dimension (n) :: array do i = 1, n array(i) = i end do - result = 0 + ! + ! '+' reductions + ! + + rg = 0 + rw = 0 + rv = 0 + rc = 0 vresult = 0 - ! 
'+' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(+:rg) gang + do i = 1, n + rg = rg + array(i) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(+:result) gang + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(+:rw) worker do i = 1, n - result = result + array(i) + rw = rw + array(i) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(+:rv) vector + do i = 1, n + rv = rv + array(i) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(+:rc) gang worker vector + do i = 1, n + rc = rc + array(i) end do !$acc end parallel @@ -33,17 +59,46 @@ program reduction_2 vresult = vresult + array(i) end do - if (abs (result - vresult) .ge. e) call abort + if (abs (rg - vresult) .ge. e) call abort + if (abs (rw - vresult) .ge. e) call abort + if (abs (rv - vresult) .ge. e) call abort + if (abs (rc - vresult) .ge. e) call abort + + ! + ! '*' reductions + ! - result = 1 + rg = 1 + rw = 1 + rv = 1 + rc = 1 vresult = 1 - ! '*' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(*:rg) gang + do i = 1, n + rg = rg * array(i) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(*:result) gang + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(*:rw) worker do i = 1, n - result = result * array(i) + rw = rw * array(i) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(*:rv) vector + do i = 1, n + rv = rv * array(i) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(*:rc) gang worker vector + do i = 1, n + rc = rc * array(i) end do !$acc end parallel @@ -52,17 +107,46 @@ program reduction_2 vresult = vresult * array(i) end do - if (result.ne.vresult) call abort + if (abs (rg - vresult) .ge. 
e) call abort + if (abs (rw - vresult) .ge. e) call abort + if (abs (rv - vresult) .ge. e) call abort + if (abs (rc - vresult) .ge. e) call abort + + ! + ! 'max' reductions + ! - result = 0 + rg = 0 + rw = 0 + rv = 0 + rc = 0 vresult = 0 - ! 'max' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(max:rg) gang + do i = 1, n + rg = max (rg, array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(max:rw) worker + do i = 1, n + rw = max (rw, array(i)) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(max:result) gang + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(max:rv) vector do i = 1, n - result = max (result, array(i)) + rv = max (rv, array(i)) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(max:rc) gang worker vector + do i = 1, n + rc = max (rc, array(i)) end do !$acc end parallel @@ -71,17 +155,46 @@ program reduction_2 vresult = max (vresult, array(i)) end do - if (result.ne.vresult) call abort - - result = 1 - vresult = 1 + if (abs (rg - vresult) .ge. e) call abort + if (abs (rw - vresult) .ge. e) call abort + if (abs (rv - vresult) .ge. e) call abort + if (abs (rc - vresult) .ge. e) call abort + ! ! 'min' reductions + ! 
- !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(min:result) gang + rg = 0 + rw = 0 + rv = 0 + rc = 0 + vresult = 0 + + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(min:rg) gang + do i = 1, n + rg = min (rg, array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(min:rw) worker + do i = 1, n + rw = min (rw, array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(min:rv) vector do i = 1, n - result = min (result, array(i)) + rv = min (rv, array(i)) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(min:rc) gang worker vector + do i = 1, n + rc = min (rc, array(i)) end do !$acc end parallel @@ -90,17 +203,46 @@ program reduction_2 vresult = min (vresult, array(i)) end do - if (result.ne.vresult) call abort - - lresult = .false. - lvresult = .false. + if (abs (rg - vresult) .ge. e) call abort + if (abs (rw - vresult) .ge. e) call abort + if (abs (rv - vresult) .ge. e) call abort + if (abs (rc - vresult) .ge. e) call abort + ! ! '.and.' reductions + ! + + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. + + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.and.:lrg) gang + do i = 1, n + lrg = lrg .and. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(lresult) - !$acc loop reduction(.and.:lresult) gang + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.and.:lrw) worker do i = 1, n - lresult = lresult .and. (array(i) .ge. 5) + lrw = lrw .and. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.and.:lrv) vector + do i = 1, n + lrv = lrv .and. (array(i) .ge. 
5) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.and.:lrc) gang worker vector + do i = 1, n + lrc = lrc .and. (array(i) .ge. 5) end do !$acc end parallel @@ -109,17 +251,46 @@ program reduction_2 lvresult = lvresult .and. (array(i) .ge. 5) end do - if (lresult .neqv. lvresult) call abort + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort - lresult = .false. + ! + ! '.or.' reductions + ! + + lrg = .false. + lrw = .false. + lrv = .false. + lrc = .false. lvresult = .false. - ! '.or.' reductions + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.or.:lrg) gang + do i = 1, n + lrg = lrg .or. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.or.:lrw) worker + do i = 1, n + lrw = lrw .or. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(lresult) - !$acc loop reduction(.or.:lresult) gang + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.or.:lrv) vector do i = 1, n - lresult = lresult .or. (array(i) .ge. 5) + lrv = lrv .or. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.or.:lrc) gang worker vector + do i = 1, n + lrc = lrc .or. (array(i) .ge. 5) end do !$acc end parallel @@ -128,17 +299,46 @@ program reduction_2 lvresult = lvresult .or. (array(i) .ge. 5) end do - if (lresult .neqv. lvresult) call abort - - lresult = .false. - lvresult = .false. + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + ! ! '.eqv.' reductions + ! + + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. 
+ + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.eqv.:lrg) gang + do i = 1, n + lrg = lrg .eqv. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(lresult) - !$acc loop reduction(.eqv.:lresult) gang + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.eqv.:lrw) worker do i = 1, n - lresult = lresult .eqv. (array(i) .ge. 5) + lrw = lrw .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.eqv.:lrv) vector + do i = 1, n + lrv = lrv .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.eqv.:lrc) gang worker vector + do i = 1, n + lrc = lrc .eqv. (array(i) .ge. 5) end do !$acc end parallel @@ -147,17 +347,46 @@ program reduction_2 lvresult = lvresult .eqv. (array(i) .ge. 5) end do - if (lresult .neqv. lvresult) call abort - - lresult = .false. - lvresult = .false. + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + ! ! '.neqv.' reductions + ! + + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. + + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.neqv.:lrg) gang + do i = 1, n + lrg = lrg .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.neqv.:lrw) worker + do i = 1, n + lrw = lrw .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.neqv.:lrv) vector + do i = 1, n + lrv = lrv .neqv. (array(i) .ge. 
5) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(lresult) - !$acc loop reduction(.neqv.:lresult) gang + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.neqv.:lrc) gang worker vector do i = 1, n - lresult = lresult .neqv. (array(i) .ge. 5) + lrc = lrc .neqv. (array(i) .ge. 5) end do !$acc end parallel @@ -166,5 +395,8 @@ program reduction_2 lvresult = lvresult .neqv. (array(i) .ge. 5) end do - if (lresult .neqv. lvresult) call abort + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort end program reduction_2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 index 155b903..a7dbf2b 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 @@ -5,26 +5,52 @@ program reduction_3 implicit none - integer, parameter :: n = 10, gangs = 20 + integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32 integer :: i - double precision, parameter :: e = .001 - double precision :: vresult, result - logical :: lresult, lvresult + double precision :: vresult, rg, rw, rv, rc + double precision, parameter :: e = 0.001 + logical :: lrg, lrw, lrv, lrc, lvresult double precision, dimension (n) :: array do i = 1, n array(i) = i end do - result = 0 + ! + ! '+' reductions + ! + + rg = 0 + rw = 0 + rv = 0 + rc = 0 vresult = 0 - ! 
'+' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(+:rg) gang + do i = 1, n + rg = rg + array(i) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(+:result) gang + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(+:rw) worker do i = 1, n - result = result + array(i) + rw = rw + array(i) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(+:rv) vector + do i = 1, n + rv = rv + array(i) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(+:rc) gang worker vector + do i = 1, n + rc = rc + array(i) end do !$acc end parallel @@ -33,17 +59,46 @@ program reduction_3 vresult = vresult + array(i) end do - if (abs (result - vresult) .ge. e) call abort + if (abs (rg - vresult) .ge. e) call abort + if (abs (rw - vresult) .ge. e) call abort + if (abs (rv - vresult) .ge. e) call abort + if (abs (rc - vresult) .ge. e) call abort + + ! + ! '*' reductions + ! - result = 1 + rg = 1 + rw = 1 + rv = 1 + rc = 1 vresult = 1 - ! 
'*' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(*:rg) gang + do i = 1, n + rg = rg * array(i) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(*:result) gang + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(*:rw) worker do i = 1, n - result = result * array(i) + rw = rw * array(i) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(*:rv) vector + do i = 1, n + rv = rv * array(i) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(*:rc) gang worker vector + do i = 1, n + rc = rc * array(i) end do !$acc end parallel @@ -52,17 +107,46 @@ program reduction_3 vresult = vresult * array(i) end do - if (result.ne.vresult) call abort + if (abs (rg - vresult) .ge. e) call abort + if (abs (rw - vresult) .ge. e) call abort + if (abs (rv - vresult) .ge. e) call abort + if (abs (rc - vresult) .ge. e) call abort + + ! + ! 'max' reductions + ! - result = 0 + rg = 0 + rw = 0 + rv = 0 + rc = 0 vresult = 0 - ! 
'max' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(max:rg) gang + do i = 1, n + rg = max (rg, array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(max:rw) worker + do i = 1, n + rw = max (rw, array(i)) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(max:result) gang + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(max:rv) vector do i = 1, n - result = max (result, array(i)) + rv = max (rv, array(i)) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(max:rc) gang worker vector + do i = 1, n + rc = max (rc, array(i)) end do !$acc end parallel @@ -71,17 +155,46 @@ program reduction_3 vresult = max (vresult, array(i)) end do - if (result.ne.vresult) call abort - - result = 1 - vresult = 1 + if (abs (rg - vresult) .ge. e) call abort + if (abs (rw - vresult) .ge. e) call abort + if (abs (rv - vresult) .ge. e) call abort + if (abs (rc - vresult) .ge. e) call abort + ! ! 'min' reductions + ! 
- !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(min:result) gang + rg = 0 + rw = 0 + rv = 0 + rc = 0 + vresult = 0 + + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(min:rg) gang + do i = 1, n + rg = min (rg, array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(min:rw) worker + do i = 1, n + rw = min (rw, array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(min:rv) vector do i = 1, n - result = min (result, array(i)) + rv = min (rv, array(i)) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(min:rc) gang worker vector + do i = 1, n + rc = min (rc, array(i)) end do !$acc end parallel @@ -90,17 +203,46 @@ program reduction_3 vresult = min (vresult, array(i)) end do - if (result.ne.vresult) call abort - - lresult = .false. - lvresult = .false. + if (abs (rg - vresult) .ge. e) call abort + if (abs (rw - vresult) .ge. e) call abort + if (abs (rv - vresult) .ge. e) call abort + if (abs (rc - vresult) .ge. e) call abort + ! ! '.and.' reductions + ! + + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. + + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.and.:lrg) gang + do i = 1, n + lrg = lrg .and. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(lresult) - !$acc loop reduction(.and.:lresult) gang + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.and.:lrw) worker do i = 1, n - lresult = lresult .and. (array(i) .ge. 5) + lrw = lrw .and. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.and.:lrv) vector + do i = 1, n + lrv = lrv .and. (array(i) .ge. 
5) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.and.:lrc) gang worker vector + do i = 1, n + lrc = lrc .and. (array(i) .ge. 5) end do !$acc end parallel @@ -109,17 +251,46 @@ program reduction_3 lvresult = lvresult .and. (array(i) .ge. 5) end do - if (lresult .neqv. lvresult) call abort + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort - lresult = .false. + ! + ! '.or.' reductions + ! + + lrg = .false. + lrw = .false. + lrv = .false. + lrc = .false. lvresult = .false. - ! '.or.' reductions + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.or.:lrg) gang + do i = 1, n + lrg = lrg .or. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.or.:lrw) worker + do i = 1, n + lrw = lrw .or. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(lresult) - !$acc loop reduction(.or.:lresult) gang + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.or.:lrv) vector do i = 1, n - lresult = lresult .or. (array(i) .ge. 5) + lrv = lrv .or. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.or.:lrc) gang worker vector + do i = 1, n + lrc = lrc .or. (array(i) .ge. 5) end do !$acc end parallel @@ -128,17 +299,46 @@ program reduction_3 lvresult = lvresult .or. (array(i) .ge. 5) end do - if (lresult .neqv. lvresult) call abort - - lresult = .false. - lvresult = .false. + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + ! ! '.eqv.' reductions + ! + + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. 
+ + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.eqv.:lrg) gang + do i = 1, n + lrg = lrg .eqv. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(lresult) - !$acc loop reduction(.eqv.:lresult) gang + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.eqv.:lrw) worker do i = 1, n - lresult = lresult .eqv. (array(i) .ge. 5) + lrw = lrw .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.eqv.:lrv) vector + do i = 1, n + lrv = lrv .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.eqv.:lrc) gang worker vector + do i = 1, n + lrc = lrc .eqv. (array(i) .ge. 5) end do !$acc end parallel @@ -147,17 +347,46 @@ program reduction_3 lvresult = lvresult .eqv. (array(i) .ge. 5) end do - if (lresult .neqv. lvresult) call abort - - lresult = .false. - lvresult = .false. + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + ! ! '.neqv.' reductions + ! + + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. + + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.neqv.:lrg) gang + do i = 1, n + lrg = lrg .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.neqv.:lrw) worker + do i = 1, n + lrw = lrw .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.neqv.:lrv) vector + do i = 1, n + lrv = lrv .neqv. (array(i) .ge. 
5) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(lresult) - !$acc loop reduction(.neqv.:lresult) gang + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.neqv.:lrc) gang worker vector do i = 1, n - lresult = lresult .neqv. (array(i) .ge. 5) + lrc = lrc .neqv. (array(i) .ge. 5) end do !$acc end parallel @@ -166,5 +395,8 @@ program reduction_3 lvresult = lvresult .neqv. (array(i) .ge. 5) end do - if (lresult .neqv. lvresult) call abort + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort end program reduction_3 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 index 8d4f6c1..c3bdaf6 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 @@ -5,50 +5,108 @@ program reduction_4 implicit none - integer, parameter :: n = 10, gangs = 20 + integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32 integer :: i - complex :: vresult, result + real :: vresult, rg, rw, rv, rc complex, dimension (n) :: array do i = 1, n array(i) = i end do - result = 0 + ! + ! '+' reductions + ! + + rg = 0 + rw = 0 + rv = 0 + rc = 0 vresult = 0 - ! 
'+' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(+:rg) gang + do i = 1, n + rg = rg + REAL(array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(+:rw) worker + do i = 1, n + rw = rw + REAL(array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(+:rv) vector + do i = 1, n + rv = rv + REAL(array(i)) + end do + !$acc end parallel - !$acc parallel num_gangs(gangs) copy(result) - !$acc loop reduction(+:result) gang + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(+:rc) gang worker vector do i = 1, n - result = result + array(i) + rc = rc + REAL(array(i)) end do !$acc end parallel ! Verify the results do i = 1, n - vresult = vresult + array(i) + vresult = vresult + REAL(array(i)) end do - if (result .ne. vresult) call abort + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort - result = 1 + ! + ! '*' reductions + ! + + rg = 1 + rw = 1 + rv = 1 + rc = 1 vresult = 1 - ! '*' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(*:rg) gang + do i = 1, n + rg = rg * REAL(array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(*:rw) worker + do i = 1, n + rw = rw * REAL(array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(*:rv) vector + do i = 1, n + rv = rv * REAL(array(i)) + end do + !$acc end parallel - !$acc parallel num_gangs (gangs) copy(result) - !$acc loop reduction(*:result) gang + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(*:rc) gang worker vector do i = 1, n - result = result * array(i) + rc = rc * REAL(array(i)) end do !$acc end parallel ! 
Verify the results do i = 1, n - vresult = vresult * array(i) + vresult = vresult * REAL(array(i)) end do - if (result .ne. vresult) call abort + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort end program reduction_4 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 index 1066fa7..304fe7f 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 @@ -4,9 +4,12 @@ program reduction integer, parameter :: n = 40, c = 10 - integer :: i, vsum, sum + integer :: i, vsum, gs, ws, vs, cs - call redsub (sum, n, c) + call redsub_gang (gs, n, c) + call redsub_worker (ws, n, c) + call redsub_vector (vs, n, c) + call redsub_combined (cs, n, c) vsum = 0 @@ -15,10 +18,11 @@ program reduction vsum = vsum + c end do - if (sum.ne.vsum) call abort () + if (gs .ne. vsum) call abort () + if (vs .ne. 
vsum) call abort () end program reduction -subroutine redsub(sum, n, c) +subroutine redsub_gang(sum, n, c) integer :: sum, n, c sum = 0 @@ -29,4 +33,43 @@ subroutine redsub(sum, n, c) sum = sum + c end do !$acc end parallel -end subroutine redsub +end subroutine redsub_gang + +subroutine redsub_worker(sum, n, c) + integer :: sum, n, c + + sum = 0 + + !$acc parallel copyin (n, c) num_workers(4) vector_length (32) copy(sum) + !$acc loop reduction(+:sum) worker + do i = 1, n + sum = sum + c + end do + !$acc end parallel +end subroutine redsub_worker + +subroutine redsub_vector(sum, n, c) + integer :: sum, n, c + + sum = 0 + + !$acc parallel copyin (n, c) vector_length(32) copy(sum) + !$acc loop reduction(+:sum) vector + do i = 1, n + sum = sum + c + end do + !$acc end parallel +end subroutine redsub_vector + +subroutine redsub_combined(sum, n, c) + integer :: sum, n, c + + sum = 0 + + !$acc parallel num_gangs (8) num_workers (4) vector_length(32) copy(sum) + !$acc loop reduction(+:sum) gang worker vector + do i = 1, n + sum = sum + c + end do + !$acc end parallel +end subroutine redsub_combined diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 index 2733968..990faac 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 @@ -3,28 +3,91 @@ program reduction implicit none - integer, parameter :: n = 100 - integer :: i, s1, s2, vs1, vs2 + integer, parameter :: n = 100, n2 = 1000, chunksize = 10 + integer :: i, gs1, gs2, ws1, ws2, vs1, vs2, cs1, cs2, hs1, hs2 + integer :: j, red, vred - s1 = 0 - s2 = 0 + gs1 = 0 + gs2 = 0 + ws1 = 0 + ws2 = 0 vs1 = 0 vs2 = 0 + cs1 = 0 + cs2 = 0 + hs1 = 0 + hs2 = 0 - !$acc parallel num_gangs (1000) copy(s1, s2) - !$acc loop reduction(+:s1, s2) gang + !$acc parallel num_gangs (1000) copy(gs1, gs2) + !$acc loop reduction(+:gs1, gs2) gang do i = 1, n - s1 = s1 + 1 - s2 = s2 + 2 + gs1 = gs1 + 1 + 
gs2 = gs2 + 2 end do !$acc end parallel - ! Verify the results + !$acc parallel num_workers (4) vector_length (32) copy(ws1, ws2) + !$acc loop reduction(+:ws1, ws2) worker + do i = 1, n + ws1 = ws1 + 1 + ws2 = ws2 + 2 + end do + !$acc end parallel + + !$acc parallel vector_length (32) copy(vs1, vs2) + !$acc loop reduction(+:vs1, vs2) vector do i = 1, n vs1 = vs1 + 1 vs2 = vs2 + 2 end do + !$acc end parallel + + !$acc parallel num_gangs(8) num_workers(4) vector_length(32) copy(cs1, cs2) + !$acc loop reduction(+:cs1, cs2) gang worker vector + do i = 1, n + cs1 = cs1 + 1 + cs2 = cs2 + 2 + end do + !$acc end parallel + + ! Verify the results on the host + do i = 1, n + hs1 = hs1 + 1 + hs2 = hs2 + 2 + end do + + if (gs1 .ne. hs1) call abort () + if (gs2 .ne. hs2) call abort () + + if (ws1 .ne. hs1) call abort () + if (ws2 .ne. hs2) call abort () + + if (vs1 .ne. hs1) call abort () + if (vs2 .ne. hs2) call abort () + + if (cs1 .ne. hs1) call abort () + if (cs2 .ne. hs2) call abort () + + ! Nested reductions. + + red = 0 + vred = 0 + + !$acc parallel num_gangs(10) vector_length(32) copy(red) + !$acc loop reduction(+:red) gang + do i = 1, n/chunksize + !$acc loop reduction(+:red) vector + do j = 1, chunksize + red = red + chunksize + end do + end do + !$acc end parallel + + do i = 1, n/chunksize + do j = 1, chunksize + vred = vred + chunksize + end do + end do - if (s1.ne.vs1) call abort () - if (s2.ne.vs2) call abort () + if (red .ne. vred) call abort () end program reduction