Hi! On Thu, 11 May 2017 14:26:51 +0200, I wrote: > Building on the other pending patches (I'll soon commit the approved > ones), we can then support the num_gangs, num_workers, vector_length > clauses for the OpenACC 2.5 kernels construct. OK for trunk?
> OpenACC 2.5 kernels construct: num_gangs, num_workers, vector_length > clauses For now, committed to gomp-4_0-branch in r248031: commit cc2a61ba48e84268e37c53874cb3eef27f5ede1d Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Sun May 14 10:26:07 2017 +0000 OpenACC 2.5 kernels construct: num_gangs, num_workers, vector_length clauses gcc/c/ * c-parser.c (OACC_KERNELS_CLAUSE_MASK) (OACC_KERNELS_CLAUSE_DEVICE_TYPE_MASK): Add "PRAGMA_OACC_CLAUSE_NUM_GANGS", "PRAGMA_OACC_CLAUSE_NUM_WORKERS", "VECTOR_LENGTH". gcc/cp/ * parser.c (OACC_KERNELS_CLAUSE_MASK) (OACC_KERNELS_CLAUSE_DEVICE_TYPE_MASK): Add "PRAGMA_OACC_CLAUSE_NUM_GANGS", "PRAGMA_OACC_CLAUSE_NUM_WORKERS", "VECTOR_LENGTH". gcc/fortran/ * openmp.c (OACC_KERNELS_CLAUSES) (OACC_KERNELS_CLAUSE_DEVICE_TYPE_MASK): Add "OMP_CLAUSE_NUM_GANGS", "OMP_CLAUSE_NUM_WORKERS", "OMP_CLAUSE_VECTOR_LENGTH". gcc/ * omp-low.c (execute_oacc_device_lower): Remove the parallelism dimensions function attributes for unparallelized OpenACC kernels constructs. gcc/testsuite/ * c-c++-common/goacc/parallel-dims-1.c: Update. * c-c++-common/goacc/parallel-dims-2.c: Likewise. * c-c++-common/goacc/routine-1.c: Likewise. * c-c++-common/goacc/uninit-dim-clause.c: Likewise. * g++.dg/goacc/template.C: Likewise. * gfortran.dg/goacc/kernels-tree.f95: Likewise. * gfortran.dg/goacc/routine-3.f90: Likewise. * gfortran.dg/goacc/sie.f95: Likewise. * gfortran.dg/goacc/uninit-dim-clause.f95: Likewise. libgomp/ * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: Update. * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise. * testsuite/libgomp.oacc-fortran/kernels-loop-2.f95: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@248031 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.gomp | 6 + gcc/c/ChangeLog.gomp | 7 + gcc/c/c-parser.c | 6 + gcc/cp/ChangeLog.gomp | 7 + gcc/cp/parser.c | 6 + gcc/fortran/ChangeLog.gomp | 7 + gcc/fortran/openmp.c | 6 +- gcc/omp-low.c | 9 + gcc/testsuite/ChangeLog.gomp | 12 + gcc/testsuite/c-c++-common/goacc/parallel-dims-1.c | 4 + gcc/testsuite/c-c++-common/goacc/parallel-dims-2.c | 152 +++++++++++-- gcc/testsuite/c-c++-common/goacc/routine-1.c | 13 ++ .../c-c++-common/goacc/uninit-dim-clause.c | 17 +- gcc/testsuite/g++.dg/goacc/template.C | 4 + gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95 | 6 +- gcc/testsuite/gfortran.dg/goacc/routine-3.f90 | 6 + gcc/testsuite/gfortran.dg/goacc/sie.f95 | 84 +++++++ .../gfortran.dg/goacc/uninit-dim-clause.f95 | 18 +- libgomp/ChangeLog.gomp | 6 + .../libgomp.oacc-c-c++-common/acc_prof-kernels-1.c | 244 +++++++++++++++++++++ .../libgomp.oacc-c-c++-common/kernels-loop-2.c | 21 +- .../libgomp.oacc-c-c++-common/parallel-dims.c | 35 +++ .../libgomp.oacc-fortran/kernels-loop-2.f95 | 13 +- 23 files changed, 661 insertions(+), 28 deletions(-) diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp index a754647..a4720c3 100644 --- gcc/ChangeLog.gomp +++ gcc/ChangeLog.gomp @@ -1,3 +1,9 @@ +2017-05-14 Thomas Schwinge <tho...@codesourcery.com> + + * omp-low.c (execute_oacc_device_lower): Remove the parallelism + dimensions function attributes for unparallelized OpenACC kernels + constructs. + 2017-05-12 Cesar Philippidis <ce...@codesourcery.com> * config/nvptx/nvptx.c (nvptx_goacc_reduction_init): Don't update diff --git gcc/c/ChangeLog.gomp gcc/c/ChangeLog.gomp index 3efcc8b..baedcf8 100644 --- gcc/c/ChangeLog.gomp +++ gcc/c/ChangeLog.gomp @@ -1,3 +1,10 @@ +2017-05-14 Thomas Schwinge <tho...@codesourcery.com> + + * c-parser.c (OACC_KERNELS_CLAUSE_MASK) + (OACC_KERNELS_CLAUSE_DEVICE_TYPE_MASK): Add + "PRAGMA_OACC_CLAUSE_NUM_GANGS", "PRAGMA_OACC_CLAUSE_NUM_WORKERS", + "VECTOR_LENGTH". + 2017-05-12 Thomas Schwinge <tho...@codesourcery.com> * c-parser.c (c_parser_omp_clause_num_gangs) diff --git gcc/c/c-parser.c gcc/c/c-parser.c index ef61c5f..afc467d 100644 --- gcc/c/c-parser.c +++ gcc/c/c-parser.c @@ -13966,11 +13966,17 @@ c_parser_oacc_loop (location_t loc, c_parser *parser, char *p_name, | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICE_TYPE) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICEPTR) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NUM_GANGS) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NUM_WORKERS) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_VECTOR_LENGTH) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) ) #define OACC_KERNELS_CLAUSE_DEVICE_TYPE_MASK \ ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NUM_GANGS) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NUM_WORKERS) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_VECTOR_LENGTH) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) ) #define OACC_PARALLEL_CLAUSE_MASK \ diff --git gcc/cp/ChangeLog.gomp gcc/cp/ChangeLog.gomp index 470f4e7..d59e856 100644 --- gcc/cp/ChangeLog.gomp +++ gcc/cp/ChangeLog.gomp @@ -1,3 +1,10 @@ +2017-05-14 Thomas Schwinge <tho...@codesourcery.com> + + * parser.c (OACC_KERNELS_CLAUSE_MASK) + (OACC_KERNELS_CLAUSE_DEVICE_TYPE_MASK): Add + "PRAGMA_OACC_CLAUSE_NUM_GANGS", "PRAGMA_OACC_CLAUSE_NUM_WORKERS", + "VECTOR_LENGTH". + 2017-05-04 Cesar Philippidis <ce...@codesourcery.com> * parser.c (cp_parser_omp_clause_name): Add support for if_present. diff --git gcc/cp/parser.c gcc/cp/parser.c index b9c9747..de42cdd 100644 --- gcc/cp/parser.c +++ gcc/cp/parser.c @@ -35704,11 +35704,17 @@ cp_parser_oacc_loop (cp_parser *parser, cp_token *pragma_tok, char *p_name, | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICE_TYPE) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DEVICEPTR) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NUM_GANGS) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NUM_WORKERS) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_PRESENT) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_VECTOR_LENGTH) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) ) #define OACC_KERNELS_CLAUSE_DEVICE_TYPE_MASK \ ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NUM_GANGS) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NUM_WORKERS) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_VECTOR_LENGTH) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT)) #define OACC_PARALLEL_CLAUSE_MASK \ diff --git gcc/fortran/ChangeLog.gomp gcc/fortran/ChangeLog.gomp index e9d8928..8a6ae6a 100644 --- gcc/fortran/ChangeLog.gomp +++ gcc/fortran/ChangeLog.gomp @@ -1,3 +1,10 @@ +2017-05-14 Thomas Schwinge <tho...@codesourcery.com> + + * openmp.c (OACC_KERNELS_CLAUSES) + (OACC_KERNELS_CLAUSE_DEVICE_TYPE_MASK): Add + "OMP_CLAUSE_NUM_GANGS", "OMP_CLAUSE_NUM_WORKERS", + "OMP_CLAUSE_VECTOR_LENGTH". + 2017-05-04 Cesar Philippidis <ce...@codesourcery.com> * gfortran.h (gfc_omp_clauses): Add if_present member. diff --git gcc/fortran/openmp.c gcc/fortran/openmp.c index 12b2430..c7e78bb 100644 --- gcc/fortran/openmp.c +++ gcc/fortran/openmp.c @@ -2049,7 +2049,8 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask, | OMP_CLAUSE_PRIVATE | OMP_CLAUSE_FIRSTPRIVATE | OMP_CLAUSE_DEFAULT \ | OMP_CLAUSE_WAIT | OMP_CLAUSE_DEVICE_TYPE) #define OACC_KERNELS_CLAUSES \ - (omp_mask (OMP_CLAUSE_IF) | OMP_CLAUSE_ASYNC | OMP_CLAUSE_DEVICEPTR \ + (omp_mask (OMP_CLAUSE_IF) | OMP_CLAUSE_ASYNC | OMP_CLAUSE_NUM_GANGS \ + | OMP_CLAUSE_NUM_WORKERS | OMP_CLAUSE_VECTOR_LENGTH | OMP_CLAUSE_DEVICEPTR \ | OMP_CLAUSE_COPY | OMP_CLAUSE_COPYIN | OMP_CLAUSE_COPYOUT \ | OMP_CLAUSE_CREATE | OMP_CLAUSE_PRESENT | OMP_CLAUSE_DEFAULT \ | OMP_CLAUSE_WAIT | OMP_CLAUSE_DEVICE_TYPE) @@ -2093,7 +2094,8 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask, | OMP_CLAUSE_VECTOR | OMP_CLAUSE_AUTO | OMP_CLAUSE_SEQ | OMP_CLAUSE_TILE \ | OMP_CLAUSE_DEVICE_TYPE) #define OACC_KERNELS_CLAUSE_DEVICE_TYPE_MASK \ - (omp_mask (OMP_CLAUSE_ASYNC) | OMP_CLAUSE_WAIT | OMP_CLAUSE_DEVICE_TYPE) + (omp_mask (OMP_CLAUSE_ASYNC) | OMP_CLAUSE_NUM_GANGS | OMP_CLAUSE_NUM_WORKERS \ + | OMP_CLAUSE_VECTOR_LENGTH | OMP_CLAUSE_WAIT | OMP_CLAUSE_DEVICE_TYPE) #define OACC_PARALLEL_CLAUSE_DEVICE_TYPE_MASK \ (omp_mask (OMP_CLAUSE_ASYNC) | OMP_CLAUSE_NUM_GANGS | OMP_CLAUSE_NUM_WORKERS \ | OMP_CLAUSE_VECTOR_LENGTH | OMP_CLAUSE_WAIT | OMP_CLAUSE_DEVICE_TYPE) diff --git gcc/omp-low.c gcc/omp-low.c index 0fbc3ff..ae8b6d9 100644 --- gcc/omp-low.c +++ gcc/omp-low.c @@ -21178,6 +21178,15 @@ execute_oacc_device_lower () = (lookup_attribute ("oacc kernels parallelized", DECL_ATTRIBUTES (current_function_decl)) != NULL); + /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1 + kernels, so remove the parallelism dimensions function attributes + potentially set earlier on. */ + if (is_oacc_kernels && !is_oacc_kernels_parallelized) + { + set_oacc_fn_attrib (current_function_decl, NULL, NULL); + attrs = get_oacc_fn_attrib (current_function_decl); + } + /* Discover, partition and process the loops. */ oacc_loop *loops = oacc_loop_discovery (); int fn_level = oacc_fn_attrib_level (attrs); diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp index c24820d..67f01e8 100644 --- gcc/testsuite/ChangeLog.gomp +++ gcc/testsuite/ChangeLog.gomp @@ -1,3 +1,15 @@ +2017-05-14 Thomas Schwinge <tho...@codesourcery.com> + + * c-c++-common/goacc/parallel-dims-1.c: Update. + * c-c++-common/goacc/parallel-dims-2.c: Likewise. + * c-c++-common/goacc/routine-1.c: Likewise. + * c-c++-common/goacc/uninit-dim-clause.c: Likewise. + * g++.dg/goacc/template.C: Likewise. + * gfortran.dg/goacc/kernels-tree.f95: Likewise. + * gfortran.dg/goacc/routine-3.f90: Likewise. + * gfortran.dg/goacc/sie.f95: Likewise. + * gfortran.dg/goacc/uninit-dim-clause.f95: Likewise. + 2017-05-12 Thomas Schwinge <tho...@codesourcery.com> * c-c++-common/goacc/parallel-dims-1.c: New file. diff --git gcc/testsuite/c-c++-common/goacc/parallel-dims-1.c gcc/testsuite/c-c++-common/goacc/parallel-dims-1.c index 9e4cfaa..6cdbebe 100644 --- gcc/testsuite/c-c++-common/goacc/parallel-dims-1.c +++ gcc/testsuite/c-c++-common/goacc/parallel-dims-1.c @@ -3,6 +3,10 @@ void f(int i) { +#pragma acc kernels \ + num_gangs(i) num_workers(i) vector_length(i) + ; + #pragma acc parallel /* { dg-bogus "region is (gang|worker|vector) partitioned" "" { xfail *-*-* } } */ \ num_gangs(i) num_workers(i) vector_length(i) ; diff --git gcc/testsuite/c-c++-common/goacc/parallel-dims-2.c gcc/testsuite/c-c++-common/goacc/parallel-dims-2.c index 30a3d17..acfbe7f 100644 --- gcc/testsuite/c-c++-common/goacc/parallel-dims-2.c +++ gcc/testsuite/c-c++-common/goacc/parallel-dims-2.c @@ -1,18 +1,15 @@ /* Invalid use of OpenACC parallelism dimensions clauses: num_gangs, num_workers, vector_length. */ -void acc_kernels(int i) +void f(int i, float f) { -#pragma acc kernels num_gangs(i) /* { dg-error "'num_gangs' is not valid for '#pragma acc kernels'" } */ +#pragma acc kernels num_gangs /* { dg-error "expected '\\(' before end of line" } */ ; -#pragma acc kernels num_workers(i) /* { dg-error "'num_workers' is not valid for '#pragma acc kernels'" } */ +#pragma acc kernels num_workers /* { dg-error "expected '\\(' before end of line" } */ ; -#pragma acc kernels vector_length(i) /* { dg-error "'vector_length' is not valid for '#pragma acc kernels'" } */ +#pragma acc kernels vector_length /* { dg-error "expected '\\(' before end of line" } */ ; -} -void acc_parallel(int i, float f) -{ #pragma acc parallel num_gangs /* { dg-error "expected '\\(' before end of line" } */ ; #pragma acc parallel num_workers /* { dg-error "expected '\\(' before end of line" } */ @@ -20,6 +17,14 @@ void acc_parallel(int i, float f) #pragma acc parallel vector_length /* { dg-error "expected '\\(' before end of line" } */ ; + +#pragma acc kernels num_gangs( /* { dg-error "expected (primary-|)expression before end of line" } */ + ; +#pragma acc kernels num_workers( /* { dg-error "expected (primary-|)expression before end of line" } */ + ; +#pragma acc kernels vector_length( /* { dg-error "expected (primary-|)expression before end of line" } */ + ; + #pragma acc parallel num_gangs( /* { dg-error "expected (primary-|)expression before end of line" } */ ; #pragma acc parallel num_workers( /* { dg-error "expected (primary-|)expression before end of line" } */ @@ -27,6 +32,14 @@ void acc_parallel(int i, float f) #pragma acc parallel vector_length( /* { dg-error "expected (primary-|)expression before end of line" } */ ; + +#pragma acc kernels num_gangs() /* { dg-error "expected (primary-|)expression before '\\)' token" } */ + ; +#pragma acc kernels num_workers() /* { dg-error "expected (primary-|)expression before '\\)' token" } */ + ; +#pragma acc kernels vector_length() /* { dg-error "expected (primary-|)expression before '\\)' token" } */ + ; + #pragma acc parallel num_gangs() /* { dg-error "expected (primary-|)expression before '\\)' token" } */ ; #pragma acc parallel num_workers() /* { dg-error "expected (primary-|)expression before '\\)' token" } */ @@ -34,6 +47,14 @@ void acc_parallel(int i, float f) #pragma acc parallel vector_length() /* { dg-error "expected (primary-|)expression before '\\)' token" } */ ; + +#pragma acc kernels num_gangs(1 /* { dg-error "expected '\\)' before end of line" } */ + ; +#pragma acc kernels num_workers(1 /* { dg-error "expected '\\)' before end of line" } */ + ; +#pragma acc kernels vector_length(1 /* { dg-error "expected '\\)' before end of line" } */ + ; + #pragma acc parallel num_gangs(1 /* { dg-error "expected '\\)' before end of line" } */ ; #pragma acc parallel num_workers(1 /* { dg-error "expected '\\)' before end of line" } */ @@ -41,6 +62,14 @@ void acc_parallel(int i, float f) #pragma acc parallel vector_length(1 /* { dg-error "expected '\\)' before end of line" } */ ; + +#pragma acc kernels num_gangs(i /* { dg-error "expected '\\)' before end of line" } */ + ; +#pragma acc kernels num_workers(i /* { dg-error "expected '\\)' before end of line" } */ + ; +#pragma acc kernels vector_length(i /* { dg-error "expected '\\)' before end of line" } */ + ; + #pragma acc parallel num_gangs(i /* { dg-error "expected '\\)' before end of line" } */ ; #pragma acc parallel num_workers(i /* { dg-error "expected '\\)' before end of line" } */ @@ -48,6 +77,14 @@ void acc_parallel(int i, float f) #pragma acc parallel vector_length(i /* { dg-error "expected '\\)' before end of line" } */ ; + +#pragma acc kernels num_gangs(1 i /* { dg-error "expected '\\)' before 'i'" } */ + ; +#pragma acc kernels num_workers(1 i /* { dg-error "expected '\\)' before 'i'" } */ + ; +#pragma acc kernels vector_length(1 i /* { dg-error "expected '\\)' before 'i'" } */ + ; + #pragma acc parallel num_gangs(1 i /* { dg-error "expected '\\)' before 'i'" } */ ; #pragma acc parallel num_workers(1 i /* { dg-error "expected '\\)' before 'i'" } */ @@ -55,6 +92,14 @@ void acc_parallel(int i, float f) #pragma acc parallel vector_length(1 i /* { dg-error "expected '\\)' before 'i'" } */ ; + +#pragma acc kernels num_gangs(1 i) /* { dg-error "expected '\\)' before 'i'" } */ + ; +#pragma acc kernels num_workers(1 i) /* { dg-error "expected '\\)' before 'i'" } */ + ; +#pragma acc kernels vector_length(1 i) /* { dg-error "expected '\\)' before 'i'" } */ + ; + #pragma acc parallel num_gangs(1 i) /* { dg-error "expected '\\)' before 'i'" } */ ; #pragma acc parallel num_workers(1 i) /* { dg-error "expected '\\)' before 'i'" } */ @@ -62,6 +107,17 @@ void acc_parallel(int i, float f) #pragma acc parallel vector_length(1 i) /* { dg-error "expected '\\)' before 'i'" } */ ; + +#pragma acc kernels num_gangs(1, i /* { dg-error "expected '\\)' before ',' token" "TODO" { xfail c } } */ + /* { dg-bogus "expected '\\)' before end of line" "TODO" { xfail c } .-1 } */ + ; +#pragma acc kernels num_workers(1, i /* { dg-error "expected '\\)' before ',' token" "TODO" { xfail c } } */ + /* { dg-bogus "expected '\\)' before end of line" "TODO" { xfail c } .-1 } */ + ; +#pragma acc kernels vector_length(1, i /* { dg-error "expected '\\)' before ',' token" "TODO" { xfail c } } */ + /* { dg-bogus "expected '\\)' before end of line" "TODO" { xfail c } .-1 } */ + ; + #pragma acc parallel num_gangs(1, i /* { dg-error "expected '\\)' before ',' token" "TODO" { xfail c } } */ /* { dg-bogus "expected '\\)' before end of line" "TODO" { xfail c } .-1 } */ ; @@ -72,6 +128,14 @@ void acc_parallel(int i, float f) /* { dg-bogus "expected '\\)' before end of line" "TODO" { xfail c } .-1 } */ ; + +#pragma acc kernels num_gangs(1, i) /* { dg-error "expected '\\)' before ',' token" "TODO" { xfail c } } */ + ; +#pragma acc kernels num_workers(1, i) /* { dg-error "expected '\\)' before ',' token" "TODO" { xfail c } } */ + ; +#pragma acc kernels vector_length(1, i) /* { dg-error "expected '\\)' before ',' token" "TODO" { xfail c } } */ + ; + #pragma acc parallel num_gangs(1, i) /* { dg-error "expected '\\)' before ',' token" "TODO" { xfail c } } */ ; #pragma acc parallel num_workers(1, i) /* { dg-error "expected '\\)' before ',' token" "TODO" { xfail c } } */ @@ -79,11 +143,27 @@ void acc_parallel(int i, float f) #pragma acc parallel vector_length(1, i) /* { dg-error "expected '\\)' before ',' token" "TODO" { xfail c } } */ ; -#pragma acc parallel num_gangs(num_gangs) /* { dg-error "'num_gangs' (un|was not )declared" } */ + +#pragma acc kernels num_gangs(num_gangs_k) /* { dg-error "'num_gangs_k' (un|was not )declared" } */ ; -#pragma acc parallel num_workers(num_workers) /* { dg-error "'num_workers' (un|was not )declared" } */ +#pragma acc kernels num_workers(num_workers_k) /* { dg-error "'num_workers_k' (un|was not )declared" } */ ; -#pragma acc parallel vector_length(vector_length) /* { dg-error "'vector_length' (un|was not )declared" } */ +#pragma acc kernels vector_length(vector_length_k) /* { dg-error "'vector_length_k' (un|was not )declared" } */ + ; + +#pragma acc parallel num_gangs(num_gangs_p) /* { dg-error "'num_gangs_p' (un|was not )declared" } */ + ; +#pragma acc parallel num_workers(num_workers_p) /* { dg-error "'num_workers_p' (un|was not )declared" } */ + ; +#pragma acc parallel vector_length(vector_length_p) /* { dg-error "'vector_length_p' (un|was not )declared" } */ + ; + + +#pragma acc kernels num_gangs(f) /* { dg-error "'num_gangs' expression must be integral" } */ + ; +#pragma acc kernels num_workers(f) /* { dg-error "'num_workers' expression must be integral" } */ + ; +#pragma acc kernels vector_length(f) /* { dg-error "'vector_length' expression must be integral" } */ ; #pragma acc parallel num_gangs(f) /* { dg-error "'num_gangs' expression must be integral" } */ @@ -93,6 +173,14 @@ void acc_parallel(int i, float f) #pragma acc parallel vector_length(f) /* { dg-error "'vector_length' expression must be integral" } */ ; + +#pragma acc kernels num_gangs((float) 1) /* { dg-error "'num_gangs' expression must be integral" } */ + ; +#pragma acc kernels num_workers((float) 1) /* { dg-error "'num_workers' expression must be integral" } */ + ; +#pragma acc kernels vector_length((float) 1) /* { dg-error "'vector_length' expression must be integral" } */ + ; + #pragma acc parallel num_gangs((float) 1) /* { dg-error "'num_gangs' expression must be integral" } */ ; #pragma acc parallel num_workers((float) 1) /* { dg-error "'num_workers' expression must be integral" } */ @@ -100,6 +188,14 @@ void acc_parallel(int i, float f) #pragma acc parallel vector_length((float) 1) /* { dg-error "'vector_length' expression must be integral" } */ ; + +#pragma acc kernels num_gangs(0) /* { dg-warning "'num_gangs' value must be positive" } */ + ; +#pragma acc kernels num_workers(0) /* { dg-warning "'num_workers' value must be positive" } */ + ; +#pragma acc kernels vector_length(0) /* { dg-warning "'vector_length' value must be positive" } */ + ; + #pragma acc parallel num_gangs(0) /* { dg-warning "'num_gangs' value must be positive" } */ ; #pragma acc parallel num_workers(0) /* { dg-warning "'num_workers' value must be positive" } */ @@ -107,6 +203,14 @@ void acc_parallel(int i, float f) #pragma acc parallel vector_length(0) /* { dg-warning "'vector_length' value must be positive" } */ ; + +#pragma acc kernels num_gangs((int) -1.2) /* { dg-warning "'num_gangs' value must be positive" } */ + ; +#pragma acc kernels num_workers((int) -1.2) /* { dg-warning "'num_workers' value must be positive" } */ + ; +#pragma acc kernels vector_length((int) -1.2) /* { dg-warning "'vector_length' value must be positive" } */ + ; + #pragma acc parallel num_gangs((int) -1.2) /* { dg-warning "'num_gangs' value must be positive" } */ ; #pragma acc parallel num_workers((int) -1.2) /* { dg-warning "'num_workers' value must be positive" } */ @@ -114,7 +218,8 @@ void acc_parallel(int i, float f) #pragma acc parallel vector_length((int) -1.2) /* { dg-warning "'vector_length' value must be positive" } */ ; -#pragma acc parallel \ + +#pragma acc kernels \ num_gangs(1) /* { dg-error "too many 'num_gangs' clauses" "" { target c } } */ \ num_workers(1) /* { dg-error "too many 'num_workers' clauses" "" { target c } } */ \ vector_length(1) /* { dg-error "too many 'vector_length' clauses" "" { target c } } */ \ @@ -123,12 +228,31 @@ void acc_parallel(int i, float f) num_gangs(1) /* { dg-error "too many 'num_gangs' clauses" "" { target c++ } } */ ; -#pragma acc parallel \ +#pragma acc parallel \ + num_gangs(1) /* { dg-error "too many 'num_gangs' clauses" "" { target c } } */ \ + num_workers(1) /* { dg-error "too many 'num_workers' clauses" "" { target c } } */ \ + vector_length(1) /* { dg-error "too many 'vector_length' clauses" "" { target c } } */ \ + num_workers(1) /* { dg-error "too many 'num_workers' clauses" "" { target c++ } } */ \ + vector_length(1) /* { dg-error "too many 'vector_length' clauses" "" { target c++ } } */ \ + num_gangs(1) /* { dg-error "too many 'num_gangs' clauses" "" { target c++ } } */ + ; + + +#pragma acc kernels \ + num_gangs(-1) /* { dg-warning "'num_gangs' value must be positive" } */ \ + num_workers() /* { dg-error "expected (primary-|)expression before '\\)' token" } */ \ + vector_length(abc_k) /* { dg-error "'abc_k' (un|was not )declared" } */ \ + num_workers(0.5) /* { dg-error "'num_workers' expression must be integral" } */ \ + vector_length(&f) /* { dg-error "'vector_length' expression must be integral" } */ \ + num_gangs( /* { dg-error "expected (primary-|)expression before end of line" "TODO" { xfail c } } */ + ; + +#pragma acc parallel \ num_gangs(-1) /* { dg-warning "'num_gangs' value must be positive" } */ \ num_workers() /* { dg-error "expected (primary-|)expression before '\\)' token" } */ \ - vector_length(abc) /* { dg-error "'abc' (un|was not )declared" } */ \ + vector_length(abc_p) /* { dg-error "'abc_p' (un|was not )declared" } */ \ num_workers(0.5) /* { dg-error "'num_workers' expression must be integral" } */ \ - vector_length(&acc_parallel) /* { dg-error "'vector_length' expression must be integral" } */ \ + vector_length(&f) /* { dg-error "'vector_length' expression must be integral" } */ \ num_gangs( /* { dg-error "expected (primary-|)expression before end of line" "TODO" { xfail c } } */ ; } diff --git gcc/testsuite/c-c++-common/goacc/routine-1.c gcc/testsuite/c-c++-common/goacc/routine-1.c index a4ecfd3..7389575 100644 --- gcc/testsuite/c-c++-common/goacc/routine-1.c +++ gcc/testsuite/c-c++-common/goacc/routine-1.c @@ -91,6 +91,19 @@ extern void nohost (void); int main () { +#pragma acc kernels num_gangs (32) num_workers (32) vector_length (32) + { + gang (); + worker (); + vector (); + seq (); + bind_f_1 (); + bind_f_1_1 (); + bind_f_2 (); + bind_f_2_1 (); + nohost (); + } + #pragma acc parallel num_gangs (32) num_workers (32) vector_length (32) { gang (); diff --git gcc/testsuite/c-c++-common/goacc/uninit-dim-clause.c gcc/testsuite/c-c++-common/goacc/uninit-dim-clause.c index b6e4c3d..72aacd7 100644 --- gcc/testsuite/c-c++-common/goacc/uninit-dim-clause.c +++ gcc/testsuite/c-c++-common/goacc/uninit-dim-clause.c @@ -1,7 +1,6 @@ /* { dg-additional-options "-Wuninitialized" } */ -int -main (void) +void acc_parallel() { int i, j, k; @@ -16,6 +15,18 @@ main (void) #pragma acc parallel loop vector vector_length(k) /* { dg-warning "is used uninitialized in this function" } */ for (k = 0; k < 1; k++) ; +} + +void acc_kernels() +{ + int i, j, k; + + #pragma acc kernels num_gangs(i) /* { dg-warning "is used uninitialized in this function" } */ + ; + + #pragma acc kernels num_workers(j) /* { dg-warning "is used uninitialized in this function" } */ + ; - return 0; + #pragma acc kernels vector_length(k) /* { dg-warning "is used uninitialized in this function" } */ + ; } diff --git gcc/testsuite/g++.dg/goacc/template.C gcc/testsuite/g++.dg/goacc/template.C index 4bc2596..f4d255c 100644 --- gcc/testsuite/g++.dg/goacc/template.C +++ gcc/testsuite/g++.dg/goacc/template.C @@ -100,6 +100,10 @@ oacc_kernels_copy (T a) float y = 3; double z = 4; +#pragma acc kernels num_gangs (a) num_workers (a) vector_length (a) default (none) copyout (b) copyin (a) + for (int i = 0; i < 1; i++) + b = a; + #pragma acc kernels copy (w, x, y, z) { w = accDouble<char>(w); diff --git gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95 gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95 index 2c237b7..a70f1e7 100644 --- gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95 +++ gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95 @@ -6,7 +6,8 @@ program test integer :: q, i, j, k, m, n, o, p, r, s, t, u, v, w logical :: l = .true. - !$acc kernels if(l) async copy(i), copyin(j), copyout(k), create(m) & + !$acc kernels if(l) async num_gangs(i) num_workers(i) vector_length(i) & + !$acc copy(i), copyin(j), copyout(k), create(m) & !$acc present(o), pcopy(p), pcopyin(r), pcopyout(s), pcreate(t) & !$acc deviceptr(u) !$acc end kernels @@ -16,6 +17,9 @@ end program test ! { dg-final { scan-tree-dump-times "if" 1 "original" } } ! { dg-final { scan-tree-dump-times "async" 1 "original" } } +! { dg-final { scan-tree-dump-times "num_gangs" 1 "original" } } +! { dg-final { scan-tree-dump-times "num_workers" 1 "original" } } +! { dg-final { scan-tree-dump-times "vector_length" 1 "original" } } ! { dg-final { scan-tree-dump-times "map\\(tofrom:i\\)" 1 "original" } } ! { dg-final { scan-tree-dump-times "map\\(to:j\\)" 1 "original" } } diff --git gcc/testsuite/gfortran.dg/goacc/routine-3.f90 gcc/testsuite/gfortran.dg/goacc/routine-3.f90 index ca9b928..6773f62 100644 --- gcc/testsuite/gfortran.dg/goacc/routine-3.f90 +++ gcc/testsuite/gfortran.dg/goacc/routine-3.f90 @@ -4,6 +4,12 @@ CONTAINS INTEGER :: i REAL(KIND=8), ALLOCATABLE :: un(:), ua(:) + !$acc kernels num_gangs(2) num_workers(4) vector_length(32) + DO jj = 1, 100 + un(i) = ua(i) + END DO + !$acc end kernels + !$acc parallel num_gangs(2) num_workers(4) vector_length(32) DO jj = 1, 100 un(i) = ua(i) diff --git gcc/testsuite/gfortran.dg/goacc/sie.f95 gcc/testsuite/gfortran.dg/goacc/sie.f95 index b4dd9ed..3abf2c8 100644 --- gcc/testsuite/gfortran.dg/goacc/sie.f95 +++ gcc/testsuite/gfortran.dg/goacc/sie.f95 @@ -95,6 +95,34 @@ program test !$acc parallel num_gangs("1") ! { dg-error "scalar INTEGER expression" } !$acc end parallel + !$acc kernels num_gangs ! { dg-error "Unclassifiable OpenACC directive" } + + !$acc kernels num_gangs(3) + !$acc end kernels + + !$acc kernels num_gangs(i) + !$acc end kernels + + !$acc kernels num_gangs(i+1) + !$acc end kernels + + !$acc kernels num_gangs(-1) ! { dg-error "must be positive" } + !$acc end kernels + + !$acc kernels num_gangs(0) ! { dg-error "must be positive" } + !$acc end kernels + + !$acc kernels num_gangs() ! { dg-error "Invalid character in name" } + + !$acc kernels num_gangs(1.5) ! { dg-error "scalar INTEGER expression" } + !$acc end kernels + + !$acc kernels num_gangs(.true.) ! { dg-error "scalar INTEGER expression" } + !$acc end kernels + + !$acc kernels num_gangs("1") ! { dg-error "scalar INTEGER expression" } + !$acc end kernels + !$acc parallel num_workers ! { dg-error "Unclassifiable OpenACC directive" } @@ -124,6 +152,34 @@ program test !$acc parallel num_workers("1") ! { dg-error "scalar INTEGER expression" } !$acc end parallel + !$acc kernels num_workers ! { dg-error "Unclassifiable OpenACC directive" } + + !$acc kernels num_workers(3) + !$acc end kernels + + !$acc kernels num_workers(i) + !$acc end kernels + + !$acc kernels num_workers(i+1) + !$acc end kernels + + !$acc kernels num_workers(-1) ! { dg-error "must be positive" } + !$acc end kernels + + !$acc kernels num_workers(0) ! { dg-error "must be positive" } + !$acc end kernels + + !$acc kernels num_workers() ! { dg-error "Invalid character in name" } + + !$acc kernels num_workers(1.5) ! { dg-error "scalar INTEGER expression" } + !$acc end kernels + + !$acc kernels num_workers(.true.) ! { dg-error "scalar INTEGER expression" } + !$acc end kernels + + !$acc kernels num_workers("1") ! { dg-error "scalar INTEGER expression" } + !$acc end kernels + !$acc parallel vector_length ! { dg-error "Unclassifiable OpenACC directive" } @@ -153,6 +209,34 @@ program test !$acc parallel vector_length("1") ! { dg-error "scalar INTEGER expression" } !$acc end parallel + !$acc kernels vector_length ! { dg-error "Unclassifiable OpenACC directive" } + + !$acc kernels vector_length(3) + !$acc end kernels + + !$acc kernels vector_length(i) + !$acc end kernels + + !$acc kernels vector_length(i+1) + !$acc end kernels + + !$acc kernels vector_length(-1) ! { dg-error "must be positive" } + !$acc end kernels + + !$acc kernels vector_length(0) ! { dg-error "must be positive" } + !$acc end kernels + + !$acc kernels vector_length() ! { dg-error "Invalid character in name" } + + !$acc kernels vector_length(1.5) ! { dg-error "scalar INTEGER expression" } + !$acc end kernels + + !$acc kernels vector_length(.true.) ! { dg-error "scalar INTEGER expression" } + !$acc end kernels + + !$acc kernels vector_length("1") ! { dg-error "scalar INTEGER expression" } + !$acc end kernels + !$acc loop gang do i = 1,10 diff --git gcc/testsuite/gfortran.dg/goacc/uninit-dim-clause.f95 gcc/testsuite/gfortran.dg/goacc/uninit-dim-clause.f95 index 9db692a..8551140 100644 --- gcc/testsuite/gfortran.dg/goacc/uninit-dim-clause.f95 +++ gcc/testsuite/gfortran.dg/goacc/uninit-dim-clause.f95 @@ -1,6 +1,6 @@ ! { dg-additional-options "-Wuninitialized" } -program test +subroutine acc_parallel implicit none integer :: i, j, k @@ -18,4 +18,18 @@ program test do k = 0, 1 end do !$acc end parallel loop -end program test +end subroutine acc_parallel + +subroutine acc_kernels + implicit none + integer :: i, j, k + + !$acc kernels num_gangs(i) ! { dg-warning "is used uninitialized in this function" } + !$acc end kernels + + !$acc kernels num_workers(j) ! { dg-warning "is used uninitialized in this function" } + !$acc end kernels + + !$acc kernels vector_length(k) ! { dg-warning "is used uninitialized in this function" } + !$acc end kernels +end subroutine acc_kernels diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp index a1627a8..5dc0889 100644 --- libgomp/ChangeLog.gomp +++ libgomp/ChangeLog.gomp @@ -1,5 +1,11 @@ 2017-05-14 Thomas Schwinge <tho...@codesourcery.com> + * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: New + file. + * testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: Update. + * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise. + * testsuite/libgomp.oacc-fortran/kernels-loop-2.f95: Likewise. + * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Rewrite. * testsuite/libgomp.oacc-c++/c++.exp (check_effective_target_c) (check_effective_target_c++): New procs. diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c new file mode 100644 index 0000000..24b5718 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c @@ -0,0 +1,244 @@ +/* Test dispatch of events to callbacks. */ + +#undef NDEBUG +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include <acc_prof.h> + +/* Use explicit copyin clauses, to work around firstprivate optimizations, + which will cause the value at the point of call to be used (*before* any + potential modifications done in callbacks), as opposed to its address being + taken, which then later gets dereferenced (*after* any modifications done in + callbacks). */ +#define COPYIN(...) copyin(__VA_ARGS__) + +/* See the "DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT" reference in + libgomp.texi. */ +#define DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT 0 + +#define DEBUG_printf(...) //__builtin_printf (__VA_ARGS__) + +static int state = -1; +#define STATE_OP(state, op)\ + do \ + { \ + typeof (state) state_o = (state); \ + (void) state_o; \ + (state)op; \ + DEBUG_printf("state: %d -> %d\n", state_o, (state)); \ + } \ + while (0) + +static acc_device_t acc_device_type; +static int acc_device_num; +static int num_gangs, num_workers, vector_length; + +void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info) +{ + DEBUG_printf ("%s\n", __FUNCTION__); + + assert (acc_device_type != acc_device_host); + + assert (state == 0); + STATE_OP (state, = 1); + + assert (prof_info->event_type == acc_ev_enqueue_launch_start); + assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES); + assert (prof_info->version == _ACC_PROF_INFO_VERSION); + assert (prof_info->device_type == acc_device_type); + assert (prof_info->device_number == acc_device_num); + assert (prof_info->thread_id == -1); + assert (prof_info->async == acc_async_sync); + assert (prof_info->async_queue == prof_info->async); + assert (prof_info->src_file == NULL); + assert (prof_info->func_name == NULL); + assert (prof_info->line_no == -1); + assert (prof_info->end_line_no == -1); + assert (prof_info->func_line_no == -1); + assert (prof_info->func_end_line_no == -1); + + assert (event_info->launch_event.event_type == prof_info->event_type); + assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES); + assert (event_info->launch_event.parent_construct == acc_construct_parallel); + assert (event_info->launch_event.implicit == 1); + assert (event_info->launch_event.tool_info == NULL); + assert (event_info->launch_event.kernel_name != NULL); + { + char *s = strstr (event_info->launch_event.kernel_name, "main"); + assert (s != NULL); + s = strstr (s, "omp_fn"); + assert (s != NULL); + } + if (num_gangs < 1) + assert (event_info->launch_event.num_gangs >= 1); + else + { +#ifdef __OPTIMIZE__ + assert (event_info->launch_event.num_gangs == num_gangs); +#else + /* No parallelized OpenACC kernels constructs, and unparallelized OpenACC + kernels constructs must get launched as 1 x 1 x 1 kernels. */ + assert (event_info->launch_event.num_gangs == 1); +#endif + } + if (num_workers < 1) + assert (event_info->launch_event.num_workers >= 1); + else + { +#ifdef __OPTIMIZE__ + assert (event_info->launch_event.num_workers == num_workers); +#else + /* See num_gangs above. */ + assert (event_info->launch_event.num_workers == 1); +#endif + } + if (vector_length < 1) + assert (event_info->launch_event.vector_length >= 1); + else if (acc_device_type == acc_device_nvidia) /* ... is special. */ + assert (event_info->launch_event.vector_length == 32); + else + { +#ifdef __OPTIMIZE__ + assert (event_info->launch_event.vector_length == vector_length); +#else + /* See num_gangs above. */ + assert (event_info->launch_event.vector_length == 1); +#endif + } + + if (acc_device_type == acc_device_host) + assert (api_info->device_api == acc_device_api_none); + else + assert (api_info->device_api == acc_device_api_cuda); + assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES); + assert (api_info->device_type == prof_info->device_type); + assert (api_info->vendor == -1); + assert (api_info->device_handle == NULL); + assert (api_info->context_handle == NULL); + assert (api_info->async_handle == NULL); +} + +static acc_prof_reg reg; +static acc_prof_reg unreg; +static acc_prof_lookup_func lookup; +void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_) +{ + DEBUG_printf ("%s\n", __FUNCTION__); + + reg = reg_; + unreg = unreg_; + lookup = lookup_; +} + +int main() +{ + STATE_OP (state, = 0); + reg (acc_ev_enqueue_launch_start, cb_enqueue_launch_start, acc_reg); + assert (state == 0); + + acc_device_type = acc_get_device_type (); + acc_device_num = acc_get_device_num (acc_device_type); + assert (state == 0); + + /* Parallelism dimensions: compiler/runtime decides. */ + STATE_OP (state, = 0); + num_gangs = num_workers = vector_length = 0; + { +#define N 100 + int x[N]; +#pragma acc kernels + { + for (int i = 0; i < N; ++i) + x[i] = i * i; + } +#ifdef __OPTIMIZE__ + /* TODO. With -O2 optimizations enabled, the compiler believes that here + "state == 0" still holds. It's not yet clear what's going on. + Mis-optimization across the GOMP function call boundary? Per its + gcc/omp-builtins.def definition, BUILT_IN_GOACC_PARALLEL + "GOACC_parallel_keyed" doesn't have a "leaf" attribute, so the compiler + must expect calls back into this compilation unit? */ + asm volatile ("" : : : "memory"); +#endif + if (acc_device_type == acc_device_host) + assert (state == 0); /* No acc_ev_enqueue_launch_start. */ + else + assert (state == 1); + for (int i = 0; i < N; ++i) + if (x[i] != i * i) + __builtin_abort (); +#undef N + } + + /* Parallelism dimensions: literal. */ + STATE_OP (state, = 0); + num_gangs = 30; + num_workers = 3; + vector_length = 5; + { +#define N 100 + int x[N]; +#pragma acc kernels \ + num_gangs (30) num_workers (3) vector_length (5) + /* { dg-prune-output "using vector_length \\(32\\), ignoring 5" } */ + { + for (int i = 0; i < N; ++i) + x[i] = i * i; + } +#ifdef __OPTIMIZE__ + /* TODO. With -O2 optimizations enabled, the compiler believes that here + "state == 0" still holds. It's not yet clear what's going on. + Mis-optimization across the GOMP function call boundary? Per its + gcc/omp-builtins.def definition, BUILT_IN_GOACC_PARALLEL + "GOACC_parallel_keyed" doesn't have a "leaf" attribute, so the compiler + must expect calls back into this compilation unit? */ + asm volatile ("" : : : "memory"); +#endif + if (acc_device_type == acc_device_host) + assert (state == 0); /* No acc_ev_enqueue_launch_start. */ + else + assert (state == 1); + for (int i = 0; i < N; ++i) + if (x[i] != i * i) + __builtin_abort (); +#undef N + } + + /* Parallelism dimensions: variable. */ + STATE_OP (state, = 0); + num_gangs = 22; + num_workers = 5; + vector_length = 7; + { +#define N 100 + int x[N]; +#pragma acc kernels \ + num_gangs (num_gangs) num_workers (num_workers) vector_length (vector_length) + /* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */ + { + for (int i = 0; i < N; ++i) + x[i] = i * i; + } +#ifdef __OPTIMIZE__ + /* TODO. With -O2 optimizations enabled, the compiler believes that here + "state == 0" still holds. It's not yet clear what's going on. + Mis-optimization across the GOMP function call boundary? Per its + gcc/omp-builtins.def definition, BUILT_IN_GOACC_PARALLEL + "GOACC_parallel_keyed" doesn't have a "leaf" attribute, so the compiler + must expect calls back into this compilation unit? */ + asm volatile ("" : : : "memory"); +#endif + if (acc_device_type == acc_device_host) + assert (state == 0); /* No acc_ev_enqueue_launch_start. */ + else + assert (state == 1); + for (int i = 0; i < N; ++i) + if (x[i] != i * i) + __builtin_abort (); +#undef N + } + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c index c7592d6..b840888 100644 --- libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c +++ libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c @@ -14,27 +14,40 @@ main (void) b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + /* Parallelism dimensions: compiler/runtime decides. */ #pragma acc kernels copyout (a[0:N]) { for (COUNTERTYPE i = 0; i < N; i++) a[i] = i * 2; } -#pragma acc kernels copyout (b[0:N]) + /* Parallelism dimensions: variable. */ +#pragma acc kernels copyout (b[0:N]) \ + num_gangs (3 + a[3]) num_workers (5 + a[5]) vector_length (7 + a[7]) + /* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */ { for (COUNTERTYPE i = 0; i < N; i++) b[i] = i * 4; } -#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) + /* Parallelism dimensions: literal. */ +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) \ + num_gangs (3) num_workers (5) vector_length (7) + /* { dg-prune-output "using vector_length \\(32\\), ignoring 7" } */ { for (COUNTERTYPE ii = 0; ii < N; ii++) c[ii] = a[ii] + b[ii]; } for (COUNTERTYPE i = 0; i < N; i++) - if (c[i] != a[i] + b[i]) - abort (); + { + if (a[i] != i * 2) + abort (); + if (b[i] != i * 4) + abort (); + if (c[i] != a[i] + b[i]) + abort (); + } free (a); free (b); diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c index 3458757..1dd6353 100644 --- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c +++ libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c @@ -523,5 +523,40 @@ int main () } + /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1 + kernels even when there are explicit num_gangs, num_workers, or + vector_length clauses. */ + { + int gangs = 5; +#define WORKERS 5 +#define VECTORS 13 + int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max; + gangs_min = workers_min = vectors_min = INT_MAX; + gangs_max = workers_max = vectors_max = INT_MIN; +#pragma acc kernels \ + num_gangs (gangs) \ + num_workers (WORKERS) \ + vector_length (VECTORS) + { + /* This is to make the OpenACC kernels construct unparallelizable. */ + asm volatile ("" : : : "memory"); + +#pragma acc loop reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) + for (int i = 100; i > -100; --i) + { + gangs_min = gangs_max = acc_gang (); + workers_min = workers_max = acc_worker (); + vectors_min = vectors_max = acc_vector (); + } + } + if (gangs_min != 0 || gangs_max != 1 - 1 + || workers_min != 0 || workers_max != 1 - 1 + || vectors_min != 0 || vectors_max != 1 - 1) + __builtin_abort (); +#undef VECTORS +#undef WORKERS + } + + return 0; } diff --git libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-2.f95 libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-2.f95 index 163e8d5..b88ca67 100644 --- libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-2.f95 +++ libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-2.f95 @@ -6,25 +6,34 @@ program main integer, dimension (0:n-1) :: a, b, c integer :: i, ii + ! Parallelism dimensions: compiler/runtime decides. !$acc kernels copyout (a(0:n-1)) do i = 0, n - 1 a(i) = i * 2 end do !$acc end kernels - !$acc kernels copyout (b(0:n-1)) + ! Parallelism dimensions: variable. + !$acc kernels copyout (b(0:n-1)) & + !$acc num_gangs (3 + a(3)) num_workers (5 + a(5)) vector_length (7 + a(7)) + ! { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } do i = 0, n -1 b(i) = i * 4 end do !$acc end kernels - !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) + ! Parallelism dimensions: literal. + !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) & + !$acc num_gangs (3) num_workers (5) vector_length (7) + ! { dg-prune-output "using vector_length \\(32\\), ignoring 7" } do ii = 0, n - 1 c(ii) = a(ii) + b(ii) end do !$acc end kernels do i = 0, n - 1 + if (a(i) .ne. i * 2) call abort + if (b(i) .ne. i * 4) call abort if (c(i) .ne. a(i) + b(i)) call abort end do Grüße Thomas