This patch for gomp-4_0-branch implements OpenACC 2.5 reference counting of mappings, the finalize clause of the exit data directive, and the corresponding API routines.
Tested without regressions, committed to gomp-4_0-branch. Chung-Lin 2017-05-16 Chung-Lin Tang <clt...@codesourcery.com> gcc/c/ * c-parser.c (c_parser_omp_clause_name): Handle 'finalize' clause. (c_parser_oacc_simple_clause): Add 'finalize' to comments. (c_parser_oacc_all_clauses): Handle PRAGMA_OACC_CLAUSE_FINALIZE. (OACC_EXIT_DATA_CLAUSE_MASK): Add PRAGMA_OACC_CLAUSE_FINALIZE. * c-typeck.c (c_finish_omp_clauses): Handle OMP_CLAUSE_FINALIZE. gcc/cp/ * parser.c (cp_parser_omp_clause_name): Handle 'finalize' clause. (cp_parser_oacc_simple_clause): Add 'finalize' to comments. (cp_parser_oacc_all_clauses): Handle PRAGMA_OACC_CLAUSE_FINALIZE. (OACC_EXIT_DATA_CLAUSE_MASK): Add PRAGMA_OACC_CLAUSE_FINALIZE. * semantics.c (finish_omp_clauses): Handle OMP_CLAUSE_FINALIZE. gcc/c-family/ * c-pragma.h (enum pragma_omp_clause): Add PRAGMA_OACC_CLAUSE_FINALIZE. gcc/fortran/ * gfortran.h (struct gfc_omp_clauses): Add 'finalize:1' bitfield. * openmp.c (enum omp_mask2): Add OMP_CLAUSE_FINALIZE. (gfc_match_omp_clauses): Handle 'finalize' clause. (OACC_EXIT_DATA_CLAUSES): Add OMP_CLAUSE_FINALIZE. * trans-openmp.c (gfc_trans_omp_clauses_1): Handle finalize bit. gcc/ * tree-core.h (enum omp_clause_code): Add OMP_CLAUSE_FINALIZE. * tree.c (omp_clause_num_ops): Add entry for OMP_CLAUSE_FINALIZE. (omp_clause_code_name): Add "finalize" entry. * omp-low.c (scan_sharing_clauses): Handle OMP_CLAUSE_FINALIZE. (expand_omp_target): Add finalize argument for GOACC_enter_exit_data call. * gimplify.c (gimplify_scan_omp_clauses): Handle OMP_CLAUSE_FINALIZE. (gimplify_adjust_omp_clauses): Likewise. libgomp/ * openacc.h (acc_copyout_finalize): Declare new API function. (acc_copyout_finalize_async): Likewise. (acc_delete_finalize): Likewise. (acc_delete_finalize_async): Likewise. * openacc_lib.h (acc_copyout_finalize): Declare new API function. (acc_copyout_finalize_async): Likewise. (acc_delete_finalize): Likewise. (acc_delete_finalize_async): Likewise. * openacc.f90 (acc_copyout_finalize_32_h): Define. (acc_copyout_finalize_64_h): Likewise. (acc_copyout_finalize_array_h): Likewise. (acc_copyout_finalize_l): Likewise. (acc_copyout_finalize_async_32_h): Define. (acc_copyout_finalize_async_64_h): Likewise. (acc_copyout_finalize_async_array_h): Likewise. (acc_copyout_finalize_async_l): Likewise. (acc_delete_finalize_32_h): Define. (acc_delete_finalize_64_h): Likewise. (acc_delete_finalize_array_h): Likewise. (acc_delete_finalize_l): Likewise. (acc_delete_finalize_async_32_h): Define. (acc_delete_finalize_async_64_h): Likewise. (acc_delete_finalize_async_array_h): Likewise. (acc_delete_finalize_async_l): Likewise. * libgomp.map (OACC_2.5): Add acc_copyout_finalize* and acc_delete_finalize* entries. * libgomp.h (struct splay_tree_key_s): Add 'dynamic_refcount' field. (gomp_acc_remove_pointer): Adjust declaration. (gomp_remove_var): New declaration. * libgomp_g.h (GOACC_enter_exit_data): Adjust declaration. * oacc-mem.c (acc_map_data): Adjust new key refcount to REFCOUNT_INFINITY. (acc_unmap_data): Adjust key refcount to 1 for removal. (present_create_copy): Increment mapping refcounts when mapping exists, initialize dynamic refcount when creating new mapping. (FLAG_FINALIZE): Define macro. (delete_copyout): Adjust delete/copyout handling, add handling for FLAG_FINALIZE. (acc_delete_finalize): Define new API function. (acc_delete_finalize_async): Likewise. (acc_copyout_finalize): Likewise. (acc_copyout_finalize_async): Likewise. (gomp_acc_insert_pointer): Adjust handling. (gomp_acc_remove_pointer): Add finalize parameter, adjust handling. * oacc-parallel.c (GOACC_parallel_keyed): Disable async registering when no copyout needed. (GOACC_enter_exit_data): Add and handle finalize argument, adjust gomp_acc_insert_pointer and gomp_acc_remove_pointer calls. (GOACC_declare): Adjust calls to GOACC_enter_exit_data. * target.c (gomp_map_vars): Initialize dynamic_refcount. (gomp_remove_var): Abstract out key unreferencing into new function. (gomp_unmap_vars): Adjust to call gomp_remove_var. (gomp_unload_image_from_device): Likewise. (gomp_exit_data): Likewise. * testsuite/libgomp.oacc-c-c++-common/data-2.c: Adjust testcase for 2.5 reference counting. * testsuite/libgomp.oacc-c-c++-common/lib-38.c: Likewise. * testsuite/libgomp.oacc-fortran/data-2.f90: Likewise. * testsuite/libgomp.oacc-c-c++-common/data-4.c: New test. * testsuite/libgomp.oacc-c-c++-common/data-5.c: Likewise. * testsuite/libgomp.oacc-fortran/data-5.f90: Likewise. * testsuite/libgomp.oacc-fortran/data-6.f90: Likewise.
Index: libgomp/oacc-parallel.c =================================================================== --- libgomp/oacc-parallel.c (revision 248095) +++ libgomp/oacc-parallel.c (revision 248096) @@ -355,7 +355,22 @@ } } else - tgt->device_descr->openacc.register_async_cleanup_func (tgt, async); + { + bool async_unmap = false; + for (size_t i = 0; i < tgt->list_count; i++) + { + splay_tree_key k = tgt->list[i].key; + if (k && k->refcount == 1) + { + async_unmap = true; + break; + } + } + if (async_unmap) + tgt->device_descr->openacc.register_async_cleanup_func (tgt, async); + else + gomp_unmap_vars (tgt, false); + } acc_dev->openacc.async_set_async_func (acc_async_sync); @@ -586,7 +601,7 @@ void GOACC_enter_exit_data (int device, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, - int async, int num_waits, ...) + int async, int finalize, int num_waits, ...) { struct goacc_thread *thr; struct gomp_device_descr *acc_dev; @@ -749,11 +764,9 @@ if (kind == GOMP_MAP_DECLARE_ALLOCATE) gomp_acc_declare_allocate (true, pointer, &hostaddrs[i], &sizes[i], &kinds[i]); - else if (!acc_is_present (hostaddrs[i], sizes[i])) - { - gomp_acc_insert_pointer (pointer, &hostaddrs[i], - &sizes[i], &kinds[i]); - } + else + gomp_acc_insert_pointer (pointer, &hostaddrs[i], + &sizes[i], &kinds[i]); /* Increment 'i' by two because OpenACC requires fortran arrays to be contiguous, so each PSET is associated with one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and @@ -775,12 +788,20 @@ { case GOMP_MAP_DELETE: if (acc_is_present (hostaddrs[i], sizes[i])) - acc_delete (hostaddrs[i], sizes[i]); + { + if (finalize) + acc_delete_finalize (hostaddrs[i], sizes[i]); + else + acc_delete (hostaddrs[i], sizes[i]); + } break; case GOMP_MAP_DECLARE_DEALLOCATE: case GOMP_MAP_FROM: case GOMP_MAP_FORCE_FROM: - acc_copyout (hostaddrs[i], sizes[i]); + if (finalize) + acc_copyout_finalize (hostaddrs[i], sizes[i]); + else + acc_copyout (hostaddrs[i], sizes[i]); break; default: gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", @@ -793,11 +814,12 @@ if (kind == GOMP_MAP_DECLARE_DEALLOCATE) gomp_acc_declare_allocate (false, pointer, &hostaddrs[i], &sizes[i], &kinds[i]); - else if (acc_is_present (hostaddrs[i], sizes[i])) + else { bool copyfrom = (kind == GOMP_MAP_FORCE_FROM || kind == GOMP_MAP_FROM); - gomp_acc_remove_pointer (hostaddrs[i], copyfrom, async, pointer); + gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async, + finalize, pointer); /* See the above comment. */ } i += pointer - 1; @@ -1077,7 +1099,7 @@ case GOMP_MAP_POINTER: case GOMP_MAP_DELETE: GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], - &kinds[i], 0, 0); + &kinds[i], 0, 0, 0); break; case GOMP_MAP_FORCE_DEVICEPTR: @@ -1086,12 +1108,12 @@ case GOMP_MAP_ALLOC: if (!acc_is_present (hostaddrs[i], sizes[i])) GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], - &kinds[i], 0, 0); + &kinds[i], 0, 0, 0); break; case GOMP_MAP_TO: GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], - &kinds[i], 0, 0); + &kinds[i], 0, 0, 0); break; @@ -1098,7 +1120,7 @@ case GOMP_MAP_FROM: kinds[i] = GOMP_MAP_FORCE_FROM; GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], - &kinds[i], 0, 0); + &kinds[i], 0, 0, 0); break; case GOMP_MAP_FORCE_PRESENT: Index: libgomp/libgomp_g.h =================================================================== --- libgomp/libgomp_g.h (revision 248095) +++ libgomp/libgomp_g.h (revision 248096) @@ -304,7 +304,7 @@ unsigned short *); extern void GOACC_data_end (void); extern void GOACC_enter_exit_data (int, size_t, void **, - size_t *, unsigned short *, int, int, ...); + size_t *, unsigned short *, int, int, int, ...); extern void GOACC_update (int, size_t, void **, size_t *, unsigned short *, int, int, ...); extern void GOACC_wait (int, int, ...); Index: libgomp/openacc.h =================================================================== --- libgomp/openacc.h (revision 248095) +++ libgomp/openacc.h (revision 248096) @@ -118,6 +118,12 @@ void acc_memcpy_to_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; void acc_memcpy_from_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; +/* Finalize versions of copyout/delete functions, specified in OpenACC 2.5. */ +void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW; +void acc_copyout_finalize_async (void *, size_t, int) __GOACC_NOTHROW; +void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW; +void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW; + /* Old names. OpenACC does not specify whether these can or must not be macros, inlines or aliases for the new names. */ #define acc_pcreate acc_present_or_create Index: libgomp/libgomp.map =================================================================== --- libgomp/libgomp.map (revision 248095) +++ libgomp/libgomp.map (revision 248096) @@ -388,6 +388,14 @@ acc_copyout_async_32_h_; acc_copyout_async_64_h_; acc_copyout_async_array_h_; + acc_copyout_finalize; + acc_copyout_finalize_32_h_; + acc_copyout_finalize_64_h_; + acc_copyout_finalize_array_h_; + acc_copyout_finalize_async; + acc_copyout_finalize_async_32_h_; + acc_copyout_finalize_async_64_h_; + acc_copyout_finalize_async_array_h_; acc_create_async; acc_create_async_32_h_; acc_create_async_64_h_; @@ -396,6 +404,14 @@ acc_delete_async_32_h_; acc_delete_async_64_h_; acc_delete_async_array_h_; + acc_delete_finalize; + acc_delete_finalize_32_h_; + acc_delete_finalize_64_h_; + acc_delete_finalize_array_h_; + acc_delete_finalize_async; + acc_delete_finalize_async_32_h_; + acc_delete_finalize_async_64_h_; + acc_delete_finalize_async_array_h_; acc_get_default_async; acc_get_default_async_h_; acc_memcpy_from_device_async; Index: libgomp/testsuite/libgomp.oacc-fortran/data-5.f90 =================================================================== --- libgomp/testsuite/libgomp.oacc-fortran/data-5.f90 (nonexistent) +++ libgomp/testsuite/libgomp.oacc-fortran/data-5.f90 (revision 248096) @@ -0,0 +1,56 @@ +! { dg-do run } +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + +program refcount_test + use openacc + integer, allocatable :: h(:) + integer i, N + + N = 256 + allocate (h(N)) + + do i = 1, N + h(i) = i + end do + + !$acc enter data create (h(1:N)) + !$acc enter data copyin (h(1:N)) + !$acc enter data copyin (h(1:N)) + !$acc enter data copyin (h(1:N)) + + call acc_update_self (h) + do i = 1, N + if (h(i) .eq. i) c = c + 1 + end do + ! h[] should be filled with uninitialized device values, + ! abort if it's not. + if (c .eq. N) call abort + + h(:) = 0 + + !$acc parallel present (h(1:N)) + do i = 1, N + h(i) = 111 + end do + !$acc end parallel + + ! No actual copyout should happen. + call acc_copyout (h) + do i = 1, N + if (h(i) .ne. 0) call abort + end do + + !$acc exit data delete (h(1:N)) + + ! This should not actually be deleted yet. + if (acc_is_present (h) .eqv. .FALSE.) call abort + + !$acc exit data copyout (h(1:N)) finalize + + do i = 1, N + if (h(i) .ne. 111) call abort + end do + + if (acc_is_present (h) .eqv. .TRUE.) call abort + +end program refcount_test Index: libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 =================================================================== --- libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 (revision 248095) +++ libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 (revision 248096) @@ -157,8 +157,8 @@ !$acc exit data delete (c(0:N), d(0:N)) - if (acc_is_present (c) .eqv. .TRUE.) call abort - if (acc_is_present (d) .eqv. .TRUE.) call abort + if (acc_is_present (c) .eqv. .FALSE.) call abort + if (acc_is_present (d) .eqv. .FALSE.) call abort !$acc exit data delete (c(0:N), d(0:N)) @@ -177,13 +177,13 @@ !$acc exit data delete (c(0:N), d(0:N)) - if (acc_is_present (c) .eqv. .TRUE.) call abort - if (acc_is_present (d) .eqv. .TRUE.) call abort + !if (acc_is_present (c) .eqv. .TRUE.) call abort + !if (acc_is_present (d) .eqv. .TRUE.) call abort !$acc exit data delete (c(0:N), d(0:N)) - if (acc_is_present (c) .eqv. .TRUE.) call abort - if (acc_is_present (d) .eqv. .TRUE.) call abort + if (acc_is_present (c) .eqv. .TRUE.) call abort + if (acc_is_present (d) .eqv. .TRUE.) call abort !$acc enter data present_or_copyin (c(0:N)) Index: libgomp/testsuite/libgomp.oacc-fortran/data-6.f90 =================================================================== --- libgomp/testsuite/libgomp.oacc-fortran/data-6.f90 (nonexistent) +++ libgomp/testsuite/libgomp.oacc-fortran/data-6.f90 (revision 248096) @@ -0,0 +1,26 @@ +! { dg-do run } +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + +program refcount_test + use openacc + integer, allocatable :: h(:) + integer i, N + + N = 256 + allocate (h(N)) + + do i = 1, N + h(i) = i + end do + + !$acc data create (h(1:N)) + !$acc enter data create (h(1:N)) + !$acc end data + + if (acc_is_present (h) .eqv. .FALSE.) call abort + + !$acc exit data delete (h(1:N)) + + if (acc_is_present (h) .eqv. .TRUE.) call abort + +end program refcount_test Index: libgomp/testsuite/libgomp.oacc-c-c++-common/lib-38.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-38.c (revision 248095) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-38.c (revision 248096) @@ -38,7 +38,7 @@ memset (&h[0], 0, N); - acc_copyout (h, N); + acc_copyout_finalize (h, N); for (i = 0; i < N; i++) { Index: libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c (revision 248095) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c (revision 248096) @@ -268,10 +268,10 @@ #pragma acc exit data delete (a[0:N], b[0:N]) - if (acc_is_present (a, nbytes)) + if (!acc_is_present (a, nbytes)) abort (); - if (acc_is_present (b, nbytes)) + if (!acc_is_present (b, nbytes)) abort (); #pragma acc exit data delete (a[0:N], b[0:N]) @@ -300,10 +300,10 @@ #pragma acc exit data delete (a[0:N], b[0:N]) - if (acc_is_present (a, nbytes)) + if (!acc_is_present (a, nbytes)) abort (); - if (acc_is_present (b, nbytes)) + if (!acc_is_present (b, nbytes)) abort (); #pragma acc exit data delete (a[0:N], b[0:N]) Index: libgomp/testsuite/libgomp.oacc-c-c++-common/data-4.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/data-4.c (nonexistent) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/data-4.c (revision 248096) @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d1, *d2; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + +#pragma acc data create (h[0:N]) + { + #pragma acc enter data create (h[0:N]) + } + + if (!acc_is_present (h, N)) + abort (); + +#pragma acc exit data delete (h[0:N]) + + if (acc_is_present (h, N)) + abort (); + + free (h); + return 0; +} Index: libgomp/testsuite/libgomp.oacc-c-c++-common/data-5.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/data-5.c (nonexistent) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/data-5.c (revision 248096) @@ -0,0 +1,66 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i, c; + unsigned char *h; + void *d1, *d2; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + h[i] = i; + + #pragma acc enter data create (h[0:N]) + #pragma acc enter data copyin (h[0:N]) + #pragma acc enter data copyin (h[0:N]) + #pragma acc enter data copyin (h[0:N]) + + acc_update_self (h, N); + for (i = 0, c = 0; i < N; i++) + if (h[i] == i) + c++; + /* h[] should be filled with uninitialized device values, + abort if it's not. */ + if (c == N) + abort (); + + for (i = 0; i < N; i++) + h[i] = 0; + + #pragma acc parallel present(h[0:N]) + { + for (i = 0; i < N; i++) + h[i] = 111; + } + + /* No actual copyout should happen. */ + acc_copyout (h, N); + for (i = 0; i < N; i++) + if (h[i] != 0) + abort (); + + #pragma acc exit data delete (h[0:N]) + /* This should not actually be deleted yet. */ + if (!acc_is_present (h, N)) + abort (); + + #pragma acc exit data copyout (h[0:N]) finalize + + for (i = 0; i < N; i++) + if (h[i] != 111) + abort (); + + if (acc_is_present (h, N)) + abort (); + + free (h); + return 0; +} Index: libgomp/target.c =================================================================== --- libgomp/target.c (revision 248095) +++ libgomp/target.c (revision 248096) @@ -984,6 +984,7 @@ tgt->list[i].offset = 0; tgt->list[i].length = k->host_end - k->host_start; k->refcount = 1; + k->dynamic_refcount = 0; tgt->refcount++; array->left = NULL; array->right = NULL; @@ -1242,6 +1243,23 @@ free (tgt); } +attribute_hidden bool +gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k) +{ + bool is_tgt_unmapped = false; + splay_tree_remove (&devicep->mem_map, k); + if (k->link_key) + splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key); + if (k->tgt->refcount > 1) + k->tgt->refcount--; + else + { + is_tgt_unmapped = true; + gomp_unmap_tgt (k->tgt); + } + return is_tgt_unmapped; +} + /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant variables back from device to host: if it is false, it is assumed that this has been done already. */ @@ -1290,16 +1308,7 @@ + tgt->list[i].offset), tgt->list[i].length); if (do_unmap) - { - splay_tree_remove (&devicep->mem_map, k); - if (k->link_key) - splay_tree_insert (&devicep->mem_map, - (splay_tree_node) k->link_key); - if (k->tgt->refcount > 1) - k->tgt->refcount--; - else - gomp_unmap_tgt (k->tgt); - } + gomp_remove_var (devicep, k); } if (tgt->refcount > 1) @@ -1536,17 +1545,7 @@ else { splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k); - splay_tree_remove (&devicep->mem_map, n); - if (n->link_key) - { - if (n->tgt->refcount > 1) - n->tgt->refcount--; - else - { - is_tgt_unmapped = true; - gomp_unmap_tgt (n->tgt); - } - } + is_tgt_unmapped = gomp_remove_var (devicep, n); } } @@ -2229,16 +2228,7 @@ - k->host_start), cur_node.host_end - cur_node.host_start); if (k->refcount == 0) - { - splay_tree_remove (&devicep->mem_map, k); - if (k->link_key) - splay_tree_insert (&devicep->mem_map, - (splay_tree_node) k->link_key); - if (k->tgt->refcount > 1) - k->tgt->refcount--; - else - gomp_unmap_tgt (k->tgt); - } + gomp_remove_var (devicep, k); break; default: Index: libgomp/oacc-mem.c =================================================================== --- libgomp/oacc-mem.c (revision 248095) +++ libgomp/oacc-mem.c (revision 248096) @@ -440,6 +440,7 @@ tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, &kinds, true, GOMP_MAP_VARS_OPENACC); + tgt->list[0].key->refcount = REFCOUNT_INFINITY; } gomp_mutex_lock (&acc_dev->lock); @@ -494,6 +495,9 @@ (void *) n->host_start, (int) host_size, (void *) h); } + /* Mark for removal. */ + n->refcount = 1; + t = n->tgt; if (t->refcount == 2) @@ -583,6 +587,11 @@ gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); } + if (n->refcount != REFCOUNT_INFINITY) + { + n->refcount++; + n->dynamic_refcount++; + } gomp_mutex_unlock (&acc_dev->lock); } else if (!(f & FLAG_CREATE)) @@ -609,6 +618,8 @@ tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true, GOMP_MAP_VARS_OPENACC); + /* Initialize dynamic refcount. */ + tgt->list[0].key->dynamic_refcount = 1; if (async > acc_async_sync) acc_dev->openacc.async_set_async_func (acc_async_sync); @@ -678,7 +689,8 @@ } #endif -#define FLAG_COPYOUT (1 << 0) +#define FLAG_COPYOUT (1 << 0) +#define FLAG_FINALIZE (1 << 1) static void delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname) @@ -729,22 +741,58 @@ (void *) n->host_start, (int) host_size, (void *) h, (int) s); } - gomp_mutex_unlock (&acc_dev->lock); + if (n->refcount == REFCOUNT_INFINITY) + { + n->refcount = 0; + n->dynamic_refcount = 0; + } + if (n->refcount < n->dynamic_refcount) + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("Dynamic reference counting assert fail\n"); + } - if (async > acc_async_sync) - acc_dev->openacc.async_set_async_func (async); + if (f & FLAG_FINALIZE) + { + n->refcount -= n->dynamic_refcount; + n->dynamic_refcount = 0; + } + else if (n->dynamic_refcount) + { + n->dynamic_refcount--; + n->refcount--; + } - if (f & FLAG_COPYOUT) - acc_dev->dev2host_func (acc_dev->target_id, h, d, s); + if (n->refcount == 0) + { + if (n->tgt->refcount == 2) + { + struct target_mem_desc *tp, *t; + for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; + tp = t, t = t->prev) + if (n->tgt == t) + { + if (tp) + tp->prev = t->prev; + else + acc_dev->openacc.data_environ = t->prev; + break; + } + } - acc_unmap_data (h); + if (f & FLAG_COPYOUT) + { + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (async); + acc_dev->dev2host_func (acc_dev->target_id, h, d, s); + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (acc_async_sync); + } + gomp_remove_var (acc_dev, n); + } - if (async > acc_async_sync) - acc_dev->openacc.async_set_async_func (acc_async_sync); + gomp_mutex_unlock (&acc_dev->lock); - if (!acc_dev->free_func (acc_dev->target_id, d)) - gomp_fatal ("error in freeing device memory in %s", libfnname); - if (profiling_setup_p) { thr->prof_info = NULL; @@ -765,6 +813,18 @@ } void +acc_delete_finalize (void *h , size_t s) +{ + delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__); +} + +void +acc_delete_finalize_async (void *h , size_t s, int async) +{ + delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__); +} + +void acc_copyout (void *h, size_t s) { delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__); @@ -776,6 +836,19 @@ delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__); } +void +acc_copyout_finalize (void *h, size_t s) +{ + delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync, + __FUNCTION__); +} + +void +acc_copyout_finalize_async (void *h, size_t s, int async) +{ + delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__); +} + static void update_dev_host (int is_dev, void *h, size_t s, int async) { @@ -895,11 +968,37 @@ struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; + if (acc_is_present (*hostaddrs, *sizes)) + { + splay_tree_key n; + gomp_mutex_lock (&acc_dev->lock); + n = lookup_host (acc_dev, *hostaddrs, *sizes); + gomp_mutex_unlock (&acc_dev->lock); + + tgt = n->tgt; + for (size_t i = 0; i < tgt->list_count; i++) + if (tgt->list[i].key == n) + { + for (size_t j = 0; j < mapnum; j++) + if (i + j < tgt->list_count && tgt->list[i + j].key) + { + tgt->list[i + j].key->refcount++; + tgt->list[i + j].key->dynamic_refcount++; + } + return; + } + /* Should not reach here. */ + gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset"); + } + gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC); gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); + /* Initialize dynamic refcount. */ + tgt->list[0].key->dynamic_refcount = 1; + gomp_mutex_lock (&acc_dev->lock); tgt->prev = acc_dev->openacc.data_environ; acc_dev->openacc.data_environ = tgt; @@ -907,7 +1006,8 @@ } void -gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum) +gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async, + int finalize, int mapnum) { struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; @@ -915,6 +1015,9 @@ struct target_mem_desc *t; int minrefs = (mapnum == 1) ? 2 : 3; + if (!acc_is_present (h, s)) + return; + gomp_mutex_lock (&acc_dev->lock); n = lookup_host (acc_dev, h, 1); @@ -929,37 +1032,64 @@ t = n->tgt; - struct target_mem_desc *tp; + if (n->refcount < n->dynamic_refcount) + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("Dynamic reference counting assert fail\n"); + } - if (t->refcount == minrefs) + if (finalize) { - /* This is the last reference, so pull the descriptor off the - chain. This pevents gomp_unmap_vars via gomp_unmap_tgt from - freeing the device memory. */ + n->refcount -= n->dynamic_refcount; + n->dynamic_refcount = 0; + } + else if (n->dynamic_refcount) + { + n->dynamic_refcount--; + n->refcount--; + } - for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; - tp = t, t = t->prev) + gomp_mutex_unlock (&acc_dev->lock); + + if (n->refcount == 0) + { + if (t->refcount == minrefs) { - if (n->tgt == t) + /* This is the last reference, so pull the descriptor off the + chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from + freeing the device memory. */ + struct target_mem_desc *tp; + for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; + tp = t, t = t->prev) { - if (tp) - tp->prev = t->prev; - else - acc_dev->openacc.data_environ = t->prev; - break; + if (n->tgt == t) + { + if (tp) + tp->prev = t->prev; + else + acc_dev->openacc.data_environ = t->prev; + break; + } } } + + /* Set refcount to 1 to allow gomp_unmap_vars to unmap it. */ + n->refcount = 1; + t->refcount = minrefs; + for (size_t i = 0; i < t->list_count; i++) + if (t->list[i].key == n) + { + t->list[i].copy_from = force_copyfrom ? 1 : 0; + break; + } + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (async); + gomp_unmap_vars (t, true); + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (acc_async_sync); } - t->list[0].copy_from = force_copyfrom ? 1 : 0; - gomp_mutex_unlock (&acc_dev->lock); - /* If running synchronously, unmap immediately. */ - if (async < acc_async_noval) - gomp_unmap_vars (t, true); - else - t->device_descr->openacc.register_async_cleanup_func (t, async); - gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); } Index: libgomp/openacc.f90 =================================================================== --- libgomp/openacc.f90 (revision 248095) +++ libgomp/openacc.f90 (revision 248096) @@ -233,6 +233,24 @@ type (*), dimension (..), contiguous :: a end subroutine + subroutine acc_copyout_finalize_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_copyout_finalize_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_copyout_finalize_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + subroutine acc_delete_32_h (a, len) use iso_c_binding, only: c_int32_t !GCC$ ATTRIBUTES NO_ARG_CHECK :: a @@ -251,6 +269,24 @@ type (*), dimension (..), contiguous :: a end subroutine + subroutine acc_delete_finalize_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_delete_finalize_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_delete_finalize_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + subroutine acc_update_device_32_h (a, len) use iso_c_binding, only: c_int32_t !GCC$ ATTRIBUTES NO_ARG_CHECK :: a @@ -380,6 +416,30 @@ integer (acc_handle_kind) async end subroutine + subroutine acc_copyout_finalize_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_copyout_finalize_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_copyout_finalize_async_array_h (a, async) + use openacc_kinds, only: acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async + end subroutine + subroutine acc_delete_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t use openacc_kinds, only: acc_handle_kind @@ -404,6 +464,30 @@ integer (acc_handle_kind) async end subroutine + subroutine acc_delete_finalize_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_delete_finalize_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_delete_finalize_async_array_h (a, async) + use openacc_kinds, only: acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async + end subroutine + subroutine acc_update_device_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t use openacc_kinds, only: acc_handle_kind @@ -581,6 +665,14 @@ integer (c_size_t), value :: len end subroutine + subroutine acc_copyout_finalize_l (a, len) & + bind (C, name = "acc_copyout_finalize") + use iso_c_binding, only: c_size_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + end subroutine + subroutine acc_delete_l (a, len) & bind (C, name = "acc_delete") use iso_c_binding, only: c_size_t @@ -589,6 +681,14 @@ integer (c_size_t), value :: len end subroutine + subroutine acc_delete_finalize_l (a, len) & + bind (C, name = "acc_delete_finalize") + use iso_c_binding, only: c_size_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + end subroutine + subroutine acc_update_device_l (a, len) & bind (C, name = "acc_update_device") use iso_c_binding, only: c_size_t @@ -641,6 +741,15 @@ integer (c_int), value :: async end subroutine + subroutine acc_copyout_finalize_async_l (a, len, async) & + bind (C, name = "acc_copyout_finalize_async") + use iso_c_binding, only: c_size_t, c_int + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + integer (c_int), value :: async + end subroutine + subroutine acc_delete_async_l (a, len, async) & bind (C, name = "acc_delete_async") use iso_c_binding, only: c_size_t, c_int @@ -650,6 +759,15 @@ integer (c_int), value :: async end subroutine + subroutine acc_delete_finalize_async_l (a, len, async) & + bind (C, name = "acc_delete_finalize_async") + use iso_c_binding, only: c_size_t, c_int + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + integer (c_int), value :: async + end subroutine + subroutine acc_update_device_async_l (a, len, async) & bind (C, name = "acc_update_device_async") use iso_c_binding, only: c_size_t, c_int @@ -806,6 +924,12 @@ procedure :: acc_copyout_array_h end interface + interface acc_copyout_finalize + procedure :: acc_copyout_finalize_32_h + procedure :: acc_copyout_finalize_64_h + procedure :: acc_copyout_finalize_array_h + end interface + interface acc_delete procedure :: acc_delete_32_h procedure :: acc_delete_64_h @@ -812,6 +936,12 @@ procedure :: acc_delete_array_h end interface + interface acc_delete_finalize + procedure :: acc_delete_finalize_32_h + procedure :: acc_delete_finalize_64_h + procedure :: acc_delete_finalize_array_h + end interface + interface acc_update_device procedure :: acc_update_device_32_h procedure :: acc_update_device_64_h @@ -856,6 +986,12 @@ procedure :: acc_copyout_async_array_h end interface + interface acc_copyout_finalize_async + procedure :: acc_copyout_finalize_async_32_h + procedure :: acc_copyout_finalize_async_64_h + procedure :: acc_copyout_finalize_async_array_h + end interface + interface acc_delete_async procedure :: acc_delete_async_32_h procedure :: acc_delete_async_64_h @@ -862,6 +998,12 @@ procedure :: acc_delete_async_array_h end interface + interface acc_delete_finalize_async + procedure :: acc_delete_finalize_async_32_h + procedure :: acc_delete_finalize_async_64_h + procedure :: acc_delete_finalize_async_array_h + end interface + interface acc_update_device_async procedure :: acc_update_device_async_32_h procedure :: acc_update_device_async_64_h @@ -1104,6 +1246,30 @@ call acc_copyout_l (a, sizeof (a)) end subroutine +subroutine acc_copyout_finalize_32_h (a, len) + use iso_c_binding, only: c_int32_t, c_size_t + use openacc_internal, only: acc_copyout_finalize_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + call acc_copyout_finalize_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_copyout_finalize_64_h (a, len) + use iso_c_binding, only: c_int64_t, c_size_t + use openacc_internal, only: acc_copyout_finalize_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + call acc_copyout_finalize_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_copyout_finalize_array_h (a) + use openacc_internal, only: acc_copyout_finalize_l + type (*), dimension (..), contiguous :: a + call acc_copyout_finalize_l (a, sizeof (a)) +end subroutine + subroutine acc_delete_32_h (a, len) use iso_c_binding, only: c_int32_t, c_size_t use openacc_internal, only: acc_delete_l @@ -1128,6 +1294,30 @@ call acc_delete_l (a, sizeof (a)) end subroutine +subroutine acc_delete_finalize_32_h (a, len) + use iso_c_binding, only: c_int32_t, c_size_t + use openacc_internal, only: acc_delete_finalize_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + call acc_delete_finalize_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_delete_finalize_64_h (a, len) + use iso_c_binding, only: c_int64_t, c_size_t + use openacc_internal, only: acc_delete_finalize_l + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + call acc_delete_finalize_l (a, int (len, kind = c_size_t)) +end subroutine + +subroutine acc_delete_finalize_array_h (a) + use openacc_internal, only: acc_delete_finalize_l + type (*), dimension (..), contiguous :: a + call acc_delete_finalize_l (a, sizeof (a)) +end subroutine + subroutine acc_update_device_32_h (a, len) use iso_c_binding, only: c_int32_t, c_size_t use openacc_internal, only: acc_update_device_l @@ -1304,6 +1494,37 @@ call acc_copyout_async_l (a, sizeof (a), int (async, kind = c_int)) end subroutine +subroutine acc_copyout_finalize_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t, c_size_t, c_int + use openacc_internal, only: acc_copyout_finalize_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + call acc_copyout_finalize_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_copyout_finalize_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t, c_size_t, c_int + use openacc_internal, only: acc_copyout_finalize_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + call acc_copyout_finalize_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_copyout_finalize_async_array_h (a, async) + use iso_c_binding, only: c_int + use openacc_internal, only: acc_copyout_finalize_async_l + use openacc_kinds, only: acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async + call acc_copyout_finalize_async_l (a, sizeof (a), int (async, kind = c_int)) +end subroutine + subroutine acc_delete_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t, c_size_t, c_int use openacc_internal, only: acc_delete_async_l @@ -1335,6 +1556,37 @@ call acc_delete_async_l (a, sizeof (a), int (async, kind = c_int)) end subroutine +subroutine acc_delete_finalize_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t, c_size_t, c_int + use openacc_internal, only: acc_delete_finalize_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + call acc_delete_finalize_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_delete_finalize_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t, c_size_t, c_int + use openacc_internal, only: acc_delete_finalize_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + call acc_delete_finalize_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_delete_finalize_async_array_h (a, async) + use iso_c_binding, only: c_int + use openacc_internal, only: acc_delete_finalize_async_l + use openacc_kinds, only: acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async + call acc_delete_finalize_async_l (a, sizeof (a), int (async, kind = c_int)) +end subroutine + subroutine acc_update_device_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t, c_size_t, c_int use openacc_internal, only: acc_update_device_async_l Index: libgomp/libgomp.h =================================================================== --- libgomp/libgomp.h (revision 248095) +++ libgomp/libgomp.h (revision 248096) @@ -835,6 +835,8 @@ uintptr_t tgt_offset; /* Reference count. */ uintptr_t refcount; + /* Dynamic reference count. */ + uintptr_t dynamic_refcount; /* Pointer to the original mapping of "omp declare target link" object. */ splay_tree_key link_key; }; @@ -973,7 +975,7 @@ }; extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *); -extern void gomp_acc_remove_pointer (void *, bool, int, int); +extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int); extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *, unsigned short *); @@ -985,6 +987,7 @@ extern void gomp_init_device (struct gomp_device_descr *); extern void gomp_unload_device (struct gomp_device_descr *); extern bool gomp_offload_target_available_p (int); +extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key); /* work.c */ Index: libgomp/openacc_lib.h =================================================================== --- libgomp/openacc_lib.h (revision 248095) +++ libgomp/openacc_lib.h (revision 248096) @@ -303,6 +303,26 @@ end subroutine end interface + interface acc_copyout_finalize + subroutine acc_copyout_finalize_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_copyout_finalize_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_copyout_finalize_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + end interface + interface acc_delete subroutine acc_delete_32_h (a, len) use iso_c_binding, only: c_int32_t @@ -323,6 +343,26 @@ end subroutine end interface + interface acc_delete_finalize + subroutine acc_delete_finalize_32_h (a, len) + use iso_c_binding, only: c_int32_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + end subroutine + + subroutine acc_delete_finalize_64_h (a, len) + use iso_c_binding, only: c_int64_t + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + end subroutine + + subroutine acc_delete_finalize_array_h (a) + type (*), dimension (..), contiguous :: a + end subroutine + end interface + interface acc_update_device subroutine acc_update_device_32_h (a, len) use iso_c_binding, only: c_int32_t @@ -472,6 +512,32 @@ end subroutine end interface + interface acc_copyout_finalize_async + subroutine acc_copyout_finalize_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_copyout_finalize_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_copyout_finalize_async_array_h (a, async_) + import acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async_ + end subroutine + end interface + interface acc_delete_async subroutine acc_delete_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t @@ -498,6 +564,32 @@ end subroutine end interface + interface acc_delete_finalize_async + subroutine acc_delete_finalize_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_delete_finalize_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_delete_finalize_async_array_h (a, async_) + import acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async_ + end subroutine + end interface + interface acc_update_device_async subroutine acc_update_device_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t Index: gcc/c-family/c-pragma.h =================================================================== --- gcc/c-family/c-pragma.h (revision 248095) +++ gcc/c-family/c-pragma.h (revision 248096) @@ -157,6 +157,7 @@ PRAGMA_OACC_CLAUSE_DEVICEPTR, PRAGMA_OACC_CLAUSE_DEVICE_RESIDENT, PRAGMA_OACC_CLAUSE_DEVICE_TYPE, + PRAGMA_OACC_CLAUSE_FINALIZE, PRAGMA_OACC_CLAUSE_GANG, PRAGMA_OACC_CLAUSE_HOST, PRAGMA_OACC_CLAUSE_INDEPENDENT, Index: gcc/c/c-parser.c =================================================================== --- gcc/c/c-parser.c (revision 248095) +++ gcc/c/c-parser.c (revision 248096) @@ -10375,6 +10375,8 @@ case 'f': if (!strcmp ("final", p)) result = PRAGMA_OMP_CLAUSE_FINAL; + else if (!strcmp ("finalize", p)) + result = PRAGMA_OACC_CLAUSE_FINALIZE; else if (!strcmp ("firstprivate", p)) result = PRAGMA_OMP_CLAUSE_FIRSTPRIVATE; else if (!strcmp ("from", p)) @@ -11693,8 +11695,9 @@ return list; } -/* OpenACC: +/* OpenACC 2.5: auto + finalize independent nohost seq */ @@ -13171,6 +13174,11 @@ c_name = "device_type"; seen_dtype = true; break; + case PRAGMA_OACC_CLAUSE_FINALIZE: + clauses = c_parser_oacc_simple_clause (parser, here, + OMP_CLAUSE_FINALIZE, clauses); + c_name = "finalize"; + break; case PRAGMA_OACC_CLAUSE_FIRSTPRIVATE: clauses = c_parser_omp_clause_firstprivate (parser, clauses); c_name = "firstprivate"; @@ -13816,6 +13824,7 @@ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYOUT) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DELETE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_FINALIZE) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) ) static void Index: gcc/c/c-typeck.c =================================================================== --- gcc/c/c-typeck.c (revision 248095) +++ gcc/c/c-typeck.c (revision 248096) @@ -13397,6 +13397,7 @@ case OMP_CLAUSE_NOHOST: case OMP_CLAUSE_TILE: case OMP_CLAUSE_IF_PRESENT: + case OMP_CLAUSE_FINALIZE: pc = &OMP_CLAUSE_CHAIN (c); continue; Index: gcc/tree.c =================================================================== --- gcc/tree.c (revision 248095) +++ gcc/tree.c (revision 248096) @@ -331,7 +331,8 @@ 3, /* OMP_CLAUSE_TILE */ 2, /* OMP_CLAUSE__GRIDDIM_ */ 0, /* OMP_CLAUSE_IF_PRESENT */ - 2 /* OMP_CLAUSE_DEVICE_TYPE */ + 2, /* OMP_CLAUSE_DEVICE_TYPE */ + 0 /* OMP_CLAUSE_FINALIZE */ }; const char * const omp_clause_code_name[] = @@ -406,7 +407,8 @@ "tile", "_griddim_", "if_present", - "device_type" + "device_type", + "finalize" }; @@ -11723,6 +11725,7 @@ case OMP_CLAUSE_NOHOST: case OMP_CLAUSE_TILE: case OMP_CLAUSE_IF_PRESENT: + case OMP_CLAUSE_FINALIZE: WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); case OMP_CLAUSE_DEVICE_TYPE: Index: gcc/omp-low.c =================================================================== --- gcc/omp-low.c (revision 248095) +++ gcc/omp-low.c (revision 248096) @@ -2431,6 +2431,7 @@ case OMP_CLAUSE_TILE: case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_DEVICE_TYPE: + case OMP_CLAUSE_FINALIZE: break; case OMP_CLAUSE_ALIGNED: @@ -2606,6 +2607,7 @@ case OMP_CLAUSE__GRIDDIM_: case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_DEVICE_TYPE: + case OMP_CLAUSE_FINALIZE: break; case OMP_CLAUSE_BIND: @@ -14216,6 +14218,13 @@ if (t_async) args.safe_push (t_async); + if (start_ix == BUILT_IN_GOACC_ENTER_EXIT_DATA) + { + c = find_omp_clause (clauses, OMP_CLAUSE_FINALIZE); + tree t_finalize = c ? integer_one_node : integer_zero_node; + args.safe_push (t_finalize); + } + /* Save the argument index, and ... */ unsigned t_wait_idx = args.length (); unsigned num_waits = 0; Index: gcc/cp/semantics.c =================================================================== --- gcc/cp/semantics.c (revision 248095) +++ gcc/cp/semantics.c (revision 248096) @@ -7107,6 +7107,7 @@ case OMP_CLAUSE_BIND: case OMP_CLAUSE_NOHOST: case OMP_CLAUSE_IF_PRESENT: + case OMP_CLAUSE_FINALIZE: break; case OMP_CLAUSE_TILE: Index: gcc/cp/parser.c =================================================================== --- gcc/cp/parser.c (revision 248095) +++ gcc/cp/parser.c (revision 248096) @@ -29815,6 +29815,8 @@ case 'f': if (!strcmp ("final", p)) result = PRAGMA_OMP_CLAUSE_FINAL; + else if (!strcmp ("finalize", p)) + result = PRAGMA_OACC_CLAUSE_FINALIZE; else if (!strcmp ("firstprivate", p)) result = PRAGMA_OMP_CLAUSE_FIRSTPRIVATE; else if (!strcmp ("from", p)) @@ -30275,8 +30277,9 @@ return list; } -/* OpenACC 2.0: +/* OpenACC 2.5: auto + finalize independent nohost seq */ @@ -32390,6 +32393,11 @@ c_name = "device_type"; seen_dtype = true; break; + case PRAGMA_OACC_CLAUSE_FINALIZE: + clauses = cp_parser_oacc_simple_clause (parser, OMP_CLAUSE_FINALIZE, + clauses, here); + c_name = "finalize"; + break; case PRAGMA_OACC_CLAUSE_FIRSTPRIVATE: clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_FIRSTPRIVATE, clauses); @@ -35582,6 +35590,7 @@ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_COPYOUT) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_DELETE) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_FINALIZE) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WAIT) ) static tree Index: gcc/fortran/openmp.c =================================================================== --- gcc/fortran/openmp.c (revision 248095) +++ gcc/fortran/openmp.c (revision 248096) @@ -835,6 +835,7 @@ OMP_CLAUSE_NOHOST, OMP_CLAUSE_IF_PRESENT, OMP_CLAUSE_DEVICE_TYPE, + OMP_CLAUSE_FINALIZE, /* This must come last. */ OMP_MASK2_LAST }; @@ -1304,6 +1305,14 @@ && c->final_expr == NULL && gfc_match ("final ( %e )", &c->final_expr) == MATCH_YES) continue; + if ((mask & OMP_CLAUSE_FINALIZE) + && !c->finalize + && gfc_match ("finalize") == MATCH_YES) + { + c->finalize = true; + needs_space = true; + continue; + } if ((mask & OMP_CLAUSE_FIRSTPRIVATE) && gfc_match_omp_variable_list ("firstprivate (", &c->lists[OMP_LIST_FIRSTPRIVATE], @@ -2081,7 +2090,7 @@ | OMP_CLAUSE_COPYIN | OMP_CLAUSE_CREATE) #define OACC_EXIT_DATA_CLAUSES \ (omp_mask (OMP_CLAUSE_IF) | OMP_CLAUSE_ASYNC | OMP_CLAUSE_WAIT \ - | OMP_CLAUSE_COPYOUT | OMP_CLAUSE_DELETE) + | OMP_CLAUSE_COPYOUT | OMP_CLAUSE_DELETE | OMP_CLAUSE_FINALIZE) #define OACC_WAIT_CLAUSES \ omp_mask (OMP_CLAUSE_ASYNC) #define OACC_ROUTINE_CLAUSES \ Index: gcc/fortran/trans-openmp.c =================================================================== --- gcc/fortran/trans-openmp.c (revision 248095) +++ gcc/fortran/trans-openmp.c (revision 248096) @@ -2936,6 +2936,11 @@ c = build_omp_clause (where.lb->location, OMP_CLAUSE_IF_PRESENT); omp_clauses = gfc_trans_add_clause (c, omp_clauses); } + if (clauses->finalize) + { + c = build_omp_clause (where.lb->location, OMP_CLAUSE_FINALIZE); + omp_clauses = gfc_trans_add_clause (c, omp_clauses); + } if (clauses->independent) { c = build_omp_clause (where.lb->location, OMP_CLAUSE_INDEPENDENT); Index: gcc/fortran/gfortran.h =================================================================== --- gcc/fortran/gfortran.h (revision 248095) +++ gcc/fortran/gfortran.h (revision 248096) @@ -1318,7 +1318,7 @@ gfc_expr_list *tile_list; unsigned async:1, gang:1, worker:1, vector:1, seq:1, independent:1; unsigned wait:1, par_auto:1, gang_static:1, nohost:1, acc_collapse:1, bind:1; - unsigned if_present:1; + unsigned if_present:1, finalize:1; locus loc; char bind_name[GFC_MAX_SYMBOL_LEN+1]; } Index: gcc/gimplify.c =================================================================== --- gcc/gimplify.c (revision 248095) +++ gcc/gimplify.c (revision 248096) @@ -7669,6 +7669,7 @@ case OMP_CLAUSE_SIMD: case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_DEVICE_TYPE: + case OMP_CLAUSE_FINALIZE: break; case OMP_CLAUSE_DEFAULTMAP: @@ -8533,6 +8534,7 @@ case OMP_CLAUSE_TILE: case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_DEVICE_TYPE: + case OMP_CLAUSE_FINALIZE: break; case OMP_CLAUSE_BIND: Index: gcc/tree-core.h =================================================================== --- gcc/tree-core.h (revision 248095) +++ gcc/tree-core.h (revision 248096) @@ -473,7 +473,10 @@ OMP_CLAUSE_IF_PRESENT, /* OpenACC clause: device_type ( device-type-list). */ - OMP_CLAUSE_DEVICE_TYPE + OMP_CLAUSE_DEVICE_TYPE, + + /* OpenACC clause: finalize. */ + OMP_CLAUSE_FINALIZE }; #undef DEFTREESTRUCT