On Thu, Jul 30, 2015 at 10:12:59 +0200, Jakub Jelinek wrote: > On Wed, Jul 29, 2015 at 10:06:52PM +0300, Ilya Verbin wrote: > > @@ -1241,6 +1245,62 @@ GOMP_target_update (int device, const void *unused, > > size_t mapnum, > > gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); > > } > > > > +static void > > +gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, > > + void **hostaddrs, size_t *sizes, unsigned short *kinds) > > +{ > > + const int typemask = 0xff; > > + size_t i; > > + gomp_mutex_lock (&devicep->lock); > > + for (i = 0; i < mapnum; i++) > > + { > > + struct splay_tree_key_s cur_node; > > + unsigned char kind = kinds[i] & typemask; > > + switch (kind) > > + { > > + case GOMP_MAP_FROM: > > + case GOMP_MAP_ALWAYS_FROM: > > + case GOMP_MAP_DELETE: > > + case GOMP_MAP_RELEASE: > > Please handle here GOMP_MAP_ZERO_LEN_ARRAY_SECTION too. > It should use gomp_map_lookup (while all others splay_tree_lookup), > otherwise it is the same as GOMP_MAP_RELEASE.
Done. > > @@ -1280,13 +1337,20 @@ GOMP_target_enter_exit_data (int device, size_t > > mapnum, void **hostaddrs, > > } > > > > if (is_enter_data) > > - { > > - /* TODO */ > > - } > > + for (i = 0; i < mapnum; i++) > > + { > > + struct target_mem_desc *tgt_var > > + = gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], > > + &kinds[i], true, false); > > + tgt_var->refcount--; > > + > > + /* If the variable was already mapped, tgt_var is not needed. Otherwise > > + tgt_var will be freed by gomp_unmap_vars or gomp_exit_data. */ > > + if (tgt_var->refcount == 0) > > + free (tgt_var); > > This is racy, you don't hold the device lock here anymore, so you shouldn't > decrease refcounts or test it etc. > I think better would be to change the bool is_target argument to > gomp_map_vars into an enum, and use 3 values there for now > - GOMP_VARS_MAP_TARGET, GOMP_VARS_MAP_DATA, GOMP_VARS_MAP_ENTER_DATA or so, > and for GOMP_VARS_MAP_ENTER_DATA perform the decrement of refcount and > freeing if it is zero (but then also better return NULL). Fixed. > > diff --git a/libgomp/testsuite/libgomp.c/target-20.c > > b/libgomp/testsuite/libgomp.c/target-20.c > > new file mode 100644 > > index 0000000..ec7e245 > > --- /dev/null > > +++ b/libgomp/testsuite/libgomp.c/target-20.c > > @@ -0,0 +1,111 @@ > > +/* { dg-require-effective-target offload_device } */ > > This test will fail on HSA, you don't assume just that it doesn't > fallback to host, but also non-shared address space. Fixed. make check-target-libgomp passed. ok? libgomp/ * libgomp.h (enum gomp_map_vars_kind): New. (gomp_map_vars): Change type of the argument from bool to enum gomp_map_vars_kind. * oacc-mem.c (acc_map_data, present_create_copy, gomp_acc_insert_pointer): Pass GOMP_MAP_VARS_OPENACC instead of false to gomp_map_vars. * oacc-parallel.c (GOACC_parallel, GOACC_data_start): Likewise. * target.c (gomp_map_vars_existing): Fix target address for 'always to' array sections. (gomp_map_vars): Change type of the argument from bool to enum gomp_map_vars_kind, fixup its usage. Set tgt->refcount to 0 if called from GOMP_target_enter_exit_data. Free tgt if called from GOMP_target_enter_exit_data and nothing has been mapped. (gomp_unmap_vars): Decrement k->refcount when it is 1 and k->async_refcount is 0. (gomp_offload_image_to_device): Set tgt's refcount to infinity. (GOMP_target, GOMP_target_41): Pass GOMP_MAP_VARS_TARGET instead of true to gomp_map_vars. (gomp_target_data_fallback, GOMP_target_data, GOMP_target_data_41): Pass GOMP_MAP_VARS_DATA instead of false to gomp_map_vars. (gomp_exit_data): New static function. (GOMP_target_enter_exit_data): Support mapping/unmapping. * testsuite/libgomp.c/target-11.c: Extend for testing 'always to' array sections. * testsuite/libgomp.c/target-20.c: New test. diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 707acaf..9031649 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -787,12 +787,22 @@ struct gomp_device_descr acc_dispatch_t openacc; }; +/* Kind of the pragma, for which gomp_map_vars () is called. */ +enum gomp_map_vars_kind +{ + GOMP_MAP_VARS_OPENACC, + GOMP_MAP_VARS_TARGET, + GOMP_MAP_VARS_DATA, + GOMP_MAP_VARS_ENTER_DATA +}; + extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *); extern void gomp_acc_remove_pointer (void *, bool, int, int); extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *, size_t, void **, void **, - size_t *, void *, bool, bool); + size_t *, void *, bool, + enum gomp_map_vars_kind); extern void gomp_copy_from_async (struct target_mem_desc *); extern void gomp_unmap_vars (struct target_mem_desc *, bool); extern void gomp_init_device (struct gomp_device_descr *); diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index c0fcb07..af067d6 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -289,7 +289,8 @@ acc_map_data (void *h, void *d, size_t s) if (d != h) gomp_fatal ("cannot map data on shared-memory system"); - tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false); + tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, + GOMP_MAP_VARS_OPENACC); } else { @@ -318,7 +319,7 @@ acc_map_data (void *h, void *d, size_t s) gomp_mutex_unlock (&acc_dev->lock); tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, - &kinds, true, false); + &kinds, true, GOMP_MAP_VARS_OPENACC); } gomp_mutex_lock (&acc_dev->lock); @@ -447,7 +448,7 @@ present_create_copy (unsigned f, void *h, size_t s) gomp_mutex_unlock (&acc_dev->lock); tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true, - false); + GOMP_MAP_VARS_OPENACC); gomp_mutex_lock (&acc_dev->lock); @@ -594,7 +595,7 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, - NULL, sizes, kinds, true, false); + NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC); gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); gomp_mutex_lock (&acc_dev->lock); diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 8ea3dd1..38c4770 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -131,7 +131,7 @@ GOACC_parallel (int device, void (*fn) (void *), tgt_fn = (void (*)) fn; tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, - false); + GOMP_MAP_VARS_OPENACC); devaddrs = gomp_alloca (sizeof (void *) * mapnum); for (i = 0; i < mapnum; i++) @@ -178,7 +178,8 @@ GOACC_data_start (int device, size_t mapnum, if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || host_fallback) { - tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false); + tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, + GOMP_MAP_VARS_OPENACC); tgt->prev = thr->mapped_data; thr->mapped_data = tgt; @@ -187,7 +188,7 @@ GOACC_data_start (int device, size_t mapnum, gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, - false); + GOMP_MAP_VARS_OPENACC); gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); tgt->prev = thr->mapped_data; thr->mapped_data = tgt; diff --git a/libgomp/target.c b/libgomp/target.c index d7f4693..565982b 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -188,7 +188,8 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn, if (GOMP_MAP_ALWAYS_TO_P (kind)) devicep->host2dev_func (devicep->target_id, - (void *) (oldn->tgt->tgt_start + oldn->tgt_offset), + (void *) (oldn->tgt->tgt_start + oldn->tgt_offset + + newn->host_start - oldn->host_start), (void *) newn->host_start, newn->host_end - newn->host_start); if (oldn->refcount != REFCOUNT_INFINITY) @@ -247,7 +248,7 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr, attribute_hidden struct target_mem_desc * gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, - bool short_mapkind, bool is_target) + bool short_mapkind, enum gomp_map_vars_kind pragma_kind) { size_t i, tgt_align, tgt_size, not_found_cnt = 0; bool has_firstprivate = false; @@ -258,7 +259,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); tgt->list_count = mapnum; - tgt->refcount = 1; + tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1; tgt->device_descr = devicep; if (mapnum == 0) @@ -266,7 +267,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, tgt_align = sizeof (void *); tgt_size = 0; - if (is_target) + if (pragma_kind == GOMP_MAP_VARS_TARGET) { size_t align = 4 * sizeof (void *); tgt_align = align; @@ -377,7 +378,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, tgt->tgt_start = (uintptr_t) tgt->to_free; tgt->tgt_end = tgt->tgt_start + sizes[0]; } - else if (not_found_cnt || is_target) + else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET) { /* Allocate tgt_align aligned tgt_size block of memory. */ /* FIXME: Perhaps change interface to allocate properly aligned @@ -396,7 +397,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } tgt_size = 0; - if (is_target) + if (pragma_kind == GOMP_MAP_VARS_TARGET) tgt_size = mapnum * sizeof (void *); tgt->array = NULL; @@ -560,7 +561,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } } - if (is_target) + if (pragma_kind == GOMP_MAP_VARS_TARGET) { for (i = 0; i < mapnum; i++) { @@ -587,6 +588,15 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } } + /* If the variable from "omp target enter data" map-list was already mapped, + tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or + gomp_exit_data. */ + if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0) + { + free (tgt); + tgt = NULL; + } + gomp_mutex_unlock (&devicep->lock); return tgt; } @@ -661,15 +671,18 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) continue; bool do_unmap = false; - if (k->refcount > 1) + if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) + k->refcount--; + else if (k->refcount == 1) { - if (k->refcount != REFCOUNT_INFINITY) - k->refcount--; + if (k->async_refcount > 0) + k->async_refcount--; + else + { + k->refcount--; + do_unmap = true; + } } - else if (k->async_refcount > 0) - k->async_refcount--; - else - do_unmap = true; if ((do_unmap && do_copyfrom && tgt->list[i].copy_from) || tgt->list[i].always_copy_from) @@ -794,7 +807,7 @@ gomp_offload_image_to_device (struct gomp_device_descr *devicep, /* Insert host-target address mapping into splay tree. */ struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array)); - tgt->refcount = 1; + tgt->refcount = REFCOUNT_INFINITY; tgt->tgt_start = 0; tgt->tgt_end = 0; tgt->to_free = NULL; @@ -1080,7 +1093,7 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, struct target_mem_desc *tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, - true); + GOMP_MAP_VARS_TARGET); struct gomp_thread old_thr, *thr = gomp_thread (); old_thr = *thr; memset (thr, '\0', sizeof (*thr)); @@ -1140,7 +1153,7 @@ GOMP_target_41 (int device, void (*fn) (void *), size_t mapnum, struct target_mem_desc *tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, - true); + GOMP_MAP_VARS_TARGET); struct gomp_thread old_thr, *thr = gomp_thread (); old_thr = *thr; memset (thr, '\0', sizeof (*thr)); @@ -1168,7 +1181,8 @@ gomp_target_data_fallback (void) new #pragma omp target data, otherwise GOMP_target_end_data would get out of sync. */ struct target_mem_desc *tgt - = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false); + = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, + GOMP_MAP_VARS_DATA); tgt->prev = icv->target_data; icv->target_data = tgt; } @@ -1186,7 +1200,7 @@ GOMP_target_data (int device, const void *unused, size_t mapnum, struct target_mem_desc *tgt = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, - false); + GOMP_MAP_VARS_DATA); struct gomp_task_icv *icv = gomp_icv (true); tgt->prev = icv->target_data; icv->target_data = tgt; @@ -1204,7 +1218,7 @@ GOMP_target_data_41 (int device, size_t mapnum, void **hostaddrs, size_t *sizes, struct target_mem_desc *tgt = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, - false); + GOMP_MAP_VARS_DATA); struct gomp_task_icv *icv = gomp_icv (true); tgt->prev = icv->target_data; icv->target_data = tgt; @@ -1235,6 +1249,65 @@ GOMP_target_update (int device, const void *unused, size_t mapnum, gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); } +static void +gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds) +{ + const int typemask = 0xff; + size_t i; + gomp_mutex_lock (&devicep->lock); + for (i = 0; i < mapnum; i++) + { + struct splay_tree_key_s cur_node; + unsigned char kind = kinds[i] & typemask; + switch (kind) + { + case GOMP_MAP_FROM: + case GOMP_MAP_ALWAYS_FROM: + case GOMP_MAP_DELETE: + case GOMP_MAP_RELEASE: + case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizes[i]; + splay_tree_key k = kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION + ? gomp_map_lookup (&devicep->mem_map, &cur_node) + : splay_tree_lookup (&devicep->mem_map, &cur_node); + if (!k) + continue; + + if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY) + k->refcount--; + if (kind == GOMP_MAP_DELETE && k->refcount != REFCOUNT_INFINITY) + k->refcount = 0; + + if ((kind == GOMP_MAP_FROM && k->refcount == 0) + || kind == GOMP_MAP_ALWAYS_FROM) + devicep->dev2host_func (devicep->target_id, + (void *) cur_node.host_start, + (void *) (k->tgt->tgt_start + k->tgt_offset + + cur_node.host_start + - k->host_start), + cur_node.host_end - cur_node.host_start); + if (k->refcount == 0) + { + splay_tree_remove (&devicep->mem_map, k); + if (k->tgt->refcount > 1) + k->tgt->refcount--; + else + gomp_unmap_tgt (k->tgt); + } + + break; + default: + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x", + kind); + } + } + + gomp_mutex_unlock (&devicep->lock); +} + void GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds) @@ -1253,9 +1326,6 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, { unsigned char kind = kinds[i] & typemask; - if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) - continue; - if (kind == GOMP_MAP_ALLOC || kind == GOMP_MAP_TO || kind == GOMP_MAP_ALWAYS_TO) @@ -1267,20 +1337,19 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, if (kind == GOMP_MAP_FROM || kind == GOMP_MAP_ALWAYS_FROM || kind == GOMP_MAP_DELETE - || kind == GOMP_MAP_RELEASE) + || kind == GOMP_MAP_RELEASE + || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) break; gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x", kind); } if (is_enter_data) - { - /* TODO */ - } + for (i = 0; i < mapnum; i++) + gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i], + true, GOMP_MAP_VARS_ENTER_DATA); else - { - /* TODO */ - } + gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds); } void diff --git a/libgomp/testsuite/libgomp.c/target-11.c b/libgomp/testsuite/libgomp.c/target-11.c index ed6a17a..625c286 100644 --- a/libgomp/testsuite/libgomp.c/target-11.c +++ b/libgomp/testsuite/libgomp.c/target-11.c @@ -9,6 +9,17 @@ void test_array_section (int *p) { #pragma omp target data map(alloc: p[0:N]) { + int ok = 1; + for (int i = 10; i < 10 + 4; i++) + p[i] = 997 * i; + + #pragma omp target map(always to:p[10:4]) map(tofrom: ok) + for (int i = 10; i < 10 + 4; i++) + if (p[i] != 997 * i) + ok = 0; + + assert (ok); + #pragma omp target map(always from:p[7:9]) for (int i = 0; i < N; i++) p[i] = i; diff --git a/libgomp/testsuite/libgomp.c/target-20.c b/libgomp/testsuite/libgomp.c/target-20.c new file mode 100644 index 0000000..3f4e798 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-20.c @@ -0,0 +1,120 @@ +/* { dg-require-effective-target offload_device_nonshared_as } */ + +#include <stdlib.h> +#include <assert.h> + +#define N 40 + +int sum; +int var1 = 1; +int var2 = 2; + +#pragma omp declare target +int D[N]; +#pragma omp end declare target + +void enter_data (int *X) +{ + #pragma omp target enter data map(to: var1, var2, X[:N]) map(alloc: sum) +} + +void exit_data_0 (int *D) +{ + #pragma omp target exit data map(delete: D[:N]) +} + +void exit_data_1 () +{ + #pragma omp target exit data map(from: var1) +} + +void exit_data_2 (int *X) +{ + #pragma omp target exit data map(from: var2) map(release: X[:N], sum) +} + +void exit_data_3 (int *p) +{ + #pragma omp target exit data map(from: p[:0]) +} + +void test_nested () +{ + int X = 0, Y = 0, Z = 0; + + #pragma omp target data map(from: X, Y, Z) + { + #pragma omp target data map(from: X, Y, Z) + { + #pragma omp target map(from: X, Y, Z) + X = Y = Z = 1337; + assert (X == 0); + assert (Y == 0); + assert (Z == 0); + + #pragma omp target exit data map(from: X) map(release: Y) + assert (X == 0); + assert (Y == 0); + + #pragma omp target exit data map(release: Y) map(delete: Z) + assert (Y == 0); + assert (Z == 0); + } + assert (X == 1337); + assert (Y == 0); + assert (Z == 0); + + #pragma omp target map(from: X) + X = 2448; + assert (X == 2448); + assert (Y == 0); + assert (Z == 0); + + X = 4896; + } + assert (X == 4896); + assert (Y == 0); + assert (Z == 0); +} + +int main () +{ + int *X = malloc (N * sizeof (int)); + int *Y = malloc (N * sizeof (int)); + X[10] = 10; + Y[20] = 20; + enter_data (X); + + exit_data_0 (D); /* This should have no effect on D. */ + + #pragma omp target map(alloc: var1, var2, X[:N]) map(to: Y[:N]) map(always from: sum) + { + var1 += X[10]; + var2 += Y[20]; + sum = var1 + var2; + D[sum]++; + } + + assert (var1 == 1); + assert (var2 == 2); + assert (sum == 33); + + exit_data_1 (); + assert (var1 == 11); + assert (var2 == 2); + + /* Increase refcount of already mapped X[0:N]. */ + #pragma omp target enter data map(alloc: X[16:1]) + + exit_data_2 (X); + assert (var2 == 22); + + exit_data_3 (X + 5); /* Unmap X[0:N]. */ + + free (X); + free (Y); + + test_nested (); + + return 0; +} -- Ilya