Hi! This patch implements GOMP_target_enter_exit_data in libgomp, also it fixes a bug in gomp_map_vars_existing. make check-target-libgomp passed. However, I am afraid that there may be some hard-to-find issues (like memory leaks) in cases of mixed (structured+unstructured) data mappings... OK for gomp-4_1-branch?
libgomp/ * target.c (gomp_map_vars_existing): Fix target address for 'always to' array sections. (gomp_unmap_vars): Decrement k->refcount when it's 1 and k->async_refcount is 0. (GOMP_target_enter_exit_data): Add mapping/unmapping. * testsuite/libgomp.c/target-11.c: Extend for testing 'always to' array sections. * testsuite/libgomp.c/target-12.c: New test. diff --git a/libgomp/target.c b/libgomp/target.c index a394e95..83ca827 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -171,7 +171,8 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn, if (GOMP_MAP_ALWAYS_TO_P (kind)) devicep->host2dev_func (devicep->target_id, - (void *) (oldn->tgt->tgt_start + oldn->tgt_offset), + (void *) (oldn->tgt->tgt_start + oldn->tgt_offset + + newn->host_start - oldn->host_start), (void *) newn->host_start, newn->host_end - newn->host_start); oldn->refcount++; @@ -580,10 +581,16 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) bool do_unmap = false; if (k->refcount > 1) k->refcount--; - else if (k->async_refcount > 0) - k->async_refcount--; - else - do_unmap = true; + else if (k->refcount == 1) + { + if (k->async_refcount > 0) + k->async_refcount--; + else + { + k->refcount--; + do_unmap = true; + } + } if ((do_unmap && do_copyfrom && tgt->list[i].copy_from) || tgt->list[i].always_copy_from) @@ -1160,13 +1167,61 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, } if (is_enter_data) - { - /* TODO */ - } + gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, false); else - { - /* TODO */ - } + for (i = 0; i < mapnum; i++) + { + struct splay_tree_key_s cur_node; + unsigned char kind = kinds[i] & typemask; + switch (kind) + { + case GOMP_MAP_FROM: + case GOMP_MAP_ALWAYS_FROM: + case GOMP_MAP_DELETE: + case GOMP_MAP_RELEASE: + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizes[i]; + gomp_mutex_lock (&devicep->lock); + splay_tree_key k = splay_tree_lookup (&devicep->mem_map, &cur_node); + if (!k) + { + gomp_mutex_unlock (&devicep->lock); + continue; + } + + if (k->refcount > 0) + k->refcount--; + if (kind == GOMP_MAP_DELETE) + k->refcount = 0; + + if ((kind == GOMP_MAP_FROM && k->refcount == 0) + || kind == GOMP_MAP_ALWAYS_FROM) + devicep->dev2host_func (devicep->target_id, + (void *) cur_node.host_start, + (void *) (k->tgt->tgt_start + + k->tgt_offset + + cur_node.host_start + - k->host_start), + cur_node.host_end - cur_node.host_start); + if (k->refcount == 0) + { + splay_tree_remove (&devicep->mem_map, k); + if (k->tgt->refcount > 1) + k->tgt->refcount--; + else + gomp_unmap_tgt (k->tgt); + } + + gomp_mutex_unlock (&devicep->lock); + break; + case GOMP_MAP_POINTER: + case GOMP_MAP_TO_PSET: + break; + default: + gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x", + kind); + } + } } void diff --git a/libgomp/testsuite/libgomp.c/target-11.c b/libgomp/testsuite/libgomp.c/target-11.c index b86097a..98882f0 100644 --- a/libgomp/testsuite/libgomp.c/target-11.c +++ b/libgomp/testsuite/libgomp.c/target-11.c @@ -9,6 +9,17 @@ void test_array_section (int *p) { #pragma omp target data map(alloc: p[0:N]) { + int ok = 1; + for (int i = 10; i < 10 + 4; i++) + p[i] = 997 * i; + + #pragma omp target map(always to:p[10:4]) map(tofrom: ok) + for (int i = 10; i < 10 + 4; i++) + if (p[i] != 997 * i) + ok = 0; + + assert (ok); + #pragma omp target map(always from:p[7:9]) for (int i = 0; i < N; i++) p[i] = i; diff --git a/libgomp/testsuite/libgomp.c/target-12.c b/libgomp/testsuite/libgomp.c/target-12.c new file mode 100644 index 0000000..e22f765 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-12.c @@ -0,0 +1,98 @@ +/* { dg-require-effective-target offload_device } */ + +#include <stdlib.h> +#include <assert.h> + +#define N 32 + +int sum; +int var1 = 1; +int var2 = 2; + +void enter_data (int *X) +{ + #pragma omp target enter data map(to: var1, var2, X[:N]) map(alloc: sum) +} + +void exit_data_1 () +{ + #pragma omp target exit data map(from: var1) +} + +void exit_data_2 () +{ + #pragma omp target exit data map(from: var2) +} + +void test_nested () +{ + int X = 0, Y = 0, Z = 0; + + #pragma omp target data map(from: X, Y, Z) + { + #pragma omp target data map(from: X, Y, Z) + { + #pragma omp target map(from: X, Y, Z) + X = Y = Z = 1337; + assert (X == 0); + assert (Y == 0); + assert (Z == 0); + + #pragma omp target exit data map(from: X) map(release: Y) + assert (X == 0); + assert (Y == 0); + + #pragma omp target exit data map(release: Y) map(delete: Z) + assert (Y == 0); + assert (Z == 0); + } + assert (X == 1337); + assert (Y == 0); + assert (Z == 0); + + #pragma omp target map(from: X) + X = 2448; + assert (X == 2448); + assert (Y == 0); + assert (Z == 0); + + X = 4896; + } + assert (X == 4896); + assert (Y == 0); + assert (Z == 0); +} + +int main () +{ + int *X = malloc (N * sizeof (int)); + int *Y = malloc (N * sizeof (int)); + X[10] = 10; + Y[20] = 20; + enter_data (X); + + #pragma omp target map(alloc: X[:N]) map(to: Y[:N]) map(always from: sum) + { + var1 += X[10]; + var2 += Y[20]; + sum = var1 + var2; + } + + free (X); + free (Y); + + assert (var1 == 1); + assert (var2 == 2); + assert (sum == 33); + + exit_data_1 (); + assert (var1 == 11); + assert (var2 == 2); + + exit_data_2 (); + assert (var2 == 22); + + test_nested (); + + return 0; +} -- Ilya