Hi! On Fri, 27 Jan 2017 08:06:22 -0800, Cesar Philippidis <ce...@codesourcery.com> wrote: > This patch optimizes GOMP_MAP_TO_PSET in libgomp by installing the > remapped pointer to the array data directly in the PSET, instead of > uploading it separately with GOMP_MAP_POINTER. Effectively this > eliminates the GOMP_MAP_POINTER that is associated with the PSET, > thereby eliminating an additional host2dev data transfer. > > While it does work, it's not quite as effective as I had hope it would > be. I'm only observing about 0.05s, if that, in CloverLeaf, and arguably > that's statistical noise.
Ah, too bad. > This is probably because CloverLeaf makes use > of ACC DATA regions in the critical sections, so all of those PSETs and > POINTERs are already preset on the accelerator. > > One thing I don't like about this patch is that I'm updating the host's > copy of the PSET prior to uploading it. The host's PSET does get > restored prior to returning from gomp_map_vars, however that might > impact things if the host were to run in multi-threaded applications. > Maybe I'll drop this patch from gomp4 since it's not very effective. ... also there is some bug somewhere; I see: PASS: libgomp.fortran/examples-4/async_target-2.f90 -O0 (test for excess errors) [-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90 -O0 execution test PASS: libgomp.fortran/examples-4/async_target-2.f90 -O1 (test for excess errors) [-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90 -O1 execution test PASS: libgomp.fortran/examples-4/async_target-2.f90 -O2 (test for excess errors) [-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90 -O2 execution test PASS: libgomp.fortran/examples-4/async_target-2.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions (test for excess errors) [-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test PASS: libgomp.fortran/examples-4/async_target-2.f90 -O3 -g (test for excess errors) [-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90 -O3 -g execution test PASS: libgomp.fortran/examples-4/async_target-2.f90 -Os (test for excess errors) [-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90 -Os execution test ..., and: PASS: libgomp.fortran/target3.f90 -O0 (test for excess errors) [-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90 -O0 execution test PASS: libgomp.fortran/target3.f90 -O1 (test for excess errors) [-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90 -O1 execution test PASS: libgomp.fortran/target3.f90 -O2 (test for excess errors) [-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90 -O2 execution test PASS: libgomp.fortran/target3.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions (test for excess errors) [-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test PASS: libgomp.fortran/target3.f90 -O3 -g (test for excess errors) [-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90 -O3 -g execution test PASS: libgomp.fortran/target3.f90 -Os (test for excess errors) [-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90 -Os execution test In all cases, the run-time error message is: libgomp: Pointer target of array section wasn't mapped For reference, I'm appending the patch, which wasn't included in the original email. Grüße Thomas commit 29f783a0e2162fea3e21e7f4295bc16f252e1c1f Author: cesar <cesar@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Fri Jan 27 15:51:52 2017 +0000 Optimize GOMP_MAP_TO_PSET. libgomp/ * target.c (gomp_map_pset): New function. (gomp_map_vars): Update GOMP_MAP_POINTER with GOMP_MAP_TO_PSET, when possible. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@244984 138bc75d-0d04-0410-961f-82ee72b054a4 --- libgomp/ChangeLog.gomp | 6 +++ libgomp/target.c | 101 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 77 insertions(+), 30 deletions(-) diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp index 40b536f..0fb5297 100644 --- libgomp/ChangeLog.gomp +++ libgomp/ChangeLog.gomp @@ -1,5 +1,11 @@ 2017-01-27 Cesar Philippidis <ce...@codesourcery.com> + * target.c (gomp_map_pset): New function. + (gomp_map_vars): Update GOMP_MAP_POINTER with GOMP_MAP_TO_PSET, when + possible. + +2017-01-27 Cesar Philippidis <ce...@codesourcery.com> + * plugin/plugin-nvptx.c (nvptx_exec): Make aware of GOMP_MAP_FIRSTPRIVATE_INT host addresses. * testsuite/libgomp.oacc-c++/firstprivate-int.C: New test. diff --git libgomp/target.c libgomp/target.c index 557f565..590b7dd 100644 --- libgomp/target.c +++ libgomp/target.c @@ -295,6 +295,37 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr, (void *) &cur_node.tgt_offset, sizeof (void *)); } +static uintptr_t +gomp_map_pset (struct target_mem_desc *tgt, uintptr_t host_ptr, + uintptr_t target_offset, uintptr_t bias) +{ + struct gomp_device_descr *devicep = tgt->device_descr; + struct splay_tree_s *mem_map = &devicep->mem_map; + struct splay_tree_key_s cur_node; + + cur_node.host_start = host_ptr; + + /* Add bias to the pointer value. */ + cur_node.host_start += bias; + cur_node.host_end = cur_node.host_start; + splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); + if (n == NULL) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Pointer target of array section wasn't mapped"); + } + + cur_node.host_start -= n->host_start; + cur_node.tgt_offset + = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start; + /* At this point tgt_offset is target address of the + array section. Now subtract bias to get what we want + to initialize the pointer with. */ + cur_node.tgt_offset -= bias; + + return cur_node.tgt_offset; +} + static void gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n, size_t first, size_t i, void **hostaddrs, @@ -984,36 +1015,46 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, break; case GOMP_MAP_TO_PSET: /* FIXME: see above FIXME comment. */ - gomp_copy_host2dev (devicep, - (void *) (tgt->tgt_start - + k->tgt_offset), - (void *) k->host_start, - k->host_end - k->host_start); - - for (j = i + 1; j < mapnum; j++) - if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, - j) - & typemask)) - break; - else if ((uintptr_t) hostaddrs[j] < k->host_start - || ((uintptr_t) hostaddrs[j] + sizeof (void *) - > k->host_end)) - break; - else - { - tgt->list[j].key = k; - tgt->list[j].copy_from = false; - tgt->list[j].always_copy_from = false; - if (k->refcount != REFCOUNT_INFINITY) - k->refcount++; - gomp_map_pointer (tgt, - (uintptr_t) *(void **) hostaddrs[j], - k->tgt_offset - + ((uintptr_t) hostaddrs[j] - - k->host_start), - sizes[j]); - i++; - } + { + bool found_pointer = false; + for (j = i + 1; j < mapnum; j++) + if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, + j) + & typemask)) + break; + else if ((uintptr_t) hostaddrs[j] < k->host_start + || ((uintptr_t) hostaddrs[j] + sizeof (void *) + > k->host_end)) + break; + else + { + uintptr_t tptr = 0; + uintptr_t toffset = + gomp_map_pset (tgt, + (uintptr_t) *(void **) + hostaddrs[j], + k->tgt_offset + + ((uintptr_t) hostaddrs[j] + - k->host_start), + sizes[j]); + tptr = *(uintptr_t *) hostaddrs[i]; + *(uintptr_t *) hostaddrs[i]= toffset; + gomp_copy_host2dev (devicep, + (void *) (tgt->tgt_start + + k->tgt_offset), + (void *) k->host_start, + k->host_end - k->host_start); + *(uintptr_t *) hostaddrs[i] = tptr; + i++; + found_pointer = true; + } + if (!found_pointer) + gomp_copy_host2dev (devicep, + (void *) (tgt->tgt_start + + k->tgt_offset), + (void *) k->host_start, + k->host_end - k->host_start); + } break; case GOMP_MAP_FORCE_PRESENT: {