Hi!

On Fri, 27 Jan 2017 08:06:22 -0800, Cesar Philippidis <ce...@codesourcery.com> 
wrote:
> This patch optimizes GOMP_MAP_TO_PSET in libgomp by installing the
> remapped pointer to the array data directly in the PSET, instead of
> uploading it separately with GOMP_MAP_POINTER. Effectively this
> eliminates the GOMP_MAP_POINTER that is associated with the PSET,
> thereby eliminating an additional host2dev data transfer.
> 
> While it does work, it's not quite as effective as I had hope it would
> be. I'm only observing about 0.05s, if that, in CloverLeaf, and arguably
> that's statistical noise.

Ah, too bad.

> This is probably because CloverLeaf makes use
> of ACC DATA regions in the critical sections, so all of those PSETs and
> POINTERs are already preset on the accelerator.
> 
> One thing I don't like about this patch is that I'm updating the host's
> copy of the PSET prior to uploading it. The host's PSET does get
> restored prior to returning from gomp_map_vars, however that might
> impact things if the host were to run in multi-threaded applications.
> Maybe I'll drop this patch from gomp4 since it's not very effective.

... also there is some bug somewhere; I see:

    PASS: libgomp.fortran/examples-4/async_target-2.f90   -O0  (test for excess 
errors)
    [-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90   -O0  
execution test
    PASS: libgomp.fortran/examples-4/async_target-2.f90   -O1  (test for excess 
errors)
    [-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90   -O1  
execution test
    PASS: libgomp.fortran/examples-4/async_target-2.f90   -O2  (test for excess 
errors)
    [-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90   -O2  
execution test
    PASS: libgomp.fortran/examples-4/async_target-2.f90   -O3 
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  
(test for excess errors)
    [-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90   -O3 
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  
execution test
    PASS: libgomp.fortran/examples-4/async_target-2.f90   -O3 -g  (test for 
excess errors)
    [-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90   -O3 -g  
execution test
    PASS: libgomp.fortran/examples-4/async_target-2.f90   -Os  (test for excess 
errors)
    [-PASS:-]{+FAIL:+} libgomp.fortran/examples-4/async_target-2.f90   -Os  
execution test

..., and:

    PASS: libgomp.fortran/target3.f90   -O0  (test for excess errors)
    [-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90   -O0  execution test
    PASS: libgomp.fortran/target3.f90   -O1  (test for excess errors)
    [-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90   -O1  execution test
    PASS: libgomp.fortran/target3.f90   -O2  (test for excess errors)
    [-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90   -O2  execution test
    PASS: libgomp.fortran/target3.f90   -O3 -fomit-frame-pointer -funroll-loops 
-fpeel-loops -ftracer -finline-functions  (test for excess errors)
    [-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
    PASS: libgomp.fortran/target3.f90   -O3 -g  (test for excess errors)
    [-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90   -O3 -g  execution test
    PASS: libgomp.fortran/target3.f90   -Os  (test for excess errors)
    [-PASS:-]{+FAIL:+} libgomp.fortran/target3.f90   -Os  execution test

In all cases, the run-time error message is:

    libgomp: Pointer target of array section wasn't mapped


For reference, I'm appending the patch, which wasn't included in the
original email.


Grüße
 Thomas


commit 29f783a0e2162fea3e21e7f4295bc16f252e1c1f
Author: cesar <cesar@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri Jan 27 15:51:52 2017 +0000

    Optimize GOMP_MAP_TO_PSET.
    
            libgomp/
            * target.c (gomp_map_pset): New function.
            (gomp_map_vars): Update GOMP_MAP_POINTER with GOMP_MAP_TO_PSET, when
            possible.
    
    
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@244984 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgomp/ChangeLog.gomp |   6 +++
 libgomp/target.c       | 101 ++++++++++++++++++++++++++++++++++---------------
 2 files changed, 77 insertions(+), 30 deletions(-)

diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index 40b536f..0fb5297 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,5 +1,11 @@
 2017-01-27  Cesar Philippidis  <ce...@codesourcery.com>
 
+       * target.c (gomp_map_pset): New function.
+       (gomp_map_vars): Update GOMP_MAP_POINTER with GOMP_MAP_TO_PSET, when
+       possible.
+
+2017-01-27  Cesar Philippidis  <ce...@codesourcery.com>
+
        * plugin/plugin-nvptx.c (nvptx_exec): Make aware of
        GOMP_MAP_FIRSTPRIVATE_INT host addresses.
        * testsuite/libgomp.oacc-c++/firstprivate-int.C: New test.
diff --git libgomp/target.c libgomp/target.c
index 557f565..590b7dd 100644
--- libgomp/target.c
+++ libgomp/target.c
@@ -295,6 +295,37 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t 
host_ptr,
                      (void *) &cur_node.tgt_offset, sizeof (void *));
 }
 
+static uintptr_t
+gomp_map_pset (struct target_mem_desc *tgt, uintptr_t host_ptr,
+              uintptr_t target_offset, uintptr_t bias)
+{
+  struct gomp_device_descr *devicep = tgt->device_descr;
+  struct splay_tree_s *mem_map = &devicep->mem_map;
+  struct splay_tree_key_s cur_node;
+
+  cur_node.host_start = host_ptr;
+
+  /* Add bias to the pointer value.  */
+  cur_node.host_start += bias;
+  cur_node.host_end = cur_node.host_start;
+  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
+  if (n == NULL)
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("Pointer target of array section wasn't mapped");
+    }
+
+  cur_node.host_start -= n->host_start;
+  cur_node.tgt_offset
+    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
+  /* At this point tgt_offset is target address of the
+     array section.  Now subtract bias to get what we want
+     to initialize the pointer with.  */
+  cur_node.tgt_offset -= bias;
+
+  return cur_node.tgt_offset;
+}
+
 static void
 gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
@@ -984,36 +1015,46 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
                    break;
                  case GOMP_MAP_TO_PSET:
                    /* FIXME: see above FIXME comment.  */
-                   gomp_copy_host2dev (devicep,
-                                       (void *) (tgt->tgt_start
-                                                 + k->tgt_offset),
-                                       (void *) k->host_start,
-                                       k->host_end - k->host_start);
-
-                   for (j = i + 1; j < mapnum; j++)
-                     if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
-                                                        j)
-                                              & typemask))
-                       break;
-                     else if ((uintptr_t) hostaddrs[j] < k->host_start
-                              || ((uintptr_t) hostaddrs[j] + sizeof (void *)
-                                  > k->host_end))
-                       break;
-                     else
-                       {
-                         tgt->list[j].key = k;
-                         tgt->list[j].copy_from = false;
-                         tgt->list[j].always_copy_from = false;
-                         if (k->refcount != REFCOUNT_INFINITY)
-                           k->refcount++;
-                         gomp_map_pointer (tgt,
-                                           (uintptr_t) *(void **) hostaddrs[j],
-                                           k->tgt_offset
-                                           + ((uintptr_t) hostaddrs[j]
-                                              - k->host_start),
-                                           sizes[j]);
-                         i++;
-                       }
+                   {
+                     bool found_pointer = false;
+                     for (j = i + 1; j < mapnum; j++)
+                       if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
+                                                          j)
+                                                & typemask))
+                         break;
+                       else if ((uintptr_t) hostaddrs[j] < k->host_start
+                                || ((uintptr_t) hostaddrs[j] + sizeof (void *)
+                                    > k->host_end))
+                         break;
+                       else
+                         {
+                           uintptr_t tptr = 0;
+                           uintptr_t toffset =
+                             gomp_map_pset (tgt,
+                                            (uintptr_t) *(void **)
+                                            hostaddrs[j],
+                                            k->tgt_offset
+                                            + ((uintptr_t) hostaddrs[j]
+                                               - k->host_start),
+                                            sizes[j]);
+                           tptr = *(uintptr_t *) hostaddrs[i];
+                           *(uintptr_t *) hostaddrs[i]= toffset;
+                           gomp_copy_host2dev (devicep,
+                                               (void *) (tgt->tgt_start
+                                                         + k->tgt_offset),
+                                               (void *) k->host_start,
+                                               k->host_end - k->host_start);
+                           *(uintptr_t *) hostaddrs[i] = tptr;
+                           i++;
+                           found_pointer = true;
+                         }
+                     if (!found_pointer)
+                       gomp_copy_host2dev (devicep,
+                                           (void *) (tgt->tgt_start
+                                                     + k->tgt_offset),
+                                           (void *) k->host_start,
+                                           k->host_end - k->host_start);
+                   }
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {

Reply via email to