Hi Jakub,
This part has changes to 'struct goacc_asyncqueue*' arguments to various
memory copying/mapping functions. To lessen the amount of code changes new 
'gomp_map/unmap_vars_async'
functions names are used (with the non-async original names defined with the 
asyncqueue==NULL).

Inside gomp_target_fini, a 'gomp_fini_device' named function has been 
(re?)introduced,
since there's also asyncqueue destructing now need before the 
->fini_device_func() call.

Thanks,
Chung-Lin


        * target.c (goacc_device_copy_async): New function.
        (gomp_copy_host2dev): Remove 'static', add goacc_asyncqueue parameter,
        add goacc_device_copy_async case.
        (gomp_copy_dev2host): Likewise.
        (gomp_map_vars_existing): Add goacc_asyncqueue parameter, adjust code.
        (gomp_map_pointer): Likewise.
        (gomp_map_fields_existing): Likewise.
        (gomp_map_vars): Add function for compatiblity.
        (gomp_map_vars_async): Adapt from gomp_map_vars, add goacc_asyncqueue
        parameter.
        (gomp_unmap_tgt): Remove statis, add attribute_hidden.
        (gomp_unmap_vars): Add function for compatiblity.
        (gomp_unmap_vars_async): Adapt from gomp_unmap_vars, add
        goacc_asyncqueue parameter.
        (gomp_fini_device): New function.
        (gomp_exit_data): Adjust gomp_copy_dev2host call.
        (gomp_load_plugin_for_device): Remove old interface, adjust to load
        new async interface.
        (gomp_target_fini): Adjust code to call gomp_fini_device.

diff --git a/libgomp/target.c b/libgomp/target.c
index dda041c..ff5b114 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -177,6 +177,22 @@ gomp_device_copy (struct gomp_device_descr *devicep,
     }
 }
 
+static inline void
+goacc_device_copy_async (struct gomp_device_descr *devicep,
+                        bool (*copy_func) (int, void *, const void *, size_t,
+                                           struct goacc_asyncqueue *),
+                        const char *dst, void *dstaddr,
+                        const char *src, const void *srcaddr,
+                        size_t size, struct goacc_asyncqueue *aq)
+{
+  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
+                 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
+    }
+}
+
 /* Infrastructure for coalescing adjacent or nearly adjacent (in device 
addresses)
    host to device memory transfers.  */
 
@@ -263,8 +279,9 @@ gomp_to_device_kind_p (int kind)
     }
 }
 
-static void
+attribute_hidden void
 gomp_copy_host2dev (struct gomp_device_descr *devicep,
+                   struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    struct gomp_coalesce_buf *cbuf)
 {
@@ -293,14 +310,23 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
            }
        }
     }
-  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
+  if (aq)
+    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
+                            "dev", d, "host", h, sz, aq);
+  else
+    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, 
sz);
 }
 
-static void
+attribute_hidden void
 gomp_copy_dev2host (struct gomp_device_descr *devicep,
+                   struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
 {
-  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
+  if (aq)
+    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
+                            "host", h, "dev", d, sz, aq);
+  else
+    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, 
sz);
 }
 
 static void
@@ -318,7 +344,8 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, 
void *devptr)
    Helper function of gomp_map_vars.  */
 
 static inline void
-gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
+gomp_map_vars_existing (struct gomp_device_descr *devicep,
+                       struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, struct gomp_coalesce_buf *cbuf)
 {
@@ -340,7 +367,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, 
splay_tree_key oldn,
     }
 
   if (GOMP_MAP_ALWAYS_TO_P (kind))
-    gomp_copy_host2dev (devicep,
+    gomp_copy_host2dev (devicep, aq,
                        (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                  + newn->host_start - oldn->host_start),
                        (void *) newn->host_start,
@@ -358,8 +385,8 @@ get_kind (bool short_mapkind, void *kinds, int idx)
 }
 
 static void
-gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
-                 uintptr_t target_offset, uintptr_t bias,
+gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
+                 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
@@ -370,7 +397,7 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t 
host_ptr,
   if (cur_node.host_start == (uintptr_t) NULL)
     {
       cur_node.tgt_offset = (uintptr_t) NULL;
-      gomp_copy_host2dev (devicep,
+      gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset,
                          sizeof (void *), cbuf);
@@ -392,12 +419,13 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t 
host_ptr,
      array section.  Now subtract bias to get what we want
      to initialize the pointer with.  */
   cur_node.tgt_offset -= bias;
-  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
+  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
 }
 
 static void
-gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
+gomp_map_fields_existing (struct target_mem_desc *tgt,
+                         struct goacc_asyncqueue *aq, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf)
@@ -417,7 +445,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, 
splay_tree_key n,
       && n2->tgt == n->tgt
       && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
     {
-      gomp_map_vars_existing (devicep, n2, &cur_node,
+      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
                              &tgt->list[i], kind & typemask, cbuf);
       return;
     }
@@ -433,8 +461,8 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, 
splay_tree_key n,
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
-             gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
-                                     kind & typemask, cbuf);
+             gomp_map_vars_existing (devicep, aq, n2, &cur_node,
+                                     &tgt->list[i], kind & typemask, cbuf);
              return;
            }
        }
@@ -445,7 +473,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, 
splay_tree_key n,
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
-         gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
+         gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, cbuf);
          return;
        }
@@ -482,6 +510,18 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
 {
+  struct target_mem_desc *tgt;
+  tgt = gomp_map_vars_async (devicep, NULL, mapnum, hostaddrs, devaddrs,
+                            sizes, kinds, short_mapkind, pragma_kind);
+  return tgt;
+}
+
+attribute_hidden struct target_mem_desc *
+gomp_map_vars_async (struct gomp_device_descr *devicep,
+                    struct goacc_asyncqueue *aq, size_t mapnum,
+                    void **hostaddrs, void **devaddrs, size_t *sizes, void 
*kinds,
+                    bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
+{
   size_t i, tgt_align, tgt_size, not_found_cnt = 0;
   bool has_firstprivate = false;
   const int rshift = short_mapkind ? 8 : 3;
@@ -594,7 +634,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
              continue;
            }
          for (i = first; i <= last; i++)
-           gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+           gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                      sizes, kinds, NULL);
          i--;
          continue;
@@ -639,7 +679,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
       else
        n = splay_tree_lookup (mem_map, &cur_node);
       if (n && n->refcount != REFCOUNT_LINK)
-       gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
+       gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
                                kind & typemask, NULL);
       else
        {
@@ -750,7 +790,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
-               gomp_copy_host2dev (devicep,
+               gomp_copy_host2dev (devicep, aq,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, cbufp);
                tgt_size += len;
@@ -784,7 +824,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
                    continue;
                  }
                for (i = first; i <= last; i++)
-                 gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+                 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp);
                i--;
                continue;
@@ -804,7 +844,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
-               gomp_copy_host2dev (devicep,
+               gomp_copy_host2dev (devicep, aq,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
@@ -825,7 +865,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
-             gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
+             gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
                                      kind & typemask, cbufp);
            else
              {
@@ -878,18 +918,19 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
-                   gomp_copy_host2dev (devicep,
+                   gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);
                    break;
                  case GOMP_MAP_POINTER:
-                   gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
+                   gomp_map_pointer (tgt, aq,
+                                     (uintptr_t) *(void **) k->host_start,
                                      k->tgt_offset, sizes[i], cbufp);
                    break;
                  case GOMP_MAP_TO_PSET:
-                   gomp_copy_host2dev (devicep,
+                   gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
@@ -911,7 +952,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
                          tgt->list[j].always_copy_from = false;
                          if (k->refcount != REFCOUNT_INFINITY)
                            k->refcount++;
-                         gomp_map_pointer (tgt,
+                         gomp_map_pointer (tgt, aq,
                                            (uintptr_t) *(void **) hostaddrs[j],
                                            k->tgt_offset
                                            + ((uintptr_t) hostaddrs[j]
@@ -940,7 +981,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
-                   gomp_copy_host2dev (devicep,
+                   gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
@@ -957,9 +998,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
-                   devicep->host2dev_func (devicep->target_id,
-                                           (void *) n->tgt_offset,
-                                           &tgt_addr, sizeof (void *));
+                   gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
+                                       &tgt_addr, sizeof (void *), cbufp);
                  }
                array++;
              }
@@ -971,7 +1011,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
       for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
-         gomp_copy_host2dev (devicep,
+         gomp_copy_host2dev (devicep, aq,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *),
                              cbufp);
@@ -982,7 +1022,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
     {
       long c = 0;
       for (c = 0; c < cbuf.chunk_cnt; ++c)
-       gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + cbuf.chunks[2 * 
c]),
+       gomp_copy_host2dev (devicep, aq,
+                           (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
                            (char *) cbuf.buf + (cbuf.chunks[2 * c] - 
cbuf.chunks[0]),
                            cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
       free (cbuf.buf);
@@ -1001,7 +1042,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
   return tgt;
 }
 
-static void
+attribute_hidden void
 gomp_unmap_tgt (struct target_mem_desc *tgt)
 {
   /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
@@ -1036,6 +1077,13 @@ gomp_remove_var (struct gomp_device_descr *devicep, 
splay_tree_key k)
 attribute_hidden void
 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
 {
+  gomp_unmap_vars_async (tgt, do_copyfrom, NULL);
+}
+
+attribute_hidden void
+gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
+                      struct goacc_asyncqueue *aq)
+{
   struct gomp_device_descr *devicep = tgt->device_descr;
 
   if (tgt->list_count == 0)
@@ -1071,7 +1119,7 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool 
do_copyfrom)
 
       if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
          || tgt->list[i].always_copy_from)
-       gomp_copy_dev2host (devicep,
+       gomp_copy_dev2host (devicep, aq,
                            (void *) (k->host_start + tgt->list[i].offset),
                            (void *) (k->tgt->tgt_start + k->tgt_offset
                                      + tgt->list[i].offset),
@@ -1137,9 +1185,10 @@ gomp_update (struct gomp_device_descr *devicep, size_t 
mapnum, void **hostaddrs,
            size_t size = cur_node.host_end - cur_node.host_start;
 
            if (GOMP_MAP_COPY_TO_P (kind & typemask))
-             gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
+             gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
+                                 NULL);
            if (GOMP_MAP_COPY_FROM_P (kind & typemask))
-             gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
+             gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
          }
       }
   gomp_mutex_unlock (&devicep->lock);
@@ -1432,9 +1481,21 @@ gomp_init_device (struct gomp_device_descr *devicep)
                                   false);
     }
 
+  /* Initialize OpenACC asynchronous queues.  */
+  goacc_init_asyncqueues (devicep);
+
   devicep->state = GOMP_DEVICE_INITIALIZED;
 }
 
+attribute_hidden bool
+gomp_fini_device (struct gomp_device_descr *devicep)
+{
+  bool ret = goacc_fini_asyncqueues (devicep);
+  ret &= devicep->fini_device_func (devicep->target_id);
+  devicep->state = GOMP_DEVICE_FINALIZED;
+  return ret;
+}
+
 attribute_hidden void
 gomp_unload_device (struct gomp_device_descr *devicep)
 {
@@ -1924,7 +1985,7 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t 
mapnum,
 
          if ((kind == GOMP_MAP_FROM && k->refcount == 0)
              || kind == GOMP_MAP_ALWAYS_FROM)
-           gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
+           gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
                                (void *) (k->tgt->tgt_start + k->tgt_offset
                                          + cur_node.host_start
                                          - k->host_start),
@@ -2557,20 +2618,20 @@ gomp_load_plugin_for_device (struct gomp_device_descr 
*device,
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
     {
       if (!DLSYM_OPT (openacc.exec, openacc_exec)
-         || !DLSYM_OPT (openacc.register_async_cleanup,
-                        openacc_register_async_cleanup)
-         || !DLSYM_OPT (openacc.async_test, openacc_async_test)
-         || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
-         || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
-         || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
-         || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
-         || !DLSYM_OPT (openacc.async_wait_all_async,
-                        openacc_async_wait_all_async)
-         || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
          || !DLSYM_OPT (openacc.create_thread_data,
                         openacc_create_thread_data)
          || !DLSYM_OPT (openacc.destroy_thread_data,
-                        openacc_destroy_thread_data))
+                        openacc_destroy_thread_data)
+         || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
+         || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
+         || !DLSYM_OPT (openacc.async.test, openacc_async_test)
+         || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
+         || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
+         || !DLSYM_OPT (openacc.async.queue_callback,
+                        openacc_async_queue_callback)
+         || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
+         || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
+         || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
        {
          /* Require all the OpenACC handlers if we have
             GOMP_OFFLOAD_CAP_OPENACC_200.  */
@@ -2621,10 +2682,7 @@ gomp_target_fini (void)
       struct gomp_device_descr *devicep = &devices[i];
       gomp_mutex_lock (&devicep->lock);
       if (devicep->state == GOMP_DEVICE_INITIALIZED)
-       {
-         ret = devicep->fini_device_func (devicep->target_id);
-         devicep->state = GOMP_DEVICE_FINALIZED;
-       }
+       ret = gomp_fini_device (devicep);
       gomp_mutex_unlock (&devicep->lock);
       if (!ret)
        gomp_fatal ("device finalization failed");

Reply via email to