This patch has been broken out of the patch supporting OpenACC 2.6 manual
deep copy last posted here:

  https://gcc.gnu.org/ml/gcc-patches/2018-12/msg01084.html

As part of developing that patch, libgomp's OpenACC reference counting
implementation proved to be somewhat inconsistent, especially when
used in combination with the deep copy support which exercises it
more thoroughly.

So, this patch contains just the changes to reference-counting behaviour,
for ease of (re-)review.  The other parts of OpenACC 2.6 manual deep
copy support are forthcoming, but some changes in this patch anticipate
that support.

Tested with offloading to NVPTX, with good results (though a couple of
tests need fixing also).

A follow-up patch provides self-checking of the reference count
implementation.

OK for trunk?

Thanks,

Julian

2019-10-02  Julian Brown  <jul...@codesourcery.com>

        libgomp/
        * libgomp.h (VREFCOUNT_LINK_KEY): New macro.
        (struct splay_tree_key_s): Put link_key field into a new union.
        Substitute dynamic_refcount field for virtual_refcount.
        (struct acc_dispatch_t): Remove data_environ field.
        (enum gomp_map_vars_kind): Add GOMP_MAP_VARS_OPENACC_ENTER_DATA.
        (gomp_acc_insert_pointer): Remove prototype.
        (gomp_acc_remove_pointer): Update prototype.
        (gomp_free_memmap): Remove prototype.
        (gomp_remove_var_async): Add prototype.
        * oacc-host.c (host_dispatch): Don't initialise removed data_environ
        field.
        * oacc-init.c (acc_shutdown_1): Use gomp_remove_var instead of
        gomp_free_memmap.
        * oacc-mem.c (lookup_dev_1): New function.
        (lookup_dev): Reimplement using above.
        (acc_free, acc_hostptr): Update calls to lookup_dev.
        (acc_map_data): Likewise.  Don't add to data_environ list.
        (acc_unmap_data): Remove call to gomp_unmap_vars.  Fix semantics to
        remove mapping, but not mapped data.
        (present_create_copy): Use virtual_refcount instead of
        dynamic_refcount.  Don't manipulate data_environ.  Fix target pointer
        return value.
        (delete_copyout): Update for virtual_refcount semantics.  Use
        goacc_remove_var_async for asynchronous delete/copyouts.
        (gomp_acc_insert_pointer): Remove function.
        (gomp_acc_remove_pointer): Reimplement.
        * oacc-parallel.c (find_pointer): Make a little more strict.
        (GOACC_enter_exit_data): Call gomp_map_vars_async directly instead of
        calling gomp_acc_insert_pointer.  Update call to
        gomp_acc_remove_pointer.
        * target.c (gomp_map_vars_internal): Handle
        GOMP_MAP_VARS_OPENACC_ENTER_DATA.  Update for virtual_refcount
        semantics.
        (gomp_remove_var): Reimplement in terms of...
        (gomp_remove_var_internal): ...this new helper function.
        (gomp_remove_var_async): Implement using above helper funciton.
        (gomp_unref_tgt): Reimplement.
        (gomp_unref_tgt_void): New function.
        (gomp_unmap_vars_internal): Update for virtual_refcount
        semantics.  Check for special virtual_refcount tag value before using
        link_key.
        (gomp_load_image_to_device): Zero-initialise virtual_refcount fields.
        (gomp_free_memmap): Remove function.
        (gomp_exit_data): Check virtual_refcount for tag value before using
        link_key.
        (omp_target_associate_ptr): Zero-initialise virtual_refcount and
        link_key splay tree key fields.
        (gomp_target_init): Don't initialise removed data_environ field.
        * testsuite/libgomp.oacc-c-c++-common/context-2.c: Use correct API to
        deallocate acc_copyin'd data.
        * testsuite/libgomp.oacc-c-c++-common/context-4.c: Likewise.
        * testsuite/libgomp.oacc-fortran/data-2.f90: Update test.
---
 libgomp/libgomp.h                             |  36 +-
 libgomp/oacc-host.c                           |   2 -
 libgomp/oacc-init.c                           |  10 +-
 libgomp/oacc-mem.c                            | 347 ++++++------------
 libgomp/oacc-parallel.c                       |  44 ++-
 libgomp/target.c                              | 127 ++++---
 .../libgomp.oacc-c-c++-common/context-2.c     |   6 +-
 .../libgomp.oacc-c-c++-common/context-4.c     |   6 +-
 .../testsuite/libgomp.oacc-fortran/data-2.f90 |   7 +-
 9 files changed, 278 insertions(+), 307 deletions(-)

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 178eb600ccd..6b7ed7248a1 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -908,6 +908,10 @@ struct target_mem_desc {
 #define OFFSET_POINTER (~(uintptr_t) 1)
 #define OFFSET_STRUCT (~(uintptr_t) 2)
 
+/* A special tag value for "virtual_refcount" in the splay_tree_key_s structure
+   below.  */
+#define VREFCOUNT_LINK_KEY (~(uintptr_t) 0)
+
 struct splay_tree_key_s {
   /* Address of the host object.  */
   uintptr_t host_start;
@@ -919,10 +923,18 @@ struct splay_tree_key_s {
   uintptr_t tgt_offset;
   /* Reference count.  */
   uintptr_t refcount;
-  /* Dynamic reference count.  */
-  uintptr_t dynamic_refcount;
-  /* Pointer to the original mapping of "omp declare target link" object.  */
-  splay_tree_key link_key;
+  /* Reference counts beyond those that represent genuine references in the
+     linked splay tree key/target memory structures, e.g. for multiple OpenACC
+     "present increment" operations (via "acc enter data") referring to the 
same
+     host-memory block.
+     If set to VREFCOUNT_LINK_KEY (for OpenMP, where this field is not 
otherwise
+     needed), the union below represents a link key.  */
+  uintptr_t virtual_refcount;
+  union {
+    /* Pointer to the original mapping of "omp declare target link" object.
+       Only used for OpenMP.  */
+    splay_tree_key link_key;
+  } u;
 };
 
 /* The comparison function.  */
@@ -944,13 +956,6 @@ splay_compare (splay_tree_key x, splay_tree_key y)
 
 typedef struct acc_dispatch_t
 {
-  /* This is a linked list of data mapped using the
-     acc_map_data/acc_unmap_data or "acc enter data"/"acc exit data" pragmas.
-     Unlike mapped_data in the goacc_thread struct, unmapping can
-     happen out-of-order with respect to mapping.  */
-  /* This is guarded by the lock in the "outer" struct gomp_device_descr.  */
-  struct target_mem_desc *data_environ;
-
   /* Execute.  */
   __typeof (GOMP_OFFLOAD_openacc_exec) *exec_func;
 
@@ -1060,13 +1065,15 @@ struct gomp_device_descr
 enum gomp_map_vars_kind
 {
   GOMP_MAP_VARS_OPENACC,
+  GOMP_MAP_VARS_OPENACC_ENTER_DATA,
   GOMP_MAP_VARS_TARGET,
   GOMP_MAP_VARS_DATA,
   GOMP_MAP_VARS_ENTER_DATA
 };
 
-extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *, int);
-extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int);
+extern void gomp_acc_remove_pointer (struct gomp_device_descr *, void **,
+                                    size_t *, unsigned short *, int, bool,
+                                    int);
 extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *,
                                       unsigned short *);
 struct gomp_coalesce_buf;
@@ -1092,9 +1099,10 @@ extern void gomp_unmap_vars_async (struct 
target_mem_desc *, bool,
                                   struct goacc_asyncqueue *);
 extern void gomp_init_device (struct gomp_device_descr *);
 extern bool gomp_fini_device (struct gomp_device_descr *);
-extern void gomp_free_memmap (struct splay_tree_s *);
 extern void gomp_unload_device (struct gomp_device_descr *);
 extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key);
+extern void gomp_remove_var_async (struct gomp_device_descr *, splay_tree_key,
+                                  struct goacc_asyncqueue *);
 
 /* work.c */
 
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 12299aee65d..1b9adcec774 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -264,8 +264,6 @@ static struct gomp_device_descr host_dispatch =
     .state = GOMP_DEVICE_UNINITIALIZED,
 
     .openacc = {
-      .data_environ = NULL,
-
       .exec_func = host_openacc_exec,
 
       .create_thread_data_func = host_openacc_create_thread_data,
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index e1568c535b3..e0395ef43b2 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -356,9 +356,13 @@ acc_shutdown_1 (acc_device_t d)
 
       if (walk->dev)
        {
-         gomp_mutex_lock (&walk->dev->lock);
-         gomp_free_memmap (&walk->dev->mem_map);
-         gomp_mutex_unlock (&walk->dev->lock);
+         while (walk->dev->mem_map.root)
+           {
+             splay_tree_key k = &walk->dev->mem_map.root->key;
+             if (k->virtual_refcount == VREFCOUNT_LINK_KEY)
+               k->u.link_key = NULL;
+             gomp_remove_var (walk->dev, k);
+           }
 
          walk->dev = NULL;
          walk->base_dev = NULL;
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 2f271009fb8..25084b71a2d 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -50,6 +50,25 @@ lookup_host (struct gomp_device_descr *dev, void *h, size_t 
s)
   return key;
 }
 
+/* Helper for lookup_dev.  Iterate over splay tree.  */
+
+static splay_tree_key
+lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
+{
+  splay_tree_key k = &node->key;
+  struct target_mem_desc *t = k->tgt;
+
+  if (d >= t->tgt_start && d + s <= t->tgt_end)
+    return k;
+
+  if (node->left)
+    return lookup_dev_1 (node->left, d, s);
+  if (node->right)
+    return lookup_dev_1 (node->right, d, s);
+
+  return NULL;
+}
+
 /* Return block containing [D->S), or NULL if not contained.
    The list isn't ordered by device address, so we have to iterate
    over the whole array.  This is not expected to be a common
@@ -57,35 +76,12 @@ lookup_host (struct gomp_device_descr *dev, void *h, size_t 
s)
    remains locked on exit.  */
 
 static splay_tree_key
-lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
+lookup_dev (splay_tree mem_map, void *d, size_t s)
 {
-  int i;
-  struct target_mem_desc *t;
-
-  if (!tgt)
+  if (!mem_map || !mem_map->root)
     return NULL;
 
-  for (t = tgt; t != NULL; t = t->prev)
-    {
-      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
-        break;
-    }
-
-  if (!t)
-    return NULL;
-
-  for (i = 0; i < t->list_count; i++)
-    {
-      void * offset;
-
-      splay_tree_key k = &t->array[i].key;
-      offset = d - t->tgt_start + k->tgt_offset;
-
-      if (k->host_start + offset <= (void *) k->host_end)
-        return k;
-    }
-
-  return NULL;
+  return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
 }
 
 /* OpenACC is silent on how memory exhaustion is indicated.  We return
@@ -150,7 +146,7 @@ acc_free (void *d)
   /* We don't have to call lazy open here, as the ptr value must have
      been returned by acc_malloc.  It's not permitted to pass NULL in
      (unless you got that null from acc_malloc).  */
-  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
+  if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
     {
       void *offset;
 
@@ -301,7 +297,7 @@ acc_hostptr (void *d)
 
   gomp_mutex_lock (&acc_dev->lock);
 
-  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);
+  n = lookup_dev (&acc_dev->mem_map, d, 1);
 
   if (!n)
     {
@@ -396,7 +392,7 @@ acc_map_data (void *h, void *d, size_t s)
                      (int)s);
        }
 
-      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
+      if (lookup_dev (&thr->dev->mem_map, d, s))
         {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
@@ -415,11 +411,6 @@ acc_map_data (void *h, void *d, size_t s)
          thr->api_info = NULL;
        }
     }
-
-  gomp_mutex_lock (&acc_dev->lock);
-  tgt->prev = acc_dev->openacc.data_environ;
-  acc_dev->openacc.data_environ = tgt;
-  gomp_mutex_unlock (&acc_dev->lock);
 }
 
 void
@@ -427,6 +418,7 @@ acc_unmap_data (void *h)
 {
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
+  struct splay_tree_key_s cur_node;
 
   /* No need to call lazy open, as the address must have been mapped.  */
 
@@ -438,12 +430,11 @@ acc_unmap_data (void *h)
   acc_api_info api_info;
   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
 
-  size_t host_size;
-
   gomp_mutex_lock (&acc_dev->lock);
 
-  splay_tree_key n = lookup_host (acc_dev, h, 1);
-  struct target_mem_desc *t;
+  cur_node.host_start = (uintptr_t) h;
+  cur_node.host_end = cur_node.host_start + 1;
+  splay_tree_key n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
 
   if (!n)
     {
@@ -451,47 +442,28 @@ acc_unmap_data (void *h)
       gomp_fatal ("%p is not a mapped block", (void *)h);
     }
 
-  host_size = n->host_end - n->host_start;
-
   if (n->host_start != (uintptr_t) h)
     {
+      size_t host_size = n->host_end - n->host_start;
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("[%p,%d] surrounds %p",
                  (void *) n->host_start, (int) host_size, (void *) h);
     }
 
-  /* Mark for removal.  */
-  n->refcount = 1;
+  splay_tree_remove (&acc_dev->mem_map, n);
 
-  t = n->tgt;
+  struct target_mem_desc *tgt = n->tgt;
 
-  if (t->refcount == 2)
+  if (tgt->refcount > 0)
+    tgt->refcount--;
+  else
     {
-      struct target_mem_desc *tp;
-
-      /* This is the last reference, so pull the descriptor off the
-         chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
-         freeing the device memory. */
-      t->tgt_end = 0;
-      t->to_free = 0;
-
-      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
-          tp = t, t = t->prev)
-       if (n->tgt == t)
-         {
-           if (tp)
-             tp->prev = t->prev;
-           else
-             acc_dev->openacc.data_environ = t->prev;
-
-           break;
-         }
+      free (tgt->array);
+      free (tgt);
     }
 
   gomp_mutex_unlock (&acc_dev->lock);
 
-  gomp_unmap_vars (t, true);
-
   if (profiling_p)
     {
       thr->prof_info = NULL;
@@ -549,11 +521,14 @@ present_create_copy (unsigned f, void *h, size_t s, int 
async)
          gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
        }
 
+      assert (n->virtual_refcount != VREFCOUNT_LINK_KEY);
+
       if (n->refcount != REFCOUNT_INFINITY)
        {
          n->refcount++;
-         n->dynamic_refcount++;
+         n->virtual_refcount++;
        }
+
       gomp_mutex_unlock (&acc_dev->lock);
     }
   else if (!(f & FLAG_CREATE))
@@ -563,7 +538,6 @@ present_create_copy (unsigned f, void *h, size_t s, int 
async)
     }
   else
     {
-      struct target_mem_desc *tgt;
       size_t mapnum = 1;
       unsigned short kinds;
       void *hostaddrs = h;
@@ -577,17 +551,14 @@ present_create_copy (unsigned f, void *h, size_t s, int 
async)
 
       goacc_aq aq = get_goacc_asyncqueue (async);
 
-      tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
-                                &kinds, true, GOMP_MAP_VARS_OPENACC);
-      /* Initialize dynamic refcount.  */
-      tgt->list[0].key->dynamic_refcount = 1;
+      gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s, &kinds,
+                          true, GOMP_MAP_VARS_OPENACC_ENTER_DATA);
 
       gomp_mutex_lock (&acc_dev->lock);
-
-      d = tgt->to_free;
-      tgt->prev = acc_dev->openacc.data_environ;
-      acc_dev->openacc.data_environ = tgt;
-
+      n = lookup_host (acc_dev, h, s);
+      assert (n != NULL);
+      d = (void *) (n->tgt->tgt_start + n->tgt_offset + (uintptr_t) h
+                   - n->host_start);
       gomp_mutex_unlock (&acc_dev->lock);
     }
 
@@ -671,7 +642,6 @@ delete_copyout (unsigned f, void *h, size_t s, int async, 
const char *libfnname)
 {
   size_t host_size;
   splay_tree_key n;
-  void *d;
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
@@ -700,8 +670,7 @@ delete_copyout (unsigned f, void *h, size_t s, int async, 
const char *libfnname)
       gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
     }
 
-  d = (void *) (n->tgt->tgt_start + n->tgt_offset
-               + (uintptr_t) h - n->host_start);
+  assert (n->virtual_refcount != VREFCOUNT_LINK_KEY);
 
   host_size = n->host_end - n->host_start;
 
@@ -715,48 +684,34 @@ delete_copyout (unsigned f, void *h, size_t s, int async, 
const char *libfnname)
   if (n->refcount == REFCOUNT_INFINITY)
     {
       n->refcount = 0;
-      n->dynamic_refcount = 0;
-    }
-  if (n->refcount < n->dynamic_refcount)
-    {
-      gomp_mutex_unlock (&acc_dev->lock);
-      gomp_fatal ("Dynamic reference counting assert fail\n");
+      n->virtual_refcount = 0;
     }
 
   if (f & FLAG_FINALIZE)
     {
-      n->refcount -= n->dynamic_refcount;
-      n->dynamic_refcount = 0;
+      n->refcount -= n->virtual_refcount;
+      n->virtual_refcount = 0;
     }
-  else if (n->dynamic_refcount)
+
+  if (n->virtual_refcount > 0)
     {
-      n->dynamic_refcount--;
       n->refcount--;
+      n->virtual_refcount--;
     }
+  else if (n->refcount > 0)
+    n->refcount--;
 
   if (n->refcount == 0)
     {
-      if (n->tgt->refcount == 2)
-       {
-         struct target_mem_desc *tp, *t;
-         for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
-              tp = t, t = t->prev)
-           if (n->tgt == t)
-             {
-               if (tp)
-                 tp->prev = t->prev;
-               else
-                 acc_dev->openacc.data_environ = t->prev;
-               break;
-             }
-       }
+      goacc_aq aq = get_goacc_asyncqueue (async);
 
       if (f & FLAG_COPYOUT)
-       {
-         goacc_aq aq = get_goacc_asyncqueue (async);
+        {
+         void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
+                             + (uintptr_t) h - n->host_start);
          gomp_copy_dev2host (acc_dev, aq, h, d, s);
        }
-      gomp_remove_var (acc_dev, n);
+      gomp_remove_var_async (acc_dev, n, aq);
     }
 
   gomp_mutex_unlock (&acc_dev->lock);
@@ -894,140 +849,80 @@ acc_update_self_async (void *h, size_t s, int async)
 }
 
 void
-gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
-                        void *kinds, int async)
-{
-  struct target_mem_desc *tgt;
-  struct goacc_thread *thr = goacc_thread ();
-  struct gomp_device_descr *acc_dev = thr->dev;
-
-  if (acc_is_present (*hostaddrs, *sizes))
-    {
-      splay_tree_key n;
-      gomp_mutex_lock (&acc_dev->lock);
-      n = lookup_host (acc_dev, *hostaddrs, *sizes);
-      gomp_mutex_unlock (&acc_dev->lock);
-
-      tgt = n->tgt;
-      for (size_t i = 0; i < tgt->list_count; i++)
-       if (tgt->list[i].key == n)
-         {
-           for (size_t j = 0; j < mapnum; j++)
-             if (i + j < tgt->list_count && tgt->list[i + j].key)
-               {
-                 tgt->list[i + j].key->refcount++;
-                 tgt->list[i + j].key->dynamic_refcount++;
-               }
-           return;
-         }
-      /* Should not reach here.  */
-      gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
-    }
-
-  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
-  goacc_aq aq = get_goacc_asyncqueue (async);
-  tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
-                            NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
-  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
-
-  /* Initialize dynamic refcount.  */
-  tgt->list[0].key->dynamic_refcount = 1;
-
-  gomp_mutex_lock (&acc_dev->lock);
-  tgt->prev = acc_dev->openacc.data_environ;
-  acc_dev->openacc.data_environ = tgt;
-  gomp_mutex_unlock (&acc_dev->lock);
-}
-
-void
-gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
-                        int finalize, int mapnum)
+gomp_acc_remove_pointer (struct gomp_device_descr *acc_dev, void **hostaddrs,
+                        size_t *sizes, unsigned short *kinds, int async,
+                        bool finalize, int mapnum)
 {
-  struct goacc_thread *thr = goacc_thread ();
-  struct gomp_device_descr *acc_dev = thr->dev;
+  struct splay_tree_key_s cur_node;
   splay_tree_key n;
-  struct target_mem_desc *t;
-  int minrefs = (mapnum == 1) ? 2 : 3;
-
-  if (!acc_is_present (h, s))
-    return;
 
   gomp_mutex_lock (&acc_dev->lock);
 
-  n = lookup_host (acc_dev, h, 1);
-
-  if (!n)
-    {
-      gomp_mutex_unlock (&acc_dev->lock);
-      gomp_fatal ("%p is not a mapped block", (void *)h);
-    }
-
-  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
-
-  t = n->tgt;
-
-  if (n->refcount < n->dynamic_refcount)
+  for (int i = 0; i < mapnum; i++)
     {
-      gomp_mutex_unlock (&acc_dev->lock);
-      gomp_fatal ("Dynamic reference counting assert fail\n");
-    }
+      int kind = kinds[i] & 0xff;
+      bool copyfrom = false;
 
-  if (finalize)
-    {
-      n->refcount -= n->dynamic_refcount;
-      n->dynamic_refcount = 0;
-    }
-  else if (n->dynamic_refcount)
-    {
-      n->dynamic_refcount--;
-      n->refcount--;
-    }
+      switch (kind)
+        {
+       case GOMP_MAP_FROM:
+       case GOMP_MAP_FORCE_FROM:
+       case GOMP_MAP_ALWAYS_FROM:
+         copyfrom = true;
+         /* Fallthrough.  */
+
+       case GOMP_MAP_TO_PSET:
+       case GOMP_MAP_POINTER:
+       case GOMP_MAP_DELETE:
+       case GOMP_MAP_RELEASE:
+         cur_node.host_start = (uintptr_t) hostaddrs[i];
+         cur_node.host_end = cur_node.host_start
+                             + (kind == GOMP_MAP_POINTER
+                                ? sizeof (void *) : sizes[i]);
+         n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+
+         if (n == NULL)
+           continue;
+
+         assert (n->virtual_refcount != VREFCOUNT_LINK_KEY);
+
+         if (n->refcount == REFCOUNT_INFINITY)
+           {
+             n->refcount = 1;
+             n->virtual_refcount = 0;
+           }
 
-  gomp_mutex_unlock (&acc_dev->lock);
+         if (finalize)
+           {
+             n->refcount -= n->virtual_refcount;
+             n->virtual_refcount = 0;
+           }
 
-  if (n->refcount == 0)
-    {
-      if (t->refcount == minrefs)
-       {
-         /* This is the last reference, so pull the descriptor off the
-            chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from
-            freeing the device memory. */
-         struct target_mem_desc *tp;
-         for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
-              tp = t, t = t->prev)
+         if (n->virtual_refcount > 0)
            {
-             if (n->tgt == t)
-               {
-                 if (tp)
-                   tp->prev = t->prev;
-                 else
-                   acc_dev->openacc.data_environ = t->prev;
-                 break;
-               }
+             n->refcount--;
+             n->virtual_refcount--;
            }
-       }
+         else if (n->refcount > 0)
+           n->refcount--;
 
-      /* Set refcount to 1 to allow gomp_unmap_vars to unmap it.  */
-      n->refcount = 1;
-      t->refcount = minrefs;
-      for (size_t i = 0; i < t->list_count; i++)
-       if (t->list[i].key == n)
-         {
-           t->list[i].copy_from = force_copyfrom ? 1 : 0;
-           break;
-         }
-
-      /* If running synchronously, unmap immediately.  */
-      if (async < acc_async_noval)
-       gomp_unmap_vars (t, true);
-      else
-       {
-         goacc_aq aq = get_goacc_asyncqueue (async);
-         gomp_unmap_vars_async (t, true, aq);
+         if (copyfrom)
+           gomp_copy_dev2host (acc_dev, NULL, (void *) cur_node.host_start,
+                               (void *) (n->tgt->tgt_start + n->tgt_offset
+                                         + cur_node.host_start
+                                         - n->host_start),
+                               cur_node.host_end - cur_node.host_start);
+
+         if (n->refcount == 0)
+           gomp_remove_var (acc_dev, n);
+         break;
+
+       default:
+         gomp_mutex_unlock (&acc_dev->lock);
+         gomp_fatal ("gomp_acc_remove_pointer unhandled kind 0x%.2x",
+                     kind);
        }
     }
 
   gomp_mutex_unlock (&acc_dev->lock);
-
-  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
 }
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 68a60de24fa..7e72d9c6b24 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -56,12 +56,29 @@ find_pointer (int pos, size_t mapnum, unsigned short *kinds)
   if (pos + 1 >= mapnum)
     return 0;
 
-  unsigned char kind = kinds[pos+1] & 0xff;
+  unsigned char kind0 = kinds[pos] & 0xff;
 
-  if (kind == GOMP_MAP_TO_PSET)
-    return 3;
-  else if (kind == GOMP_MAP_POINTER)
-    return 2;
+  switch (kind0)
+    {
+    case GOMP_MAP_TO:
+    case GOMP_MAP_FORCE_TO:
+    case GOMP_MAP_FROM:
+    case GOMP_MAP_FORCE_FROM:
+    case GOMP_MAP_TOFROM:
+    case GOMP_MAP_FORCE_TOFROM:
+    case GOMP_MAP_ALLOC:
+    case GOMP_MAP_RELEASE:
+      {
+       unsigned char kind1 = kinds[pos + 1] & 0xff;
+       if (kind1 == GOMP_MAP_POINTER
+           || kind1 == GOMP_MAP_ALWAYS_POINTER)
+         return 2;
+       else if (kind1 == GOMP_MAP_TO_PSET)
+         return 3;
+      }
+    default:
+      /* empty.  */;
+    }
 
   return 0;
 }
@@ -745,8 +762,14 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
            }
          else
            {
-             gomp_acc_insert_pointer (pointer, &hostaddrs[i],
-                                      &sizes[i], &kinds[i], async);
+             goacc_aq aq = get_goacc_asyncqueue (async);
+             for (int j = 0; j < 2; j++)
+               gomp_map_vars_async (acc_dev, aq,
+                                    (j == 0 || pointer == 2) ? 1 : 2,
+                                    &hostaddrs[i + j], NULL,
+                                    &sizes[i + j], &kinds[i + j], true,
+                                    GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+
              /* Increment 'i' by two because OpenACC requires fortran
                 arrays to be contiguous, so each PSET is associated with
                 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
@@ -791,11 +814,8 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
          }
        else
          {
-           bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
-                            || kind == GOMP_MAP_FROM);
-           gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
-                                    finalize, pointer);
-           /* See the above comment.  */
+           gomp_acc_remove_pointer (acc_dev, &hostaddrs[i], &sizes[i],
+                                    &kinds[i], async, finalize, pointer);
            i += pointer - 1;
          }
       }
diff --git a/libgomp/target.c b/libgomp/target.c
index a83cb48108a..b42b4ad2448 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -536,7 +536,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
   struct target_mem_desc *tgt
     = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
   tgt->list_count = mapnum;
-  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
+  tgt->refcount = (pragma_kind == GOMP_MAP_VARS_ENTER_DATA
+                  || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) ? 0 : 1;
   tgt->device_descr = devicep;
   struct gomp_coalesce_buf cbuf, *cbufp = NULL;
 
@@ -883,13 +884,14 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
                                      kind & typemask, cbufp);
            else
              {
-               k->link_key = NULL;
+               k->u.link_key = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target address
                       of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
-                   k->link_key = n;
+                   k->u.link_key = n;
+                   k->virtual_refcount = VREFCOUNT_LINK_KEY;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
@@ -913,7 +915,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                k->refcount = 1;
-               k->dynamic_refcount = 0;
+               k->virtual_refcount = 0;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
@@ -1007,7 +1009,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
                                kind);
                  }
 
-               if (k->link_key)
+               if (k->virtual_refcount == VREFCOUNT_LINK_KEY && k->u.link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
@@ -1051,8 +1053,20 @@ gomp_map_vars_internal (struct gomp_device_descr 
*devicep,
   /* If the variable from "omp target enter data" map-list was already mapped,
      tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
      gomp_exit_data.  */
-  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
-    {
+  if ((pragma_kind == GOMP_MAP_VARS_ENTER_DATA
+       || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
+      && tgt->refcount == 0)
+    {
+      /* If we're about to discard a target_mem_desc with no "structural"
+        references (tgt->refcount == 0), any splay keys linked in the tgt's
+        list must have their virtual refcount incremented to represent that
+        "lost" reference in order to implement the semantics of the OpenACC
+        "present increment" operation properly.  */
+      if (pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
+       for (i = 0; i < tgt->list_count; i++)
+         if (tgt->list[i].key)
+           tgt->list[i].key->virtual_refcount++;
+
       free (tgt);
       tgt = NULL;
     }
@@ -1092,32 +1106,66 @@ gomp_unmap_tgt (struct target_mem_desc *tgt)
   free (tgt);
 }
 
-attribute_hidden bool
-gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
+static bool
+gomp_unref_tgt (void *ptr)
 {
   bool is_tgt_unmapped = false;
-  splay_tree_remove (&devicep->mem_map, k);
-  if (k->link_key)
-    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
-  if (k->tgt->refcount > 1)
-    k->tgt->refcount--;
+
+  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
+
+  if (tgt->refcount > 1)
+    tgt->refcount--;
   else
     {
+      gomp_unmap_tgt (tgt);
       is_tgt_unmapped = true;
-      gomp_unmap_tgt (k->tgt);
     }
+
   return is_tgt_unmapped;
 }
 
 static void
-gomp_unref_tgt (void *ptr)
+gomp_unref_tgt_void (void *ptr)
 {
-  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
+  (void) gomp_unref_tgt (ptr);
+}
 
-  if (tgt->refcount > 1)
-    tgt->refcount--;
+static inline __attribute__((always_inline)) bool
+gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
+                         struct goacc_asyncqueue *aq)
+{
+  bool is_tgt_unmapped = false;
+  splay_tree_remove (&devicep->mem_map, k);
+  if (k->virtual_refcount == VREFCOUNT_LINK_KEY)
+    {
+      if (k->u.link_key)
+       splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->u.link_key);
+    }
+  if (aq)
+    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
+                                               (void *) k->tgt);
   else
-    gomp_unmap_tgt (tgt);
+    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
+  return is_tgt_unmapped;
+}
+
+attribute_hidden bool
+gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
+{
+  return gomp_remove_var_internal (devicep, k, NULL);
+}
+
+/* Remove a variable asynchronously.  This actually removes the variable
+   mapping immediately, but retains the linked target_mem_desc until the
+   asynchronous operation has completed (as it may still refer to target
+   memory).  The device lock must be held before entry, and remains locked on
+   exit.  */
+
+attribute_hidden void
+gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
+                      struct goacc_asyncqueue *aq)
+{
+  (void) gomp_remove_var_internal (devicep, k, aq);
 }
 
 /* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
@@ -1153,7 +1201,15 @@ gomp_unmap_vars_internal (struct target_mem_desc *tgt, 
bool do_copyfrom,
        continue;
 
       bool do_unmap = false;
-      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
+      if (k->tgt == tgt
+         && k->virtual_refcount > 0
+         && k->virtual_refcount != VREFCOUNT_LINK_KEY
+         && k->refcount != REFCOUNT_INFINITY)
+       {
+         k->virtual_refcount--;
+         k->refcount--;
+       }
+      else if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
        k->refcount--;
       else if (k->refcount == 1)
        {
@@ -1173,7 +1229,7 @@ gomp_unmap_vars_internal (struct target_mem_desc *tgt, 
bool do_copyfrom,
     }
 
   if (aq)
-    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt,
+    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
                                                (void *) tgt);
   else
     gomp_unref_tgt ((void *) tgt);
@@ -1310,7 +1366,7 @@ gomp_load_image_to_device (struct gomp_device_descr 
*devicep, unsigned version,
       k->tgt = tgt;
       k->tgt_offset = target_table[i].start;
       k->refcount = REFCOUNT_INFINITY;
-      k->link_key = NULL;
+      k->virtual_refcount = 0;
       array->left = NULL;
       array->right = NULL;
       splay_tree_insert (&devicep->mem_map, array);
@@ -1342,7 +1398,7 @@ gomp_load_image_to_device (struct gomp_device_descr 
*devicep, unsigned version,
       k->tgt = tgt;
       k->tgt_offset = target_var->start;
       k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
-      k->link_key = NULL;
+      k->virtual_refcount = 0;
       array->left = NULL;
       array->right = NULL;
       splay_tree_insert (&devicep->mem_map, array);
@@ -1576,22 +1632,6 @@ gomp_unload_device (struct gomp_device_descr *devicep)
     }
 }
 
-/* Free address mapping tables.  MM must be locked on entry, and remains locked
-   on return.  */
-
-attribute_hidden void
-gomp_free_memmap (struct splay_tree_s *mem_map)
-{
-  while (mem_map->root)
-    {
-      struct target_mem_desc *tgt = mem_map->root->key.tgt;
-
-      splay_tree_remove (mem_map, &mem_map->root->key);
-      free (tgt->array);
-      free (tgt);
-    }
-}
-
 /* Host fallback for GOMP_target{,_ext} routines.  */
 
 static void
@@ -2073,9 +2113,9 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t 
mapnum,
          if (k->refcount == 0)
            {
              splay_tree_remove (&devicep->mem_map, k);
-             if (k->link_key)
+             if (k->virtual_refcount == VREFCOUNT_LINK_KEY && k->u.link_key)
                splay_tree_insert (&devicep->mem_map,
-                                  (splay_tree_node) k->link_key);
+                                  (splay_tree_node) k->u.link_key);
              if (k->tgt->refcount > 1)
                k->tgt->refcount--;
              else
@@ -2612,6 +2652,8 @@ omp_target_associate_ptr (const void *host_ptr, const 
void *device_ptr,
       k->tgt = tgt;
       k->tgt_offset = (uintptr_t) device_ptr + device_offset;
       k->refcount = REFCOUNT_INFINITY;
+      k->virtual_refcount = 0;
+      k->u.link_key = NULL;
       array->left = NULL;
       array->right = NULL;
       splay_tree_insert (&devicep->mem_map, array);
@@ -2882,7 +2924,6 @@ gomp_target_init (void)
                current_device.type = current_device.get_type_func ();
                current_device.mem_map.root = NULL;
                current_device.state = GOMP_DEVICE_UNINITIALIZED;
-               current_device.openacc.data_environ = NULL;
                for (i = 0; i < new_num_devices; i++)
                  {
                    current_device.target_id = i;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c
index 6a52f746dcb..6bdcfe7d429 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c
@@ -182,13 +182,13 @@ main (int argc, char **argv)
         exit (EXIT_FAILURE);
     }
 
+    acc_delete (&h_X[0], N * sizeof (float));
+    acc_delete (&h_Y1[0], N * sizeof (float));
+
     free (h_X);
     free (h_Y1);
     free (h_Y2);
 
-    acc_free (d_X);
-    acc_free (d_Y);
-
     context_check (pctx);
 
     s = cublasDestroy (h);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c
index 71365e8ed32..b403a5cf5cb 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c
@@ -176,13 +176,13 @@ main (int argc, char **argv)
         exit (EXIT_FAILURE);
     }
 
+    acc_delete (&h_X[0], N * sizeof (float));
+    acc_delete (&h_Y1[0], N * sizeof (float));
+
     free (h_X);
     free (h_Y1);
     free (h_Y2);
 
-    acc_free (d_X);
-    acc_free (d_Y);
-
     context_check (pctx);
 
     s = cublasDestroy (h);
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 
b/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
index 83a540070e6..6bb92c12ed1 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
@@ -1,4 +1,5 @@
 ! { dg-do run }
+! { dg-additional-options "-cpp" }
 
 program test
   use openacc
@@ -70,10 +71,14 @@ program test
     end do
   !$acc end parallel
   
-  !$acc exit data copyout (d(1:N)) async
+  !$acc exit data delete (c(1:N)) copyout (d(1:N)) async
   !$acc exit data async
   !$acc wait
 
+#if !ACC_MEM_SHARED
+  if (acc_is_present (c) .eqv. .TRUE.) call abort
+#endif
+
   do i = 1, N
     if (d(i) .ne. 4.0) call abort
   end do
-- 
2.23.0

Reply via email to