Hi Julian! On 2019-10-03T09:35:04-0700, Julian Brown <jul...@codesourcery.com> wrote: > --- a/libgomp/oacc-mem.c > +++ b/libgomp/oacc-mem.c
> @@ -715,48 +684,34 @@ delete_copyout (unsigned f, void *h, size_t s, int > async, const char *libfnname) > if (f & FLAG_COPYOUT) > [...] > gomp_copy_dev2host (acc_dev, aq, h, d, s); > } > - gomp_remove_var (acc_dev, n); > + gomp_remove_var_async (acc_dev, n, aq); Conceptually, I understand correctly that we need to use this (new) 'gomp_remove_var_async' to make sure that we don't 'gomp_free_device_memory' while the 'gomp_copy_dev2host' cited above is still in process? I'm curious why this isn't causing any problems for nvptx offloading already, any thoughts on that? Or, is this just missing test coverage? (Always difficult for 'async' stuff, of course.) By chance, is this right now already causing problems with AMD GCN offloading? (I really need to set up AMD GCN offloading testing...) I'm citing below the changes introducing 'gomp_remove_var_async', modelled similar to the existing 'gomp_unmap_vars_async'. Also for both these, do I understand correctly, that it's actually not the 'gomp_unref_tgt' that needs to be "delayed" via 'goacc_asyncqueue', but rather really only the 'gomp_free_device_memory', called via 'gomp_unmap_tgt', called via 'gomp_unref_tgt'? In other words: why do we need to keep the 'struct target_mem_desc' alive? Per my understanding, that one is one component of the mapping table, and not relevant anymore (thus can be 'free'd) as soon as it has been determined that 'tgt->refcount == 0'? Am I missing something there? It will be OK to clean that up later, but I'd like to understand this now. Well, or, stating that you just blindly copied that from the existing 'gomp_unmap_vars_async' is fine, too! ;-P Grüße Thomas > --- a/libgomp/target.c > +++ b/libgomp/target.c > @@ -1092,32 +1106,66 @@ gomp_unmap_tgt (struct target_mem_desc *tgt) > free (tgt); > } > > -attribute_hidden bool > -gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k) > +static bool > +gomp_unref_tgt (void *ptr) > { > bool is_tgt_unmapped = false; > - splay_tree_remove (&devicep->mem_map, k); > - if (k->link_key) > - splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key); > - if (k->tgt->refcount > 1) > - k->tgt->refcount--; > + > + struct target_mem_desc *tgt = (struct target_mem_desc *) ptr; > + > + if (tgt->refcount > 1) > + tgt->refcount--; > else > { > + gomp_unmap_tgt (tgt); > is_tgt_unmapped = true; > - gomp_unmap_tgt (k->tgt); > } > + > return is_tgt_unmapped; > } > > static void > -gomp_unref_tgt (void *ptr) > +gomp_unref_tgt_void (void *ptr) > { > - struct target_mem_desc *tgt = (struct target_mem_desc *) ptr; > + (void) gomp_unref_tgt (ptr); > +} > > - if (tgt->refcount > 1) > - tgt->refcount--; > +static inline __attribute__((always_inline)) bool > +gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key > k, > + struct goacc_asyncqueue *aq) > +{ > + bool is_tgt_unmapped = false; > + splay_tree_remove (&devicep->mem_map, k); > + if (k->virtual_refcount == VREFCOUNT_LINK_KEY) > + { > + if (k->u.link_key) > + splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->u.link_key); > + } > + if (aq) > + devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void, > + (void *) k->tgt); > else > - gomp_unmap_tgt (tgt); > + is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt); > + return is_tgt_unmapped; > +} > + > +attribute_hidden bool > +gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k) > +{ > + return gomp_remove_var_internal (devicep, k, NULL); > +} > + > +/* Remove a variable asynchronously. This actually removes the variable > + mapping immediately, but retains the linked target_mem_desc until the > + asynchronous operation has completed (as it may still refer to target > + memory). The device lock must be held before entry, and remains locked on > + exit. */ > + > +attribute_hidden void > +gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k, > + struct goacc_asyncqueue *aq) > +{ > + (void) gomp_remove_var_internal (devicep, k, aq); > }
signature.asc
Description: PGP signature