On Thu, Jul 09, 2015 at 04:06:57PM +0200, Jakub Jelinek wrote:
> The latest spec adds a bunch of new functions, this patch attempts to
> implement them, except I gave up partly in omp_target_associate_ptr
> and completely in omp_target_disassociate_ptr for now.
> 
> As for the plugins, I think we'll want some plugin callback to support
> offloading device <-> offloading device memcpy (at least for the same
> devicep and target_id), and perhaps as optimization also some
> callbacks through which 2 or 3 dimensional omp_target_memcpy_rect
> in between host and device, or device and host, or device to same device
> can be optimized to avoid too many separate operations.
> 
> For the associate/disassociate, I'm waiting for some clarifications (well,
> for omp_target_is_present too) and then supposedly it should wait until
> you are done with your enter/exit data changes.
> 
> Shall I commit this now, or wait until it is clarified etc.?

Here is a new version that I've committed.  I've finished up
associate/disassociate, wrote a test and tested also with intelmicemul
offloading.

2015-07-13  Jakub Jelinek  <ja...@redhat.com>

        * omp.h.in (omp_get_initial_device,
        omp_target_alloc, omp_target_free, omp_target_is_present,
        omp_target_memcpy, omp_target_memcpy_rect, omp_target_associate_ptr,
        omp_target_disassociate_ptr): New prototypes.
        * omp_lib.f90.in (omp_get_initial_device): New interface.
        * omp_lib.h.in (omp_get_initial_device): New extern.
        * libgomp.map (OMP_4.1): Add omp_get_initial_device,
        omp_get_initial_device_, omp_target_alloc, omp_target_free,
        omp_target_is_present, omp_target_memcpy, omp_target_memcpy_rect,
        omp_target_associate_ptr and omp_target_disassociate_ptr symbols.
        * env.c (omp_get_initial_device): New function.  Add ialias.
        * fortran.c (omp_get_initial_device): Add iredirect.
        (omp_get_initial_device_): New function.
        * target.c (gomp_map_vars_existing, gomp_map_vars, gomp_unmap_vars):
        Handle refcount of INT_MAX as infinite.
        (gomp_offload_image_to_device): Set refcount to INT_MAX.
        (omp_target_alloc, omp_target_free, omp_target_is_present,
        omp_target_memcpy, omp_target_memcpy_rect_worker,
        omp_target_memcpy_rect, omp_target_associate_ptr,
        omp_target_disassociate_ptr): New functions.
        * testsuite/libgomp.c/target-12.c: New test.

--- libgomp/omp.h.in.jj 2015-07-10 14:42:57.968695046 +0200
+++ libgomp/omp.h.in    2015-07-10 18:49:17.503845297 +0200
@@ -139,8 +139,25 @@ extern int omp_get_num_teams (void) __GO
 extern int omp_get_team_num (void) __GOMP_NOTHROW;
 
 extern int omp_is_initial_device (void) __GOMP_NOTHROW;
+extern int omp_get_initial_device (void) __GOMP_NOTHROW;
 extern int omp_get_max_task_priority (void) __GOMP_NOTHROW;
 
+extern void *omp_target_alloc (__SIZE_TYPE__, int) __GOMP_NOTHROW;
+extern void omp_target_free (void *, int) __GOMP_NOTHROW;
+extern int omp_target_is_present (void *, __SIZE_TYPE__, int) __GOMP_NOTHROW;
+extern int omp_target_memcpy (void *, void *, __SIZE_TYPE__, __SIZE_TYPE__,
+                             __SIZE_TYPE__, int, int) __GOMP_NOTHROW;
+extern int omp_target_memcpy_rect (void *, void *, __SIZE_TYPE__, int,
+                                  const __SIZE_TYPE__ *,
+                                  const __SIZE_TYPE__ *,
+                                  const __SIZE_TYPE__ *,
+                                  const __SIZE_TYPE__ *,
+                                  const __SIZE_TYPE__ *, int, int)
+  __GOMP_NOTHROW;
+extern int omp_target_associate_ptr (void *, void *, __SIZE_TYPE__,
+                                    __SIZE_TYPE__, int) __GOMP_NOTHROW;
+extern int omp_target_disassociate_ptr (void *, int) __GOMP_NOTHROW;
+
 #ifdef __cplusplus
 }
 #endif
--- libgomp/target.c.jj 2015-07-10 14:42:58.034694042 +0200
+++ libgomp/target.c    2015-07-13 14:48:11.681954879 +0200
@@ -38,6 +38,7 @@
 #endif
 #include <string.h>
 #include <assert.h>
+#include <errno.h>
 
 #ifdef PLUGIN_SUPPORT
 #include <dlfcn.h>
@@ -171,7 +172,8 @@ gomp_map_vars_existing (struct gomp_devi
                            (void *) (oldn->tgt->tgt_start + oldn->tgt_offset),
                            (void *) newn->host_start,
                            newn->host_end - newn->host_start);
-  oldn->refcount++;
+  if (oldn->refcount != INT_MAX)
+    oldn->refcount++;
 }
 
 static int
@@ -436,7 +438,8 @@ gomp_map_vars (struct gomp_device_descr
                          tgt->list[j].key = k;
                          tgt->list[j].copy_from = false;
                          tgt->list[j].always_copy_from = false;
-                         k->refcount++;
+                         if (k->refcount != INT_MAX)
+                           k->refcount++;
                          gomp_map_pointer (tgt,
                                            (uintptr_t) *(void **) hostaddrs[j],
                                            k->tgt_offset
@@ -576,7 +579,10 @@ gomp_unmap_vars (struct target_mem_desc
 
       bool do_unmap = false;
       if (k->refcount > 1)
-       k->refcount--;
+       {
+         if (k->refcount != INT_MAX)
+           k->refcount--;
+       }
       else if (k->async_refcount > 0)
        k->async_refcount--;
       else
@@ -721,7 +727,7 @@ gomp_offload_image_to_device (struct gom
       k->host_end = k->host_start + 1;
       k->tgt = tgt;
       k->tgt_offset = target_table[i].start;
-      k->refcount = 1;
+      k->refcount = INT_MAX;
       k->async_refcount = 0;
       array->left = NULL;
       array->right = NULL;
@@ -746,7 +752,7 @@ gomp_offload_image_to_device (struct gom
       k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
       k->tgt = tgt;
       k->tgt_offset = target_var->start;
-      k->refcount = 1;
+      k->refcount = INT_MAX;
       k->async_refcount = 0;
       array->left = NULL;
       array->right = NULL;
@@ -1175,6 +1181,390 @@ GOMP_teams (unsigned int num_teams, unsi
   (void) num_teams;
 }
 
+void *
+omp_target_alloc (size_t size, int device_num)
+{
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return malloc (size);
+
+  if (device_num < 0)
+    return NULL;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return NULL;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return malloc (size);
+
+  gomp_mutex_lock (&devicep->lock);
+  void *ret = devicep->alloc_func (devicep->target_id, size);
+  gomp_mutex_unlock (&devicep->lock);
+  return ret;
+}
+
+void
+omp_target_free (void *device_ptr, int device_num)
+{
+  if (device_ptr == NULL)
+    return;
+
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    {
+      free (device_ptr);
+      return;
+    }
+
+  if (device_num < 0)
+    return;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    {
+      free (device_ptr);
+      return;
+    }
+
+  gomp_mutex_lock (&devicep->lock);
+  devicep->free_func (devicep->target_id, device_ptr);
+  gomp_mutex_unlock (&devicep->lock);
+}
+
+int
+omp_target_is_present (void *ptr, size_t offset, int device_num)
+{
+  if (ptr == NULL)
+    return offset == 0;
+
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return 1;
+
+  if (device_num < 0)
+    return 0;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return 0;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return 1;
+
+  gomp_mutex_lock (&devicep->lock);
+  struct splay_tree_s *mem_map = &devicep->mem_map;
+  struct splay_tree_key_s cur_node;
+
+  cur_node.host_start = (uintptr_t) ptr + offset;
+  cur_node.host_end = cur_node.host_start + 1;
+  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
+  if (n == NULL)
+    {
+      /* Could be possibly zero size array section.  */
+      cur_node.host_end--;
+      n = splay_tree_lookup (mem_map, &cur_node);
+      if (n == NULL)
+       {
+         cur_node.host_start--;
+         n = splay_tree_lookup (mem_map, &cur_node);
+         cur_node.host_start++;
+       }
+    }
+  int ret = n != NULL;
+  gomp_mutex_unlock (&devicep->lock);
+  return ret;
+}
+
+int
+omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
+                  size_t src_offset, int dst_device_num, int src_device_num)
+{
+  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
+
+  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (dst_device_num < 0)
+       return EINVAL;
+
+      dst_devicep = resolve_device (dst_device_num);
+      if (dst_devicep == NULL)
+       return EINVAL;
+
+      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+       dst_devicep = NULL;
+    }
+  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (src_device_num < 0)
+       return EINVAL;
+
+      src_devicep = resolve_device (src_device_num);
+      if (src_devicep == NULL)
+       return EINVAL;
+
+      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+       src_devicep = NULL;
+    }
+  if (src_devicep == NULL && dst_devicep == NULL)
+    {
+      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
+      return 0;
+    }
+  if (src_devicep == NULL)
+    {
+      gomp_mutex_lock (&dst_devicep->lock);
+      dst_devicep->host2dev_func (dst_devicep->target_id,
+                                 (char *) dst + dst_offset,
+                                 (char *) src + src_offset, length);
+      gomp_mutex_unlock (&dst_devicep->lock);
+      return 0;
+    }
+  if (dst_devicep == NULL)
+    {
+      gomp_mutex_lock (&src_devicep->lock);
+      src_devicep->dev2host_func (src_devicep->target_id,
+                                 (char *) dst + dst_offset,
+                                 (char *) src + src_offset, length);
+      gomp_mutex_unlock (&src_devicep->lock);
+      return 0;
+    }
+  /* FIXME: Support device-to-device somehow?  */
+  return EINVAL;
+}
+
+static int
+omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
+                              int num_dims, const size_t *volume,
+                              const size_t *dst_offsets,
+                              const size_t *src_offsets,
+                              const size_t *dst_dimensions,
+                              const size_t *src_dimensions,
+                              struct gomp_device_descr *dst_devicep,
+                              struct gomp_device_descr *src_devicep)
+{
+  size_t dst_slice = element_size;
+  size_t src_slice = element_size;
+  size_t j, dst_off, src_off, length;
+  int i, ret;
+
+  if (num_dims == 1)
+    {
+      if (__builtin_mul_overflow (element_size, volume[0], &length)
+         || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
+         || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
+       return EINVAL;
+      if (dst_devicep == NULL && src_devicep == NULL)
+       memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
+      else if (src_devicep == NULL)
+       dst_devicep->host2dev_func (dst_devicep->target_id,
+                                   (char *) dst + dst_off,
+                                   (char *) src + src_off, length);
+      else if (dst_devicep == NULL)
+       src_devicep->dev2host_func (src_devicep->target_id,
+                                   (char *) dst + dst_off,
+                                   (char *) src + src_off, length);
+      else
+       return EINVAL;
+      return 0;
+    }
+
+  /* FIXME: it would be nice to have some plugin function to handle
+     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
+     be handled in the generic recursion below, and for host-host it
+     should be used even for any num_dims >= 2.  */
+
+  for (i = 1; i < num_dims; i++)
+    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
+       || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
+      return EINVAL;
+  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
+      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
+    return EINVAL;
+  for (j = 0; j < volume[0]; j++)
+    {
+      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
+                                          (char *) src + src_off,
+                                          element_size, num_dims - 1,
+                                          volume + 1, dst_offsets + 1,
+                                          src_offsets + 1, dst_dimensions + 1,
+                                          src_dimensions + 1, dst_devicep,
+                                          src_devicep);
+      if (ret)
+       return ret;
+      dst_off += dst_slice;
+      src_off += src_slice;
+    }
+  return 0;
+}
+
+int
+omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
+                       int num_dims, const size_t *volume,
+                       const size_t *dst_offsets,
+                       const size_t *src_offsets,
+                       const size_t *dst_dimensions,
+                       const size_t *src_dimensions,
+                       int dst_device_num, int src_device_num)
+{
+  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
+
+  if (!dst && !src)
+    return INT_MAX;
+
+  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (dst_device_num < 0)
+       return EINVAL;
+
+      dst_devicep = resolve_device (dst_device_num);
+      if (dst_devicep == NULL)
+       return EINVAL;
+
+      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+       dst_devicep = NULL;
+    }
+  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (src_device_num < 0)
+       return EINVAL;
+
+      src_devicep = resolve_device (src_device_num);
+      if (src_devicep == NULL)
+       return EINVAL;
+
+      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+       src_devicep = NULL;
+    }
+
+  /* FIXME: Support device-to-device somehow?  */
+  if (src_devicep != NULL && dst_devicep != NULL)
+    return EINVAL;
+
+  if (src_devicep)
+    gomp_mutex_lock (&src_devicep->lock);
+  else if (dst_devicep)
+    gomp_mutex_lock (&dst_devicep->lock);
+  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
+                                          volume, dst_offsets, src_offsets,
+                                          dst_dimensions, src_dimensions,
+                                          dst_devicep, src_devicep);
+  if (src_devicep)
+    gomp_mutex_unlock (&src_devicep->lock);
+  else if (dst_devicep)
+    gomp_mutex_unlock (&dst_devicep->lock);
+  return ret;
+}
+
+int
+omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
+                         size_t device_offset, int device_num)
+{
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return EINVAL;
+
+  if (device_num < 0)
+    return EINVAL;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return EINVAL;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return EINVAL;
+
+  gomp_mutex_lock (&devicep->lock);
+
+  struct splay_tree_s *mem_map = &devicep->mem_map;
+  struct splay_tree_key_s cur_node;
+  int ret = EINVAL;
+
+  cur_node.host_start = (uintptr_t) host_ptr;
+  cur_node.host_end = cur_node.host_start + size;
+  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
+  if (n)
+    {
+      if (n->tgt->tgt_start + n->tgt_offset
+         == (uintptr_t) device_ptr + device_offset
+         && n->host_start <= cur_node.host_start
+         && n->host_end >= cur_node.host_end)
+       ret = 0;
+    }
+  else
+    {
+      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
+      tgt->array = gomp_malloc (sizeof (*tgt->array));
+      tgt->refcount = 1;
+      tgt->tgt_start = 0;
+      tgt->tgt_end = 0;
+      tgt->to_free = NULL;
+      tgt->prev = NULL;
+      tgt->list_count = 0;
+      tgt->device_descr = devicep;
+      splay_tree_node array = tgt->array;
+      splay_tree_key k = &array->key;
+      k->host_start = cur_node.host_start;
+      k->host_end = cur_node.host_end;
+      k->tgt = tgt;
+      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
+      k->refcount = INT_MAX;
+      k->async_refcount = 0;
+      array->left = NULL;
+      array->right = NULL;
+      splay_tree_insert (&devicep->mem_map, array);
+      ret = 0;
+    }
+  gomp_mutex_unlock (&devicep->lock);
+  return ret;
+}
+
+int
+omp_target_disassociate_ptr (void *ptr, int device_num)
+{
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return EINVAL;
+
+  if (device_num < 0)
+    return EINVAL;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return EINVAL;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return EINVAL;
+
+  gomp_mutex_lock (&devicep->lock);
+
+  struct splay_tree_s *mem_map = &devicep->mem_map;
+  struct splay_tree_key_s cur_node;
+  int ret = EINVAL;
+
+  cur_node.host_start = (uintptr_t) ptr;
+  cur_node.host_end = cur_node.host_start + 1;
+  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
+  if (n == NULL)
+    {
+      cur_node.host_end--;
+      n = splay_tree_lookup (mem_map, &cur_node);
+    }
+  if (n
+      && n->host_start == cur_node.host_start
+      && n->refcount == INT_MAX
+      && n->tgt->tgt_start == 0
+      && n->tgt->to_free == NULL
+      && n->tgt->refcount == 1
+      && n->tgt->list_count == 0)
+    {
+      splay_tree_remove (&devicep->mem_map, n);
+      gomp_unmap_tgt (n->tgt);
+      ret = 0;
+    }
+
+  gomp_mutex_unlock (&devicep->lock);
+  return ret;
+}
+
 #ifdef PLUGIN_SUPPORT
 
 /* This function tries to load a plugin for DEVICE.  Name of plugin is passed
--- libgomp/omp_lib.h.in.jj     2015-07-10 14:42:58.049693814 +0200
+++ libgomp/omp_lib.h.in        2015-07-10 18:49:17.516844753 +0200
@@ -121,6 +121,8 @@
 
       external omp_is_initial_device
       logical(4) omp_is_initial_device
+      external omp_get_initial_device
+      integer(4) omp_get_initial_device
 
       external omp_get_max_task_priority
       integer(4) omp_get_max_task_priority
--- libgomp/fortran.c.jj        2015-07-10 14:42:58.063693601 +0200
+++ libgomp/fortran.c   2015-07-10 18:49:17.548843415 +0200
@@ -80,6 +80,7 @@ ialias_redirect (omp_get_num_devices)
 ialias_redirect (omp_get_num_teams)
 ialias_redirect (omp_get_team_num)
 ialias_redirect (omp_is_initial_device)
+ialias_redirect (omp_get_initial_device)
 ialias_redirect (omp_get_max_task_priority)
 #endif
 
@@ -565,6 +566,12 @@ omp_is_initial_device_ (void)
 }
 
 int32_t
+omp_get_initial_device_ (void)
+{
+  return omp_get_initial_device ();
+}
+
+int32_t
 omp_get_max_task_priority_ (void)
 {
   return omp_get_max_task_priority ();
--- libgomp/libgomp.map.jj      2015-07-10 14:42:57.899696095 +0200
+++ libgomp/libgomp.map 2015-07-10 18:49:17.528844251 +0200
@@ -153,6 +153,15 @@ OMP_4.1 {
        omp_get_partition_place_nums;
        omp_get_partition_place_nums_;
        omp_get_partition_place_nums_8_;
+       omp_get_initial_device;
+       omp_get_initial_device_;
+       omp_target_alloc;
+       omp_target_free;
+       omp_target_is_present;
+       omp_target_memcpy;
+       omp_target_memcpy_rect;
+       omp_target_associate_ptr;
+       omp_target_disassociate_ptr;
 } OMP_4.0;
 
 GOMP_1.0 {
--- libgomp/omp_lib.f90.in.jj   2015-07-10 14:42:57.958695198 +0200
+++ libgomp/omp_lib.f90.in      2015-07-10 18:49:17.507845129 +0200
@@ -422,6 +422,12 @@
         end interface
 
         interface
+          function omp_get_initial_device ()
+            integer (4) :: omp_get_initial_device
+          end function omp_get_initial_device
+        end interface
+
+        interface
           function omp_get_max_task_priority ()
             integer (4) :: omp_get_max_task_priority
           end function omp_get_max_task_priority
--- libgomp/env.c.jj    2015-07-10 14:42:57.932695593 +0200
+++ libgomp/env.c       2015-07-10 18:49:17.548843415 +0200
@@ -29,6 +29,7 @@
 #include "libgomp.h"
 #include "libgomp_f.h"
 #include "oacc-int.h"
+#include "gomp-constants.h"
 #include <ctype.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -1461,6 +1462,12 @@ omp_is_initial_device (void)
 }
 
 int
+omp_get_initial_device (void)
+{
+  return GOMP_DEVICE_HOST_FALLBACK;
+}
+
+int
 omp_get_num_places (void)
 {
   return gomp_places_list_len;
@@ -1526,6 +1533,7 @@ ialias (omp_get_num_devices)
 ialias (omp_get_num_teams)
 ialias (omp_get_team_num)
 ialias (omp_is_initial_device)
+ialias (omp_get_initial_device)
 ialias (omp_get_max_task_priority)
 ialias (omp_get_num_places)
 ialias (omp_get_place_num)
--- libgomp/testsuite/libgomp.c/target-12.c.jj  2015-07-13 13:02:51.368474480 
+0200
+++ libgomp/testsuite/libgomp.c/target-12.c     2015-07-13 14:49:24.361043766 
+0200
@@ -0,0 +1,114 @@
+#include <omp.h>
+#include <stdlib.h>
+
+int
+main ()
+{
+  int d = omp_get_default_device ();
+  int id = omp_get_initial_device ();
+  int err;
+  int q[128], i;
+  void *p;
+
+  if (d < 0 || d >= omp_get_num_devices ())
+    d = id;
+
+  for (i = 0; i < 128; i++)
+    q[i] = i;
+
+  p = omp_target_alloc (130 * sizeof (int), d);
+  if (p == NULL)
+    return 0;
+
+  if (omp_target_memcpy_rect (NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL,
+                             d, id) < 3
+      || omp_target_memcpy_rect (NULL, NULL, 0, 0, NULL, NULL, NULL, NULL,
+                                NULL, id, d) < 3
+      || omp_target_memcpy_rect (NULL, NULL, 0, 0, NULL, NULL, NULL, NULL,
+                                NULL, id, id) < 3)
+    abort ();
+
+  if (omp_target_associate_ptr (q, p, 128 * sizeof (int), sizeof (int), d) == 
0)
+    {
+      size_t volume[3] = { 128, 0, 0 };
+      size_t dst_offsets[3] = { 0, 0, 0 };
+      size_t src_offsets[3] = { 1, 0, 0 };
+      size_t dst_dimensions[3] = { 128, 0, 0 };
+      size_t src_dimensions[3] = { 128, 0, 0 };
+
+      if (omp_target_associate_ptr (q, p, 128 * sizeof (int), sizeof (int), d) 
!= 0)
+       abort ();
+
+      if (omp_target_is_present (q, 0, d) != 1
+         || omp_target_is_present (q, 32 * sizeof (int), d) != 1
+         || omp_target_is_present (q, 128 * sizeof (int), d) != 1)
+       abort ();
+
+      if (omp_target_memcpy (p, q, 128 * sizeof (int), sizeof (int), 0,
+                            d, id) != 0)
+       abort ();
+
+      #pragma omp target if (d >= 0) device (d >= 0 ? d : 0) 
map(alloc:q[0:32]) map(from:err)
+      {
+       int j;
+       err = 0;
+       for (j = 0; j < 128; j++)
+         if (q[j] != j)
+           err = 1;
+         else
+           q[j] += 4;
+      }
+
+      if (err)
+       abort ();
+
+      if (omp_target_memcpy_rect (q, p, sizeof (int), 1, volume,
+                                 dst_offsets, src_offsets, dst_dimensions,
+                                 src_dimensions, id, d) != 0)
+       abort ();
+
+      for (i = 0; i < 128; i++)
+       if (q[i] != i + 4)
+         abort ();
+
+      volume[2] = 2;
+      volume[1] = 3;
+      volume[0] = 6;
+      dst_offsets[2] = 1;
+      dst_offsets[1] = 0;
+      dst_offsets[0] = 0;
+      src_offsets[2] = 1;
+      src_offsets[1] = 0;
+      src_offsets[0] = 3;
+      dst_dimensions[2] = 2;
+      dst_dimensions[1] = 3;
+      dst_dimensions[0] = 6;
+      src_dimensions[2] = 3;
+      src_dimensions[1] = 4;
+      src_dimensions[0] = 6;
+      if (omp_target_memcpy_rect (p, q, sizeof (int), 3, volume,
+                                 dst_offsets, src_offsets, dst_dimensions,
+                                 src_dimensions, d, id) != 0)
+       abort ();
+
+      #pragma omp target if (d >= 0) device (d >= 0 ? d : 0) 
map(alloc:q[0:32]) map(from:err)
+      {
+       int j, k, l;
+       err = 0;
+       for (j = 0; j < 6; j++)
+         for (k = 0; k < 3; k++)
+           for (l = 0; l < 2; l++)
+             if (q[j * 6 + k * 2 + l] != 3 * 12 + 4 + 1 + l + k * 3 + j * 12)
+               err = 1;
+      }
+
+      if (err)
+       abort ();
+
+      if (omp_target_disassociate_ptr (q, d) != 0)
+       abort ();
+    }
+
+  omp_target_free (p, d);
+  return 0;
+}


        Jakub

Reply via email to