This is a minimal patch for NVPTX OpenMP offloading, using Jakub's initial
implementation.  It allows to successfully run '#pragma omp target', without
any parallel execution: 1 team of 1 thread is spawned on the device, and
target regions with '#pragma omp parallel' will fail with a link error.

        * plugin/plugin-nvptx.c (nvptx_host2dev): Allow NULL 'nvthd'.
        (nvptx_dev2host): Ditto.
        (GOMP_OFFLOAD_get_caps): Add GOMP_OFFLOAD_CAP_OPENMP_400.
        (GOMP_OFFLOAD_run): New.
---
 libgomp/plugin/plugin-nvptx.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 52c49c7..a3eaafa 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1052,7 +1052,7 @@ nvptx_host2dev (void *d, const void *h, size_t s)
     GOMP_PLUGIN_fatal ("invalid size");
 
 #ifndef DISABLE_ASYNC
-  if (nvthd->current_stream != nvthd->ptx_dev->null_stream)
+  if (nvthd && nvthd->current_stream != nvthd->ptx_dev->null_stream)
     {
       CUevent *e;
 
@@ -1117,7 +1117,7 @@ nvptx_dev2host (void *h, const void *d, size_t s)
     GOMP_PLUGIN_fatal ("invalid size");
 
 #ifndef DISABLE_ASYNC
-  if (nvthd->current_stream != nvthd->ptx_dev->null_stream)
+  if (nvthd && nvthd->current_stream != nvthd->ptx_dev->null_stream)
     {
       CUevent *e;
 
@@ -1451,7 +1451,7 @@ GOMP_OFFLOAD_get_name (void)
 unsigned int
 GOMP_OFFLOAD_get_caps (void)
 {
-  return GOMP_OFFLOAD_CAP_OPENACC_200;
+  return GOMP_OFFLOAD_CAP_OPENACC_200 | GOMP_OFFLOAD_CAP_OPENMP_400;
 }
 
 int
@@ -1788,3 +1788,27 @@ GOMP_OFFLOAD_openacc_set_cuda_stream (int async, void 
*stream)
 {
   return nvptx_set_cuda_stream (async, stream);
 }
+
+void
+GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars)
+{
+  CUfunction function = ((struct targ_fn_descriptor *) tgt_fn)->fn;
+  CUresult r;
+  struct ptx_device *ptx_dev = ptx_devices[ord];
+  const char *maybe_abort_msg = "(perhaps abort was called)";
+  void *args = &tgt_vars;
+
+  r = cuLaunchKernel (function,
+                     1, 1, 1,
+                     1, 1, 1,
+                     0, ptx_dev->null_stream->stream, &args, 0);
+  if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuLaunchKernel error: %s", cuda_error (r));
+
+  r = cuCtxSynchronize ();
+  if (r == CUDA_ERROR_LAUNCH_FAILED)
+    GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s %s\n", cuda_error (r),
+                      maybe_abort_msg);
+  else if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s", cuda_error (r));
+}

Reply via email to