[ was: Re: [RFC PATCH] Coalesce host to device transfers in libgomp ]
On 10/25/2017 01:38 PM, Jakub Jelinek wrote:
> And we don't really have the async target implemented yet for NVPTX :(,
> guess that should be the highest priority after this optimization.

Hi,

how about this approach:
1 - Move async_run from plugin-hsa.c to default_async_run
2 - Implement omp async support for nvptx
?

The first patch moves the GOMP_OFFLOAD_async_run implementation from plugin-hsa.c to target.c, making it the default implementation if the plugin does not define the GOMP_OFFLOAD_async_run symbol.

The second patch removes the GOMP_OFFLOAD_async_run symbol from the nvptx plugin, activating the default implementation, and makes sure GOMP_OFFLOAD_run can be called from a fresh thread.

I've tested this with libgomp.c/c.exp and the previously failing target-33.c and target-34.c are now passing, and there are no regressions.

OK for trunk after complete testing (and after adding a function comment for default_async_run)?

Thanks,
- Tom

Move async_run from plugin-hsa.c to default_async_run

2017-10-27  Tom de Vries  <t...@codesourcery.com>

	* plugin/plugin-hsa.c (struct async_run_info): Move ...
	(run_kernel_asynchronously): Rename to ...
	(GOMP_OFFLOAD_async_run): Rename to ...
	* target.c (struct async_run_info): ... here.
	(default_async_run_1): ... this.
	(default_async_run): ... this.
	(gomp_target_task_fn): Handle missing async_run.
	(gomp_load_plugin_for_device): Make async_run optional.

---
 libgomp/plugin/plugin-hsa.c | 58 -----------------------------------------
 libgomp/target.c            | 63 ++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 60 insertions(+), 61 deletions(-)

diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c
index fc08f5d..65a89a3 100644
--- a/libgomp/plugin/plugin-hsa.c
+++ b/libgomp/plugin/plugin-hsa.c
@@ -1625,64 +1625,6 @@ GOMP_OFFLOAD_run (int n __attribute__((unused)),
   run_kernel (kernel, vars, kla);
 }
 
-/* Information to be passed to a thread running a kernel asycnronously.  */
-
-struct async_run_info
-{
-  int device;
-  void *tgt_fn;
-  void *tgt_vars;
-  void **args;
-  void *async_data;
-};
-
-/* Thread routine to run a kernel asynchronously.  */
-
-static void *
-run_kernel_asynchronously (void *thread_arg)
-{
-  struct async_run_info *info = (struct async_run_info *) thread_arg;
-  int device = info->device;
-  void *tgt_fn = info->tgt_fn;
-  void *tgt_vars = info->tgt_vars;
-  void **args = info->args;
-  void *async_data = info->async_data;
-
-  free (info);
-  GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
-  GOMP_PLUGIN_target_task_completion (async_data);
-  return NULL;
-}
-
-/* Part of the libgomp plugin interface.  Run a kernel like GOMP_OFFLOAD_run
-   does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
-   has finished.  */
-
-void
-GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
-			void **args, void *async_data)
-{
-  pthread_t pt;
-  struct async_run_info *info;
-  HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n")
-  info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
-
-  info->device = device;
-  info->tgt_fn = tgt_fn;
-  info->tgt_vars = tgt_vars;
-  info->args = args;
-  info->async_data = async_data;
-
-  int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
-  if (err != 0)
-    GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
-		       strerror (err));
-  err = pthread_detach (pt);
-  if (err != 0)
-    GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
-		       "asynchronously: %s", strerror (err));
-}
-
 /* Deinitialize all information associated with MODULE and kernels within
    it.  Return TRUE on success.  */
 
diff --git a/libgomp/target.c b/libgomp/target.c
index 3dd119f..456ed78 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1868,6 +1868,59 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
     gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
 }
 
+/* Information to be passed to a thread running a kernel asynchronously.  */
+
+struct async_run_info
+{
+  struct gomp_device_descr *devicep;
+  void *tgt_fn;
+  void *tgt_vars;
+  void **args;
+  void *async_data;
+};
+
+/* Thread routine to run a kernel asynchronously.  */
+
+static void *
+default_async_run_1 (void *thread_arg)
+{
+  struct async_run_info *info = (struct async_run_info *) thread_arg;
+  struct gomp_device_descr *devicep = info->devicep;
+  void *tgt_fn = info->tgt_fn;
+  void *tgt_vars = info->tgt_vars;
+  void **args = info->args;
+  void *async_data = info->async_data;
+
+  free (info);
+  devicep->run_func (devicep->target_id, tgt_fn, tgt_vars, args);
+  GOMP_PLUGIN_target_task_completion (async_data);
+  return NULL;
+}
+
+static void
+default_async_run (struct gomp_device_descr *devicep, void *tgt_fn,
+		   void *tgt_vars, void **args, void *async_data)
+{
+  pthread_t pt;
+  struct async_run_info *info;
+  info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
+
+  info->devicep = devicep;
+  info->tgt_fn = tgt_fn;
+  info->tgt_vars = tgt_vars;
+  info->args = args;
+  info->async_data = async_data;
+
+ int err = pthread_create (&pt, NULL, &default_async_run_1, info);
+  if (err != 0)
+    GOMP_PLUGIN_fatal ("Asynchronous thread creation failed: %s",
+		       strerror (err));
+  err = pthread_detach (pt);
+  if (err != 0)
+    GOMP_PLUGIN_fatal ("Failed to detach a thread to run kernel "
+		       "asynchronously: %s", strerror (err));
+}
+
 bool
 gomp_target_task_fn (void *data)
 {
@@ -1909,8 +1962,12 @@ gomp_target_task_fn (void *data)
 	}
       ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
 
-      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
-			       ttask->args, (void *) ttask);
+      if (devicep->async_run_func)
+	devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
+				 ttask->args, (void *) ttask);
+      else
+	default_async_run (devicep, fn_addr, actual_arguments, ttask->args,
+			   (void *) ttask);
       return true;
     }
   else if (devicep == NULL
@@ -2393,7 +2450,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
     {
       DLSYM (run);
-      DLSYM (async_run);
+      DLSYM_OPT (async_run, async_run);
       DLSYM_OPT (can_run, can_run);
       DLSYM (dev2dev);
     }
Implement omp async support for nvptx

2017-10-27  Tom de Vries  <t...@codesourcery.com>

	PR libgomp/81688
	* plugin/plugin-nvptx.c (GOMP_OFFLOAD_run): Call
	nvptx_attach_host_thread_to_device.
	(GOMP_OFFLOAD_async_run): Remove.

---
 libgomp/plugin/plugin-nvptx.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 71630b5..4e0009f 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -2127,6 +2127,8 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
   const char *maybe_abort_msg = "(perhaps abort was called)";
   int teams = 0, threads = 0;
 
+  nvptx_attach_host_thread_to_device (ord);
+
   if (!args)
     GOMP_PLUGIN_fatal ("No target arguments provided");
   while (*args)
@@ -2170,10 +2172,3 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
     GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s", cuda_error (r));
   nvptx_stacks_free (stacks, teams * threads);
 }
-
-void
-GOMP_OFFLOAD_async_run (int ord, void *tgt_fn, void *tgt_vars, void **args,
-			void *async_data)
-{
-  GOMP_PLUGIN_fatal ("GOMP_OFFLOAD_async_run unimplemented");
-}

Reply via email to