[ was: Re: [RFC PATCH] Coalesce host to device transfers in libgomp ]
On 10/25/2017 01:38 PM, Jakub Jelinek wrote:
> And we don't really have the async target implemented yet for NVPTX :(,
> guess that should be the highest priority after this optimization.
Hi,
how about this approach:
1 - Move async_run from plugin-hsa.c to default_async_run
2 - Implement omp async support for nvptx
?
The first patch moves the GOMP_OFFLOAD_async_run implementation from
plugin-hsa.c to target.c, making it the default implementation if the
plugin does not define the GOMP_OFFLOAD_async_run symbol.
The second patch removes the GOMP_OFFLOAD_async_run symbol from the
nvptx plugin, activating the default implementation, and makes sure
GOMP_OFFLOAD_run can be called from a fresh thread.
I've tested this with libgomp.c/c.exp and the previously failing
target-33.c and target-34.c are now passing, and there are no regressions.
OK for trunk after complete testing (and after adding a function comment for
default_async_run)?
Thanks,
- Tom
Move async_run from plugin-hsa.c to default_async_run
2017-10-27 Tom de Vries <t...@codesourcery.com>
* plugin/plugin-hsa.c (struct async_run_info): Move ...
(run_kernel_asynchronously): Rename to ...
(GOMP_OFFLOAD_async_run): Rename to ...
* target.c (struct async_run_info): ... here.
(default_async_run_1): ... this.
(default_async_run): ... this.
(gomp_target_task_fn): Handle missing async_run.
(gomp_load_plugin_for_device): Make async_run optional.
---
libgomp/plugin/plugin-hsa.c | 58 -----------------------------------------
libgomp/target.c | 63 ++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 60 insertions(+), 61 deletions(-)
diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c
index fc08f5d..65a89a3 100644
--- a/libgomp/plugin/plugin-hsa.c
+++ b/libgomp/plugin/plugin-hsa.c
@@ -1625,64 +1625,6 @@ GOMP_OFFLOAD_run (int n __attribute__((unused)),
run_kernel (kernel, vars, kla);
}
-/* Information to be passed to a thread running a kernel asycnronously. */
-
-struct async_run_info
-{
- int device;
- void *tgt_fn;
- void *tgt_vars;
- void **args;
- void *async_data;
-};
-
-/* Thread routine to run a kernel asynchronously. */
-
-static void *
-run_kernel_asynchronously (void *thread_arg)
-{
- struct async_run_info *info = (struct async_run_info *) thread_arg;
- int device = info->device;
- void *tgt_fn = info->tgt_fn;
- void *tgt_vars = info->tgt_vars;
- void **args = info->args;
- void *async_data = info->async_data;
-
- free (info);
- GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
- GOMP_PLUGIN_target_task_completion (async_data);
- return NULL;
-}
-
-/* Part of the libgomp plugin interface. Run a kernel like GOMP_OFFLOAD_run
- does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
- has finished. */
-
-void
-GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
- void **args, void *async_data)
-{
- pthread_t pt;
- struct async_run_info *info;
- HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n")
- info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
-
- info->device = device;
- info->tgt_fn = tgt_fn;
- info->tgt_vars = tgt_vars;
- info->args = args;
- info->async_data = async_data;
-
- int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
- if (err != 0)
- GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
- strerror (err));
- err = pthread_detach (pt);
- if (err != 0)
- GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
- "asynchronously: %s", strerror (err));
-}
-
/* Deinitialize all information associated with MODULE and kernels within
it. Return TRUE on success. */
diff --git a/libgomp/target.c b/libgomp/target.c
index 3dd119f..456ed78 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1868,6 +1868,59 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
+/* Information to be passed to a thread running a kernel asynchronously. */
+
+struct async_run_info
+{
+ struct gomp_device_descr *devicep;
+ void *tgt_fn;
+ void *tgt_vars;
+ void **args;
+ void *async_data;
+};
+
+/* Thread routine to run a kernel asynchronously. */
+
+static void *
+default_async_run_1 (void *thread_arg)
+{
+ struct async_run_info *info = (struct async_run_info *) thread_arg;
+ struct gomp_device_descr *devicep = info->devicep;
+ void *tgt_fn = info->tgt_fn;
+ void *tgt_vars = info->tgt_vars;
+ void **args = info->args;
+ void *async_data = info->async_data;
+
+ free (info);
+ devicep->run_func (devicep->target_id, tgt_fn, tgt_vars, args);
+ GOMP_PLUGIN_target_task_completion (async_data);
+ return NULL;
+}
+
+static void
+default_async_run (struct gomp_device_descr *devicep, void *tgt_fn,
+ void *tgt_vars, void **args, void *async_data)
+{
+ pthread_t pt;
+ struct async_run_info *info;
+ info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
+
+ info->devicep = devicep;
+ info->tgt_fn = tgt_fn;
+ info->tgt_vars = tgt_vars;
+ info->args = args;
+ info->async_data = async_data;
+
+ int err = pthread_create (&pt, NULL, &default_async_run_1, info);
+ if (err != 0)
+ GOMP_PLUGIN_fatal ("Asynchronous thread creation failed: %s",
+ strerror (err));
+ err = pthread_detach (pt);
+ if (err != 0)
+ GOMP_PLUGIN_fatal ("Failed to detach a thread to run kernel "
+ "asynchronously: %s", strerror (err));
+}
+
bool
gomp_target_task_fn (void *data)
{
@@ -1909,8 +1962,12 @@ gomp_target_task_fn (void *data)
}
ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
- devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
- ttask->args, (void *) ttask);
+ if (devicep->async_run_func)
+ devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
+ ttask->args, (void *) ttask);
+ else
+ default_async_run (devicep, fn_addr, actual_arguments, ttask->args,
+ (void *) ttask);
return true;
}
else if (devicep == NULL
@@ -2393,7 +2450,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
{
DLSYM (run);
- DLSYM (async_run);
+ DLSYM_OPT (async_run, async_run);
DLSYM_OPT (can_run, can_run);
DLSYM (dev2dev);
}
Implement omp async support for nvptx
2017-10-27 Tom de Vries <t...@codesourcery.com>
PR libgomp/81688
* plugin/plugin-nvptx.c (GOMP_OFFLOAD_run): Call
nvptx_attach_host_thread_to_device.
(GOMP_OFFLOAD_async_run): Remove.
---
libgomp/plugin/plugin-nvptx.c | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 71630b5..4e0009f 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -2127,6 +2127,8 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
const char *maybe_abort_msg = "(perhaps abort was called)";
int teams = 0, threads = 0;
+ nvptx_attach_host_thread_to_device (ord);
+
if (!args)
GOMP_PLUGIN_fatal ("No target arguments provided");
while (*args)
@@ -2170,10 +2172,3 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s", cuda_error (r));
nvptx_stacks_free (stacks, teams * threads);
}
-
-void
-GOMP_OFFLOAD_async_run (int ord, void *tgt_fn, void *tgt_vars, void **args,
- void *async_data)
-{
- GOMP_PLUGIN_fatal ("GOMP_OFFLOAD_async_run unimplemented");
-}