When re-reading the current plugin code, I noticed that 'interop'
created the cuStream created in the current CUDA context (current CUDA
device) and not on the specified device.
That's obviously the same if there is only a single nvptx device.
The patch mimics what other code in the plugin uses and has been lightly
tested so far.
Comments before I push it?
Tobias
libgomp/plugin/plugin-nvptx.c: Fix device used for stream creation
libgomp/ChangeLog:
* plugin/plugin-nvptx.c (GOMP_OFFLOAD_interop): Set context for
stream creation to use the specified device.
libgomp/plugin/plugin-nvptx.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 822c6a410e2..a5cf859db19 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -2483,12 +2483,26 @@ GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord,
break;
}
- obj->device_data = ptx_devices[ord];
+ struct ptx_device *ptx_dev = obj->device_data = ptx_devices[ord];
if (targetsync)
{
CUstream stream = NULL;
- CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT);
+ CUdevice cur_ctx_dev;
+ CUresult res = CUDA_CALL_NOCHECK (cuCtxGetDevice, &cur_ctx_dev);
+ if (res != CUDA_SUCCESS && res != CUDA_ERROR_INVALID_CONTEXT)
+ GOMP_PLUGIN_fatal ("cuCtxGetDevice error: %s", cuda_error (res));
+ if (res != CUDA_ERROR_INVALID_CONTEXT && ptx_dev->dev == cur_ctx_dev)
+ CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT);
+ else
+ {
+ CUcontext old_ctx;
+ assert (ptx_dev->ctx);
+ CUDA_CALL_ASSERT (cuCtxPushCurrent, ptx_dev->ctx);
+ CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT);
+ if (res != CUDA_ERROR_INVALID_CONTEXT)
+ CUDA_CALL_ASSERT (cuCtxPopCurrent, &old_ctx);
+ }
obj->stream = stream;
}
}