When re-reading the current plugin code, I noticed that 'interop' created the CUstream in the current CUDA context (i.e. on the current CUDA device) and not on the specified device.

The two are obviously the same if there is only a single nvptx device.

The patch mimics what other code in the plugin uses and has been lightly tested so far.

Comments before I push it?

Tobias
libgomp/plugin/plugin-nvptx.c: Fix device used for stream creation

libgomp/ChangeLog:

	* plugin/plugin-nvptx.c (GOMP_OFFLOAD_interop): Set context for
	stream creation to use the specified device.

 libgomp/plugin/plugin-nvptx.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 822c6a410e2..a5cf859db19 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -2483,12 +2483,26 @@ GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord,
 	  break;
       }
 
-  obj->device_data = ptx_devices[ord];
+  struct ptx_device *ptx_dev = obj->device_data = ptx_devices[ord];
 
   if (targetsync)
     {
       CUstream stream = NULL;
-      CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT);
+      CUdevice cur_ctx_dev;
+      CUresult res = CUDA_CALL_NOCHECK (cuCtxGetDevice, &cur_ctx_dev);
+      if (res != CUDA_SUCCESS && res != CUDA_ERROR_INVALID_CONTEXT)
+	GOMP_PLUGIN_fatal ("cuCtxGetDevice error: %s", cuda_error (res));
+      if (res != CUDA_ERROR_INVALID_CONTEXT && ptx_dev->dev == cur_ctx_dev)
+	CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT);
+      else
+	{
+	  CUcontext old_ctx;
+	  assert (ptx_dev->ctx);
+	  CUDA_CALL_ASSERT (cuCtxPushCurrent, ptx_dev->ctx);
+	  CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT);
+	  if (res != CUDA_ERROR_INVALID_CONTEXT)
+	    CUDA_CALL_ASSERT (cuCtxPopCurrent, &old_ctx);
+	}
       obj->stream = stream;
     }
 }

Reply via email to