https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88939
--- Comment #1 from Tom de Vries <vries at gcc dot gnu.org> ---
The usual fix for this sort of problem is to move the map_pop to before the
GOMP_PLUGIN_fatal:
...
@@ -1365,6 +1365,7 @@ nvptx_exec
if (async < acc_async_noval)
{
r = CUDA_CALL_NOCHECK (cuStreamSynchronize, dev_str->stream);
+ map_pop (dev_str);
if (r == CUDA_ERROR_LAUNCH_FAILED)
GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s %s\n", cuda_error (r),
maybe_abort_msg);
@@ -1392,6 +1393,7 @@ nvptx_exec
}
#else
r = CUDA_CALL_NOCHECK (cuCtxSynchronize, );
+ map_pop (dev_str);
if (r == CUDA_ERROR_LAUNCH_FAILED)
GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s %s\n", cuda_error (r),
maybe_abort_msg);
@@ -1401,11 +1403,6 @@ nvptx_exec
GOMP_PLUGIN_debug (0, " %s: kernel %s: finished\n", __FUNCTION__,
targ_fn->launch->fn);
-
-#ifndef DISABLE_ASYNC
- if (async < acc_async_noval)
-#endif
- map_pop (dev_str);
}
void * openacc_get_current_cuda_context (void);
...
but then we run into the same CUDA_ERROR_ILLEGAL_INSTRUCTION when cuMemFree is
called to free the device pointer:
...
libgomp: cuStreamSynchronize error: an illegal instruction was encountered
libgomp: cuMemFree error: an illegal instruction was encountered
...
because the CUDA error leaves the process in an inconsistent state, and any
further CUDA calls in the process will return the same error.
We could do:
...
@@ -237,7 +237,7 @@ cuda_map_create (size_t size)
static void
cuda_map_destroy (struct cuda_map *map)
{
- CUDA_CALL_ASSERT (cuMemFree, map->d);
+ CUDA_CALL_NOCHECK (cuMemFree, map->d);
free (map);
}
...
but that's just a workaround.