https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88939

--- Comment #1 from Tom de Vries <vries at gcc dot gnu.org> ---
The usual fix for this sort of problem is to move the map_pop to before the
GOMP_PLUGIN_fatal:
...
@@ -1365,6 +1365,7 @@ nvptx_exec
   if (async < acc_async_noval)
     {
       r = CUDA_CALL_NOCHECK (cuStreamSynchronize, dev_str->stream);
+      map_pop (dev_str);
       if (r == CUDA_ERROR_LAUNCH_FAILED)
        GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s %s\n", cuda_error (r),
                           maybe_abort_msg);
@@ -1392,6 +1393,7 @@ nvptx_exec
     }
 #else
   r = CUDA_CALL_NOCHECK (cuCtxSynchronize, );
+  map_pop (dev_str);
   if (r == CUDA_ERROR_LAUNCH_FAILED)
     GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s %s\n", cuda_error (r),
                       maybe_abort_msg);
@@ -1401,11 +1403,6 @@ nvptx_exec

   GOMP_PLUGIN_debug (0, "  %s: kernel %s: finished\n", __FUNCTION__,
                     targ_fn->launch->fn);
-
-#ifndef DISABLE_ASYNC
-  if (async < acc_async_noval)
-#endif
-    map_pop (dev_str);
 }

 void * openacc_get_current_cuda_context (void);
...
but then we run into the same CUDA_ERROR_ILLEGAL_INSTRUCTION when cuMemFree is
called to free the device pointer:
...
libgomp: cuStreamSynchronize error: an illegal instruction was encountered

libgomp: cuMemFree error: an illegal instruction was encountered
...
because the CUDA error leaves the process in an inconsistent state, and any
further CUDA calls in the process will return the same error.

We could do:
...
@@ -237,7 +237,7 @@ cuda_map_create (size_t size)
 static void
 cuda_map_destroy (struct cuda_map *map)
 {
-  CUDA_CALL_ASSERT (cuMemFree, map->d);
+  CUDA_CALL_NOCHECK (cuMemFree, map->d);
   free (map);
 }

...
but that's just a workaround.

Reply via email to