Hi Tobias!
On 2024-09-19T19:11:32+0200, Tobias Burnus <[email protected]> wrote:
> OpenMP: Add get_device_from_uid/omp_get_uid_from_device routines
'[omp_]get_device_from_uid'.
> Those TR13/OpenMP 6.0 routines permit a reproducible offloading to
> a specific device by mapping an OpenMP device number to a
> unique ID (UID). The GPU device UIDs should be universally unique,
> the one for the host is not.
> --- a/gcc/omp-general.cc
> +++ b/gcc/omp-general.cc
> @@ -3260,6 +3260,7 @@ omp_runtime_api_procname (const char *name)
> "alloc",
> "calloc",
> "free",
> + "get_device_from_uid",
> "get_interop_int",
> "get_interop_ptr",
> "get_mapped_ptr",
> @@ -3338,12 +3339,13 @@ omp_runtime_api_procname (const char *name)
> as DECL_NAME only omp_* and omp_*_8 appear. */
> "display_env",
> "get_ancestor_thread_num",
> - "init_allocator",
> + "omp_get_uid_from_device",
> "get_partition_place_nums",
> "get_place_num_procs",
> "get_place_proc_ids",
> "get_schedule",
> "get_team_size",
> + "init_allocator",
> "set_default_device",
> "set_dynamic",
> "set_max_active_levels",
..., but here without 'omp_' prefix: 'get_uid_from_device' (and properly
sorted).
Do we apparently not have test suite coverage for these things?
> --- a/libgomp/libgomp.h
> +++ b/libgomp/libgomp.h
> @@ -1387,6 +1387,7 @@ struct gomp_device_descr
>
> /* The name of the device. */
> const char *name;
> + const char *uid;
Caching this here, instead of acquiring via 'GOMP_OFFLOAD_get_uid' for
each call, is a minor performance optimization? (Similar to other items
cached here, I guess.)
> @@ -1399,6 +1400,7 @@ struct gomp_device_descr
>
> /* Function handlers. */
> __typeof (GOMP_OFFLOAD_get_name) *get_name_func;
> + __typeof (GOMP_OFFLOAD_get_uid) *get_uid_func;
> __typeof (GOMP_OFFLOAD_get_caps) *get_caps_func;
> __typeof (GOMP_OFFLOAD_get_type) *get_type_func;
> __typeof (GOMP_OFFLOAD_get_num_devices) *get_num_devices_func;
Please also update 'libgomp/oacc-host.c:host_dispatch'.
> --- a/libgomp/omp_lib.f90.in
> +++ b/libgomp/omp_lib.f90.in
> + interface
> + ! Note: In gfortran, strings are \0 termined
> + integer(c_int) function omp_get_device_from_uid(uid) bind(C)
> + use iso_c_binding
> + character(c_char), intent(in) :: uid(*)
> + end function omp_get_device_from_uid
> + end interface
For my understanding: in general, Fortran strings are *not*
NUL-terminated, right? So this is a specific properly of 'gfortran'
and/or this GCC/OpenMP interface, that it requires passing in a
NUL-terminated string? (..., so that you're permitted to simply
'bind(C)' to the C 'omp_get_device_from_uid'?)
> + interface omp_get_uid_from_device
> + ! Deviation from OpenMP 6.0: VALUE added.
(..., which I suppose you've reported to OpenMP...)
> + character(:) function omp_get_uid_from_device (device_num)
> + use iso_c_binding
> + pointer :: omp_get_uid_from_device
> + integer(c_int32_t), intent(in), value :: device_num
> + end function omp_get_uid_from_device
> +
> + character(:) function omp_get_uid_from_device_8 (device_num)
> + use iso_c_binding
> + pointer :: omp_get_uid_from_device_8
> + integer(c_int64_t), intent(in), value :: device_num
> + end function omp_get_uid_from_device_8
> + end interface omp_get_uid_from_device
> --- a/libgomp/omp_lib.h.in
> +++ b/libgomp/omp_lib.h.in
Likewise.
> --- a/libgomp/plugin/plugin-gcn.c
> +++ b/libgomp/plugin/plugin-gcn.c
> +const char *
> +GOMP_OFFLOAD_get_uid (int ord)
> +{
> + char *str;
> + hsa_status_t status;
> + struct agent_info *agent = get_agent_info (ord);
> +
> + /* HSA documentation states: maximally 21 characters including NUL. */
> + str = GOMP_PLUGIN_malloc (21 * sizeof (char));
> + status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AMD_AGENT_INFO_UUID,
> + str);
> + if (status != HSA_STATUS_SUCCESS)
> + hsa_fatal ("Could not obtain device UUID", status);
> + return str;
> +}
I guess I'd have just put this code into 'init_hsa_context', filling a
new statically-sized 'uuid' field in 'hsa_context_info' (like
'driver_version_s'; and assuming that 'hsa_context_info' is the right
abstraction for this), and then just return that 'uuid' from
'GOMP_OFFLOAD_get_uid'. That way, you'd avoid the unclear semantics of
who gets to 'free' the buffer returned from 'GOMP_OFFLOAD_get_uid' upon
'GOMP_OFFLOAD_fini_device' -- currently the memory is lost?
> --- a/libgomp/plugin/plugin-nvptx.c
> +++ b/libgomp/plugin/plugin-nvptx.c
> +const char *
> +GOMP_OFFLOAD_get_uid (int ord)
> +{
Likewise. ('nvptx_init', new statically-sized 'uuid' field in
'ptx_device', or similar.)
> + CUresult r;
> + CUuuid s;
> + struct ptx_device *dev = ptx_devices[ord];
> +
> + if (CUDA_CALL_EXISTS (cuDeviceGetUuid_v2))
> + r = CUDA_CALL_NOCHECK (cuDeviceGetUuid_v2, &s, dev->dev);
> + else if (CUDA_CALL_EXISTS (cuDeviceGetUuid))
> + r = CUDA_CALL_NOCHECK (cuDeviceGetUuid, &s, dev->dev);
> + else
> + r = CUDA_ERROR_NOT_FOUND;
That might qualify as creative use of 'CUDA_ERROR_NOT_FOUND' (..., which
normally means that a CUDA Driver API routine has not found something,
not that a CUDA Driver API routine has not been found), but OK...
But actually, should we just 'return NULL;' in that case, and let libgomp
proper handle this gracefully, similar to as if 'GOMP_OFFLOAD_get_uid'
isn't implemented at all by a plugin? (What are the expected OpenMP
semantics?)
> + if (r != CUDA_SUCCESS)
> + GOMP_PLUGIN_fatal ("cuDeviceGetUuid error: %s", cuda_error (r));
> +
> + size_t len = strlen ("GPU-12345678-9abc-defg-hijk-lmniopqrstuv");
> + char *str = (char *) GOMP_PLUGIN_malloc (len + 1);
> + sprintf (str,
> + "GPU-%02x" "%02x" "%02x" "%02x"
> + "-%02x" "%02x"
> + "-%02x" "%02x"
> + "-%02x" "%02x" "%02x" "%02x" "%02x" "%02x" "%02x" "%02x",
> + (unsigned char) s.bytes[0], (unsigned char) s.bytes[1],
> + (unsigned char) s.bytes[2], (unsigned char) s.bytes[3],
> + (unsigned char) s.bytes[4], (unsigned char) s.bytes[5],
> + (unsigned char) s.bytes[6], (unsigned char) s.bytes[7],
> + (unsigned char) s.bytes[8], (unsigned char) s.bytes[9],
> + (unsigned char) s.bytes[10], (unsigned char) s.bytes[11],
> + (unsigned char) s.bytes[12], (unsigned char) s.bytes[13],
> + (unsigned char) s.bytes[14], (unsigned char) s.bytes[15]);
> + return str;
> +}
Grüße
Thomas