Hi Andrew! On 2022-03-08T11:30:57+0000, Hafiz Abid Qadeer <ab...@codesourcery.com> wrote: > From: Andrew Stubbs <a...@codesourcery.com> > > This adds support for using Cuda Managed Memory with omp_alloc. It will be > used as the underpinnings for "requires unified_shared_memory" in a later > patch. > > There are two new predefined allocators, ompx_unified_shared_mem_alloc and > ompx_host_mem_alloc, plus corresponding memory spaces, [...]
> --- a/libgomp/config/linux/allocator.c > +++ b/libgomp/config/linux/allocator.c > @@ -42,9 +42,11 @@ > static void * > linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin) > { > - (void)memspace; > - > - if (pin) > + if (memspace == ompx_unified_shared_mem_space) > + { > + return gomp_usm_alloc (size, GOMP_DEVICE_ICV); > + } > + else if (pin) > { > void *addr = mmap (NULL, size, PROT_READ | PROT_WRITE, > MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); This I understand conceptually, but then: > @@ -67,7 +69,14 @@ linux_memspace_alloc (omp_memspace_handle_t memspace, > size_t size, int pin) > static void * > linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin) > { > - if (pin) > + if (memspace == ompx_unified_shared_mem_space) > + { > + void *ret = gomp_usm_alloc (size, GOMP_DEVICE_ICV); > + memset (ret, 0, size); > + return ret; > + } > + else if (memspace == ompx_unified_shared_mem_space > + || pin) > return linux_memspace_alloc (memspace, size, pin); > else > return calloc (1, size); ..., here, we've got a duplicated (and thus always-false) expression 'memspace == ompx_unified_shared_mem_space' (..., which '-Wduplicated-cond' fails to report; <https://gcc.gnu.org/PR108753> "'-Wduplicated-cond' doesn't diagnose duplicated subexpressions"...). Is the correct fix the following (conceptually like 'linux_memspace_alloc' cited above), or is there something that I fail to understand? 
static void * linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin) { if (memspace == ompx_unified_shared_mem_space) { void *ret = gomp_usm_alloc (size, GOMP_DEVICE_ICV); memset (ret, 0, size); return ret; } - else if (memspace == ompx_unified_shared_mem_space - || pin) + else if (pin) return linux_memspace_alloc (memspace, size, pin); else return calloc (1, size); The following ones then again are conceptually like 'linux_memspace_alloc' cited above: > @@ -77,9 +86,9 @@ static void > linux_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size, > int pin) > { > - (void)memspace; > - > - if (pin) > + if (memspace == ompx_unified_shared_mem_space) > + gomp_usm_free (addr, GOMP_DEVICE_ICV); > + else if (pin) > munmap (addr, size); > else > free (addr); > @@ -89,7 +98,9 @@ static void * > linux_memspace_realloc (omp_memspace_handle_t memspace, void *addr, > size_t oldsize, size_t size, int oldpin, int pin) > { > - if (oldpin && pin) > + if (memspace == ompx_unified_shared_mem_space) > + goto manual_realloc; > + else if (oldpin && pin) > { > void *newaddr = mremap (addr, oldsize, size, MREMAP_MAYMOVE); > if (newaddr == MAP_FAILED) > @@ -98,18 +109,19 @@ linux_memspace_realloc (omp_memspace_handle_t memspace, > void *addr, > [...] 
..., and similarly those here: > --- a/libgomp/config/nvptx/allocator.c > +++ b/libgomp/config/nvptx/allocator.c > @@ -125,6 +125,8 @@ nvptx_memspace_alloc (omp_memspace_handle_t memspace, > size_t size) > __atomic_store_n (&__nvptx_lowlat_heap_root, root.raw, > MEMMODEL_RELEASE); > return result; > } > + else if (memspace == ompx_host_mem_space) > + return NULL; > else > return malloc (size); > } > @@ -145,6 +147,8 @@ nvptx_memspace_calloc (omp_memspace_handle_t memspace, > size_t size) > > return result; > } > + else if (memspace == ompx_host_mem_space) > + return NULL; > else > return calloc (1, size); > } > @@ -354,6 +358,8 @@ nvptx_memspace_realloc (omp_memspace_handle_t memspace, > void *addr, > } > return result; > } > + else if (memspace == ompx_host_mem_space) > + return NULL; > else > return realloc (addr, size); > } (I'd have added an explicit no-op (or, 'abort'?) to 'nvptx_memspace_free', but that's maybe just me...) ;-\ > --- a/libgomp/libgomp.h > +++ b/libgomp/libgomp.h > +extern void * gomp_usm_alloc (size_t size, int device_num); > +extern void gomp_usm_free (void *device_ptr, int device_num); > +extern bool gomp_is_usm_ptr (void *ptr); 'gomp_is_usm_ptr' isn't defined/used anywhere; I'll remove it. > --- a/libgomp/target.c > +++ b/libgomp/target.c > @@ -3740,6 +3807,9 @@ gomp_load_plugin_for_device (struct gomp_device_descr > *device, > DLSYM (unload_image); > DLSYM (alloc); > DLSYM (free); > + DLSYM_OPT (usm_alloc, usm_alloc); > + DLSYM_OPT (usm_free, usm_free); > + DLSYM_OPT (is_usm_ptr, is_usm_ptr); > DLSYM (dev2host); > DLSYM (host2dev); As a sanity check, shouldn't we check that either none or all three of those are defined, like in the 'if (cuda && cuda != 4) { [error] }' check a bit further down? Note that these remarks likewise apply to the current upstream submission: <https://inbox.sourceware.org/gcc-patches/ef374d055251b2bc65b97d7e54a0a72d811b869d.1657188329.git....@codesourcery.com> "openmp, nvptx: ompx_unified_shared_mem_alloc". 
Grüße Thomas ----------------- Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955