Hello
On Mon, Jan 29, 2024 at 3:59 PM Samuel Thibault <[email protected]>
wrote:
> For rumpdisk to efficiently determine the physical address, both for
> checking whether it is below 4GiB, and for giving it to the disk
> driver, we need a gnumach primitive (and that is not conditioned by
> MACH_VM_DEBUG like mach_vm_region_info and mach_vm_object_pages_phys
> are).
>
> ---
> Please notably review the RPC part, I really don't know that much about
> mig.
>
> diff --git a/i386/include/mach/i386/machine_types.defs
> b/i386/include/mach/i386/machine_types.defs
> index 3d540be9..76c7dcf9 100755
> --- a/i386/include/mach/i386/machine_types.defs
> +++ b/i386/include/mach/i386/machine_types.defs
> @@ -102,5 +102,6 @@ type long_integer_t = rpc_long_integer_t
> * Physical address size
> */
> type rpc_phys_addr_t = uint64_t;
> +type rpc_phys_addr_array_t = array[] of rpc_phys_addr_t;
>
Looks good to me. I think array[] of rpc_phys_addr_t is the best choice
here.
From my understanding, there are three ways to do arrays in mig:
- array[] of rpc_phys_addr_t;
This will pass up to 2048 bytes inlined in the message and anything bigger
than that will be out of line. The user can pass a pre-existing array and
mig will try to copy it if there's enough space. When the array is too big,
the user gets a new pointer. In the function below, *countp will always be
256 initially (size of the inline array) and it is possible to allocate a
new page if (*countp < count) so that the user will still get the results
if there are more than 256 items.
- ^array[] of rpc_phys_addr_t;
Here everything is passed out of line. The user does not need to pass an
allocated array, as the data will always be passed out of line.
- array[*:128] of rpc_phys_addr_t;
This is only usable when the maximum size is known ahead of time, as
everything is passed inline.
> #endif /* _MACHINE_MACHINE_TYPES_DEFS_ */
> diff --git a/i386/include/mach/i386/vm_types.h
> b/i386/include/mach/i386/vm_types.h
> index bd07ef26..8f528ae1 100644
> --- a/i386/include/mach/i386/vm_types.h
> +++ b/i386/include/mach/i386/vm_types.h
> @@ -94,6 +94,7 @@ typedef unsigned long phys_addr_t;
> typedef unsigned long long phys_addr_t;
> #endif
> typedef unsigned long long rpc_phys_addr_t;
> +typedef rpc_phys_addr_t *rpc_phys_addr_array_t;
>
> /*
> * A vm_size_t is the proper type for e.g.
> diff --git a/include/mach/gnumach.defs b/include/mach/gnumach.defs
> index 05101a48..6252de96 100644
> --- a/include/mach/gnumach.defs
> +++ b/include/mach/gnumach.defs
> @@ -197,3 +197,13 @@ routine vm_allocate_contiguous(
> simpleroutine task_set_essential(
> task : task_t;
> essential : boolean_t);
> +
> +/*
> + * Returns physical addresses of a region of memory
> + */
> +routine vm_pages_phys(
> + host_priv : host_priv_t;
> + target_task : vm_task_t;
> + vaddr : vm_address_t;
> + size : vm_size_t;
> + out pages : rpc_phys_addr_array_t);
> diff --git a/vm/vm_user.c b/vm/vm_user.c
> index 08cc17a4..6c16c397 100644
> --- a/vm/vm_user.c
> +++ b/vm/vm_user.c
> @@ -700,3 +700,90 @@ kern_return_t vm_allocate_contiguous(
>
> return KERN_SUCCESS;
> }
> +
> +/*
> + * vm_pages_phys returns information about a region of memory
> + */
> +kern_return_t vm_pages_phys(
> + host_t host,
> + vm_map_t map,
> + vm_address_t address,
> + vm_size_t size,
> + rpc_phys_addr_array_t *pagespp,
> + mach_msg_type_number_t *countp)
> +{
> + if (host == HOST_NULL)
> + return KERN_INVALID_HOST;
> + if (map == VM_MAP_NULL)
> + return KERN_INVALID_TASK;
> +
> + if (!page_aligned(address))
> + return KERN_INVALID_ARGUMENT;
> + if (!page_aligned(size))
> + return KERN_INVALID_ARGUMENT;
> +
> + mach_msg_type_number_t count = atop(size), cur;
> +
> + if (*countp < count)
> + return KERN_INVALID_ARGUMENT;
> +
> + rpc_phys_addr_array_t pagesp = *pagespp;
> +
> + for (cur = 0; cur < count; cur++)
> + {
> + vm_map_t cmap; /* current map in traversal */
> + rpc_phys_addr_t paddr;
> + vm_map_entry_t entry; /* entry in current map */
> +
> + /* find the entry containing (or following) the address */
> + vm_map_lock_read(map);
> + for (cmap = map;;) {
> + /* cmap is read-locked */
> +
> + if (!vm_map_lookup_entry(cmap, address, &entry)) {
> + entry = VM_MAP_ENTRY_NULL;
> + break;
> + }
> +
> + if (entry->is_sub_map) {
> + /* move down to the sub map */
> +
> + vm_map_t nmap = entry->object.sub_map;
> + vm_map_lock_read(nmap);
> + vm_map_unlock_read(cmap);
> + cmap = nmap;
> + continue;
> + } else {
> + /* Found it */
> + break;
> + }
> + /*NOTREACHED*/
> + }
> +
> + paddr = 0;
> + if (entry)
> + {
> + vm_offset_t offset = address - entry->vme_start +
> entry->offset;
> + vm_object_t object = entry->object.vm_object;
> +
> + if (object) {
> + vm_object_lock(object);
> + vm_page_t page = vm_page_lookup(object,
> offset);
> + if (page) {
> + if (page->phys_addr !=
> (typeof(pagesp[cur])) page->phys_addr)
> + printf("warning: physical
> address overflow in vm_pages_phys!!\n");
> + else
> + paddr = page->phys_addr;
> + }
> + vm_object_unlock(object);
> + }
> + }
> + vm_map_unlock_read(cmap);
> + pagesp[cur] = paddr;
> +
> + address += PAGE_SIZE;
> + }
> + *countp = count;
> +
> + return KERN_SUCCESS;
> +}
>
>