Applied, thanks!
Mike Kelly, le mar. 10 mars 2026 21:28:12 +0000, a ecrit:
> It's possible to allocate all pages in a segment (and higher priority
> ones) to privileged threads. This can result in a panic even though
> memory is available in lower priority segments which haven't yet been
> rebalanced by the pageout daemon. Rebalancing is now done actively
> during privileged page allocation.
> ---
> vm/vm_page.c | 31 +++++++++++++++++++++++--------
> vm/vm_page.h | 3 +++
> vm/vm_resident.c | 4 ----
> 3 files changed, 26 insertions(+), 12 deletions(-)
>
> diff --git a/vm/vm_page.c b/vm/vm_page.c
> index 5cdf0c7b..a656aa01 100644
> --- a/vm/vm_page.c
> +++ b/vm/vm_page.c
> @@ -990,7 +990,8 @@ vm_page_seg_double_unlock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
> */
> static boolean_t
> vm_page_seg_balance_page(struct vm_page_seg *seg,
> - struct vm_page_seg *remote_seg)
> + struct vm_page_seg *remote_seg,
> + boolean_t priv_alloc)
> {
> struct vm_page *src, *dest;
> vm_object_t object;
> @@ -1002,7 +1003,9 @@ vm_page_seg_balance_page(struct vm_page_seg *seg,
> vm_page_seg_double_lock(seg, remote_seg);
>
> if (vm_page_seg_usable(seg)
> - || !vm_page_seg_page_available(remote_seg)) {
> + || (priv_alloc
> + ? remote_seg->nr_free_pages == 0
> + : !vm_page_seg_page_available(remote_seg))) {
> goto error;
> }
>
> @@ -1082,7 +1085,7 @@ error:
> }
>
> static boolean_t
> -vm_page_seg_balance(struct vm_page_seg *seg)
> +vm_page_seg_balance(struct vm_page_seg *seg, boolean_t priv_alloc)
> {
> struct vm_page_seg *remote_seg;
> unsigned int i;
> @@ -1100,7 +1103,7 @@ vm_page_seg_balance(struct vm_page_seg *seg)
> continue;
> }
>
> - balanced = vm_page_seg_balance_page(seg, remote_seg);
> + balanced = vm_page_seg_balance_page(seg, remote_seg, priv_alloc);
>
> if (balanced) {
> return TRUE;
> @@ -1611,16 +1614,28 @@ vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type)
> struct vm_page *page;
> unsigned int i;
>
> - for (i = vm_page_select_alloc_seg(selector); i < vm_page_segs_size; i--) {
> + const unsigned int seg_index = vm_page_select_alloc_seg(selector);
> +
> +retry:
> + simple_lock(&vm_page_queue_free_lock);
> +
> + for (i = seg_index; i < vm_page_segs_size; i--) {
> page = vm_page_seg_alloc(&vm_page_segs[i], order, type);
>
> if (page != NULL)
> return page;
> }
>
> - /* FIXME: rebalance segments? */
> if (!current_thread() || current_thread()->vm_privilege)
> - panic("vm_page: privileged thread unable to allocate page");
> + {
> + simple_unlock(&vm_page_queue_free_lock);
> +
> + for (i = seg_index; i < vm_page_segs_size; i--)
> + if (vm_page_seg_balance(vm_page_seg_get(i), TRUE))
> + goto retry;
> +
> + panic("vm_page: privileged thread unable to allocate page");
> + }
>
> return NULL;
> }
> @@ -1989,7 +2004,7 @@ vm_page_balance_once(void)
> */
>
> for (i = 0; i < vm_page_segs_size; i++) {
> - balanced = vm_page_seg_balance(vm_page_seg_get(i));
> + balanced = vm_page_seg_balance(vm_page_seg_get(i), FALSE);
>
> if (balanced) {
> return TRUE;
> diff --git a/vm/vm_page.h b/vm/vm_page.h
> index 9e110209..49b5e602 100644
> --- a/vm/vm_page.h
> +++ b/vm/vm_page.h
> @@ -461,6 +461,9 @@ struct vm_page * vm_page_lookup_pa(phys_addr_t pa);
> * The selector is used to determine the segments from which allocation can
> * be attempted.
> *
> + * vm_page_queue_free_lock should be in an unlocked state pre-call but
> + * will always be locked on return.
> + *
> * This function should only be used by the vm_resident module.
> */
> struct vm_page * vm_page_alloc_pa(unsigned int order, unsigned int selector,
> diff --git a/vm/vm_resident.c b/vm/vm_resident.c
> index a6a90026..aaf5fc8b 100644
> --- a/vm/vm_resident.c
> +++ b/vm/vm_resident.c
> @@ -808,8 +808,6 @@ vm_page_t vm_page_grab(unsigned flags)
> else
> selector = VM_PAGE_SEL_DMA;
>
> - simple_lock(&vm_page_queue_free_lock);
> -
> /*
> * XXX Mach has many modules that merely assume memory is
> * directly mapped in kernel space. Instead of updating all
> @@ -901,8 +899,6 @@ vm_page_t vm_page_grab_contig(
> order = vm_page_order(size);
> nr_pages = 1 << order;
>
> - simple_lock(&vm_page_queue_free_lock);
> -
> /* TODO Allow caller to pass type */
> mem = vm_page_alloc_pa(order, selector, VM_PT_KERNEL);
>
> --
> 2.47.3
>
>