> 
> The select(2) syscall performs a kmalloc(size, GFP_KERNEL) where size grows
> with the number of fds passed. We had a customer report page allocation
> failures of order-4 for this allocation. This is a costly order, so it might
> easily fail, as the VM expects such allocation to have a lower-order fallback.
> 
> A trivial such fallback is vmalloc(), as the memory doesn't have to be
> physically contiguous. Also the allocation is temporary for the duration of
> the syscall, so it's unlikely to stress vmalloc too much.
> 
> Note that the poll(2) syscall seems to use a linked list of order-0 pages, so
> it doesn't need this kind of fallback.
> 
> Signed-off-by: Vlastimil Babka <vba...@suse.cz>
> ---
>  fs/select.c | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/select.c b/fs/select.c
> index 8ed9da50896a..8fe5bddbe99b 100644
> --- a/fs/select.c
> +++ b/fs/select.c
> @@ -29,6 +29,7 @@
>  #include <linux/sched/rt.h>
>  #include <linux/freezer.h>
>  #include <net/busy_poll.h>
> +#include <linux/vmalloc.h>
> 
>  #include <asm/uaccess.h>
> 
> @@ -558,6 +559,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
>       struct fdtable *fdt;
>       /* Allocate small arguments on the stack to save memory and be faster */
>       long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
> +     unsigned long alloc_size;
> 
>       ret = -EINVAL;
>       if (n < 0)
> @@ -580,10 +582,15 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
>       bits = stack_fds;
>       if (size > sizeof(stack_fds) / 6) {
>               /* Not enough space in on-stack array; must use kmalloc */
> +             alloc_size = 6 * size;
>               ret = -ENOMEM;
> -             bits = kmalloc(6 * size, GFP_KERNEL);
> -             if (!bits)
> -                     goto out_nofds;
> +             bits = kmalloc(alloc_size, GFP_KERNEL|__GFP_NOWARN);
> +             if (!bits && alloc_size > PAGE_SIZE) {
> +                     bits = vmalloc(alloc_size);
> +
> +                     if (!bits)
> +                             goto out_nofds;
> +             }

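Looks like we also have to bail out if kmalloc fails with
alloc_size less than or equal to PAGE_SIZE: in that case the vmalloc
fallback is skipped, bits stays NULL, and we fall through without
taking the goto out_nofds path.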

thanks
Hillf
>       }
>       fds.in      = bits;
>       fds.out     = bits +   size;
> @@ -618,7 +625,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
> 
>  out:
>       if (bits != stack_fds)
> -             kfree(bits);
> +             kvfree(bits);
>  out_nofds:
>       return ret;
>  }
> --
> 2.10.0

Reply via email to