this turns the pa_pagesz member of a pool allocator into a bitfield. pool pages can be many different sizes; they aren't restricted to just the size provided by the hardware. to support this without introducing a separate page allocator per page size, a single page allocator can be made to provide many different sizes. however, right now there is no way to pass a custom page allocator to pool_init and tell it which page sizes that allocator can provide. currently pool_init only uses the multi page allocators when it is allowed to choose one itself.
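to make that concrete, here is a rough sketch (not part of the diff) of what a caller-supplied multi-size allocator could look like with this in. the mcl_* names are made up for illustration; only the struct pool_allocator layout and the POOL_ALLOC_SIZES/POOL_ALLOC_ALIGNED macros come from the diff below.

void	*mcl_page_alloc(struct pool *, int, int *);
void	 mcl_page_free(struct pool *, void *);

/* hypothetical backend handing out size-aligned pages from PAGE_SIZE up to 64k */
struct pool_allocator mcl_allocator = {
	mcl_page_alloc,
	mcl_page_free,
	POOL_ALLOC_SIZES(PAGE_SIZE, 65536, POOL_ALLOC_ALIGNED)
};

with the diff applied, pool_init grows its page size until at least eight items fit and then shrinks it until it lands on a size the allocator actually advertises, so a pool handed &mcl_allocator would end up on one of the sizes above.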
so, as i said above, pool allocators can now indicate what page sizes they can provide. the low bit in pa_pagesz indicates whether the allocator can align its allocations to the requested size. this is necessary if you want to know whether you can store the pool page headers inside the allocation. the rest of the bits map to supported page sizes. pools only support page sizes that are powers of two, so each power of two is represented by a single bit, and those bits are ored together to describe the range of pages an allocator can provide. eg, the multi page pools on sparc64 would have 0xffffe001 as pa_pagesz. the low bit says they can align their pages, and you can test whether a particular size is supported with ISSET. eg, ISSET(0xffffe001, 8192) is true, as is ISSET(0xffffe001, 65536).

in the future i want to use this to provide an allocator for all the mbufs and clusters, so we can configure how much memory we want packets to consume rather than how many packets we want to provide. on my box here kern.maxclusters is 16384, which means we can have 16384 clusters allocated from any of the backend pools. 16384 64k clusters is a gigabyte of ram, which is probably not what we want. instead we should say we want all packets to be allocated from a few megs of ram and let any of the cluster sizes come out of that memory. this diff is a step toward that. another benefit is that it would then let us enable the per cpu caches for mbufs and clusters, which will be necessary to scale performance when the stack is unlocked further.

ok?

Index: sys/pool.h
===================================================================
RCS file: /cvs/src/sys/sys/pool.h,v
retrieving revision 1.67
diff -u -p -r1.67 pool.h
--- sys/pool.h	7 Nov 2016 23:45:27 -0000	1.67
+++ sys/pool.h	8 Nov 2016 00:37:42 -0000
@@ -77,10 +77,36 @@ struct pool_request;
 TAILQ_HEAD(pool_requests, pool_request);
 
 struct pool_allocator {
-	void *(*pa_alloc)(struct pool *, int, int *);
-	void (*pa_free)(struct pool *, void *);
-	int pa_pagesz;
+	void		*(*pa_alloc)(struct pool *, int, int *);
+	void		 (*pa_free)(struct pool *, void *);
+	size_t		  pa_pagesz;
 };
+
+/*
+ * The pa_pagesz member encodes the sizes of pages that can be
+ * provided by the allocator, and whether the allocations can be
+ * aligned to their size.
+ *
+ * Page sizes can only be powers of two. Each available page size is
+ * represented by its value set as a bit. e.g., to indicate that an
+ * allocator can provide 16k and 32k pages you initialise pa_pagesz
+ * to (32768 | 16384).
+ *
+ * If the allocator can provide aligned pages the low bit in pa_pagesz
+ * is set. The POOL_ALLOC_ALIGNED macro is provided as a convenience.
+ *
+ * If pa_pagesz is unset (i.e. 0), POOL_ALLOC_DEFAULT will be used
+ * instead.
+ */
+
+#define POOL_ALLOC_ALIGNED		1UL
+#define POOL_ALLOC_SIZE(_sz, _a)	((_sz) | (_a))
+#define POOL_ALLOC_SIZES(_min, _max, _a) \
+	((_max) | \
+	(((_max) - 1) & ~((_min) - 1)) | (_a))
+
+#define POOL_ALLOC_DEFAULT \
+	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
 
 TAILQ_HEAD(pool_pagelist, pool_page_header);
 
Index: kern/subr_pool.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.203
diff -u -p -r1.203 subr_pool.c
--- kern/subr_pool.c	7 Nov 2016 23:45:27 -0000	1.203
+++ kern/subr_pool.c	8 Nov 2016 00:37:42 -0000
@@ -170,7 +170,8 @@ void	 pool_page_free(struct pool *, void
  */
 struct pool_allocator pool_allocator_single = {
 	pool_page_alloc,
-	pool_page_free
+	pool_page_free,
+	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
 };
 
 void	*pool_multi_alloc(struct pool *, int, int *);
@@ -178,7 +179,8 @@ void	 pool_multi_free(struct pool *, void
 
 struct pool_allocator pool_allocator_multi = {
 	pool_multi_alloc,
-	pool_multi_free
+	pool_multi_free,
+	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
 };
 
 void	*pool_multi_alloc_ni(struct pool *, int, int *);
@@ -186,7 +188,8 @@ void	 pool_multi_free_ni(struct pool *, v
 
 struct pool_allocator pool_allocator_multi_ni = {
 	pool_multi_alloc_ni,
-	pool_multi_free_ni
+	pool_multi_free_ni,
+	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
 };
 
 #ifdef DDB
@@ -264,6 +267,7 @@ pool_init(struct pool *pp, size_t size,
 {
 	int off = 0, space;
 	unsigned int pgsize = PAGE_SIZE, items;
+	size_t pa_pagesz;
 #ifdef DIAGNOSTIC
 	struct pool *iter;
 #endif
@@ -276,17 +280,38 @@ pool_init(struct pool *pp, size_t size,
 
 	size = roundup(size, align);
 
-	if (palloc == NULL) {
-		while (size * 8 > pgsize)
-			pgsize <<= 1;
+	while (size * 8 > pgsize)
+		pgsize <<= 1;
 
+	if (palloc == NULL) {
 		if (pgsize > PAGE_SIZE) {
 			palloc = ISSET(flags, PR_WAITOK) ?
 			    &pool_allocator_multi_ni : &pool_allocator_multi;
 		} else
 			palloc = &pool_allocator_single;
-	} else
-		pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;
+
+		pa_pagesz = palloc->pa_pagesz;
+	} else {
+		size_t pgsizes;
+
+		pa_pagesz = palloc->pa_pagesz;
+		if (pa_pagesz == 0)
+			pa_pagesz = POOL_ALLOC_DEFAULT;
+
+		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;
+
+		/* make sure the allocator can fit at least one item */
+		if (size > pgsizes) {
+			panic("%s: pool %s item size 0x%zx > "
+			    "allocator %p sizes 0x%zx", __func__, wchan,
+			    size, palloc, pgsizes);
+		}
+
+		/* shrink pgsize until it fits into the range */
+		while (!ISSET(pgsizes, pgsize))
+			pgsize >>= 1;
+	}
+	KASSERT(ISSET(pa_pagesz, pgsize));
 
 	items = pgsize / size;
 
@@ -296,11 +321,14 @@ pool_init(struct pool *pp, size_t size,
 	 * go into an RB tree, so we can match a returned item with
 	 * its header based on the page address.
 	 */
-	if (pgsize - (size * items) > sizeof(struct pool_page_header)) {
-		off = pgsize - sizeof(struct pool_page_header);
-	} else if (sizeof(struct pool_page_header) * 2 >= size) {
-		off = pgsize - sizeof(struct pool_page_header);
-		items = off / size;
+	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
+		if (pgsize - (size * items) >
+		    sizeof(struct pool_page_header)) {
+			off = pgsize - sizeof(struct pool_page_header);
+		} else if (sizeof(struct pool_page_header) * 2 >= size) {
+			off = pgsize - sizeof(struct pool_page_header);
+			items = off / size;
+		}
 	}
 
 	KASSERT(items > 0);
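for anyone who wants to sanity check the sparc64 number mentioned above, here is a small userland program (not part of the diff) that copies POOL_ALLOC_ALIGNED and POOL_ALLOC_SIZES from the pool.h chunk, along with the usual ISSET() from sys/param.h, and prints the resulting mask. the 8192 is sparc64's PAGE_SIZE and 1UL << 31 is the upper bound the multi allocators use:

#include <stdio.h>

#define ISSET(t, f)	((t) & (f))

/* copied from the pool.h chunk above */
#define POOL_ALLOC_ALIGNED		1UL
#define POOL_ALLOC_SIZES(_min, _max, _a) \
	((_max) | \
	(((_max) - 1) & ~((_min) - 1)) | (_a))

int
main(void)
{
	unsigned long pa_pagesz =
	    POOL_ALLOC_SIZES(8192UL, (1UL << 31), POOL_ALLOC_ALIGNED);

	printf("pa_pagesz = 0x%lx\n", pa_pagesz);	/* 0xffffe001 */
	printf("aligned: %d\n", ISSET(pa_pagesz, POOL_ALLOC_ALIGNED) != 0);
	printf("8k:  %d\n", ISSET(pa_pagesz, 8192UL) != 0);	/* 1 */
	printf("64k: %d\n", ISSET(pa_pagesz, 65536UL) != 0);	/* 1 */
	printf("4k:  %d\n", ISSET(pa_pagesz, 4096UL) != 0);	/* 0 */

	return (0);
}

it prints 0xffffe001, with the 8k and 64k tests true and the 4k test false, which matches the description above.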