this turns the pa_pagesz member of a pool allocator into a bitfield. pool pages can be many different sizes; they aren't restricted to just the size provided by the hardware. to support this without introducing a separate page allocator per page size, a single page allocator can be made to provide many different sizes. however, right now there is no way to pass a custom page allocator to pool_init and tell it which page sizes that allocator can provide. currently pool_init only uses the multi page allocators when it is allowed to choose one itself.
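to make that concrete, here is a rough sketch (not part of the diff) of what a caller-supplied multi-size allocator could look like with this in. the mcl_* names are made up for illustration; only the struct pool_allocator layout and the POOL_ALLOC_SIZES/POOL_ALLOC_ALIGNED macros come from the diff below.

void	*mcl_page_alloc(struct pool *, int, int *);
void	 mcl_page_free(struct pool *, void *);

/* hypothetical backend handing out size-aligned pages from PAGE_SIZE up to 64k */
struct pool_allocator mcl_allocator = {
	mcl_page_alloc,
	mcl_page_free,
	POOL_ALLOC_SIZES(PAGE_SIZE, 65536, POOL_ALLOC_ALIGNED)
};

with the diff applied, pool_init grows its page size until at least eight items fit and then shrinks it until it lands on a size the allocator actually advertises, so a pool handed &mcl_allocator would end up on one of the sizes above.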
so, as i said above, pool allocators can now indicate what page sizes they can provide. the low bit in pa_pagesz indicates whether the allocator can align its allocations to the requested size. this is necessary if you want to know whether you can store the pool page headers inside the allocation. the rest of the bits map to supported page sizes. pools only support page sizes that are powers of two, so each power of two is represented by a single bit, and those bits are ored together to describe the range of pages an allocator can provide. eg, the multi page pools on sparc64 would have 0xffffe001 as pa_pagesz. the low bit says they can align their pages, and you can test whether a particular size is supported with ISSET. eg, ISSET(0xffffe001, 8192) is true, as is ISSET(0xffffe001, 65536).

in the future i want to use this to provide an allocator for all the mbufs and clusters, so we can configure how much memory we want packets to consume rather than how many packets we want to provide. on my box here kern.maxclusters is 16384, which means we can have 16384 clusters allocated from any of the backend pools. 16384 64k clusters is a gigabyte of ram, which is probably not what we want. instead we should say we want all packets to be allocated from a few megs of ram and let any of the cluster sizes come out of that memory. this diff is a step toward that. another benefit is that it would then let us enable the per cpu caches for mbufs and clusters, which will be necessary to scale performance when the stack is unlocked further.

ok?

Index: sys/pool.h
===================================================================
RCS file: /cvs/src/sys/sys/pool.h,v
retrieving revision 1.67
diff -u -p -r1.67 pool.h
--- sys/pool.h	7 Nov 2016 23:45:27 -0000	1.67
+++ sys/pool.h	8 Nov 2016 00:37:42 -0000
@@ -77,10 +77,36 @@ struct pool_request;
 TAILQ_HEAD(pool_requests, pool_request);
 
 struct pool_allocator {
-	void *(*pa_alloc)(struct pool *, int, int *);
-	void (*pa_free)(struct pool *, void *);
-	int pa_pagesz;
+	void		*(*pa_alloc)(struct pool *, int, int *);
+	void		 (*pa_free)(struct pool *, void *);
+	size_t		  pa_pagesz;
 };
+
+/*
+ * The pa_pagesz member encodes the sizes of pages that can be
+ * provided by the allocator, and whether the allocations can be
+ * aligned to their size.
+ *
+ * Page sizes can only be powers of two. Each available page size is
+ * represented by its value set as a bit. e.g., to indicate that an
+ * allocator can provide 16k and 32k pages you initialise pa_pagesz
+ * to (32768 | 16384).
+ *
+ * If the allocator can provide aligned pages the low bit in pa_pagesz
+ * is set. The POOL_ALLOC_ALIGNED macro is provided as a convenience.
+ *
+ * If pa_pagesz is unset (i.e. 0), POOL_ALLOC_DEFAULT will be used
+ * instead.
+ */
+
+#define POOL_ALLOC_ALIGNED		1UL
+#define POOL_ALLOC_SIZE(_sz, _a)	((_sz) | (_a))
+#define POOL_ALLOC_SIZES(_min, _max, _a) \
+	((_max) | \
+	(((_max) - 1) & ~((_min) - 1)) | (_a))
+
+#define POOL_ALLOC_DEFAULT \
+	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
 
 TAILQ_HEAD(pool_pagelist, pool_page_header);
 
Index: kern/subr_pool.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.203
diff -u -p -r1.203 subr_pool.c
--- kern/subr_pool.c	7 Nov 2016 23:45:27 -0000	1.203
+++ kern/subr_pool.c	8 Nov 2016 00:37:42 -0000
@@ -170,7 +170,8 @@ void	 pool_page_free(struct pool *, void
  */
 struct pool_allocator pool_allocator_single = {
 	pool_page_alloc,
-	pool_page_free
+	pool_page_free,
+	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
 };
 
 void	*pool_multi_alloc(struct pool *, int, int *);
@@ -178,7 +179,8 @@ void	 pool_multi_free(struct pool *, void
 
 struct pool_allocator pool_allocator_multi = {
 	pool_multi_alloc,
-	pool_multi_free
+	pool_multi_free,
+	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
 };
 
 void	*pool_multi_alloc_ni(struct pool *, int, int *);
@@ -186,7 +188,8 @@ void	 pool_multi_free_ni(struct pool *, v
 
 struct pool_allocator pool_allocator_multi_ni = {
 	pool_multi_alloc_ni,
-	pool_multi_free_ni
+	pool_multi_free_ni,
+	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
 };
 
 #ifdef DDB
@@ -264,6 +267,7 @@ pool_init(struct pool *pp, size_t size,
 {
 	int off = 0, space;
 	unsigned int pgsize = PAGE_SIZE, items;
+	size_t pa_pagesz;
 #ifdef DIAGNOSTIC
 	struct pool *iter;
 #endif
@@ -276,17 +280,38 @@ pool_init(struct pool *pp, size_t size,
 
 	size = roundup(size, align);
 
-	if (palloc == NULL) {
-		while (size * 8 > pgsize)
-			pgsize <<= 1;
+	while (size * 8 > pgsize)
+		pgsize <<= 1;
 
+	if (palloc == NULL) {
 		if (pgsize > PAGE_SIZE) {
 			palloc = ISSET(flags, PR_WAITOK) ?
 			    &pool_allocator_multi_ni : &pool_allocator_multi;
 		} else
 			palloc = &pool_allocator_single;
-	} else
-		pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;
+
+		pa_pagesz = palloc->pa_pagesz;
+	} else {
+		size_t pgsizes;
+
+		pa_pagesz = palloc->pa_pagesz;
+		if (pa_pagesz == 0)
+			pa_pagesz = POOL_ALLOC_DEFAULT;
+
+		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;
+
+		/* make sure the allocator can fit at least one item */
+		if (size > pgsizes) {
+			panic("%s: pool %s item size 0x%zx > "
+			    "allocator %p sizes 0x%zx", __func__, wchan,
+			    size, palloc, pgsizes);
+		}
+
+		/* shrink pgsize until it fits into the range */
+		while (!ISSET(pgsizes, pgsize))
+			pgsize >>= 1;
+	}
+	KASSERT(ISSET(pa_pagesz, pgsize));
 
 	items = pgsize / size;
 
@@ -296,11 +321,14 @@ pool_init(struct pool *pp, size_t size,
 	 * go into an RB tree, so we can match a returned item with
 	 * its header based on the page address.
 	 */
-	if (pgsize - (size * items) > sizeof(struct pool_page_header)) {
-		off = pgsize - sizeof(struct pool_page_header);
-	} else if (sizeof(struct pool_page_header) * 2 >= size) {
-		off = pgsize - sizeof(struct pool_page_header);
-		items = off / size;
+	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
+		if (pgsize - (size * items) >
+		    sizeof(struct pool_page_header)) {
+			off = pgsize - sizeof(struct pool_page_header);
+		} else if (sizeof(struct pool_page_header) * 2 >= size) {
+			off = pgsize - sizeof(struct pool_page_header);
+			items = off / size;
+		}
 	}
 
 	KASSERT(items > 0);
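for anyone who wants to sanity check the sparc64 number mentioned above, here is a small userland program (not part of the diff) that copies POOL_ALLOC_ALIGNED and POOL_ALLOC_SIZES from the pool.h chunk, along with the usual ISSET() from sys/param.h, and prints the resulting mask. the 8192 is sparc64's PAGE_SIZE and 1UL << 31 is the upper bound the multi allocators use:

#include <stdio.h>

#define ISSET(t, f)	((t) & (f))

/* copied from the pool.h chunk above */
#define POOL_ALLOC_ALIGNED		1UL
#define POOL_ALLOC_SIZES(_min, _max, _a) \
	((_max) | \
	(((_max) - 1) & ~((_min) - 1)) | (_a))

int
main(void)
{
	unsigned long pa_pagesz =
	    POOL_ALLOC_SIZES(8192UL, (1UL << 31), POOL_ALLOC_ALIGNED);

	printf("pa_pagesz = 0x%lx\n", pa_pagesz);	/* 0xffffe001 */
	printf("aligned: %d\n", ISSET(pa_pagesz, POOL_ALLOC_ALIGNED) != 0);
	printf("8k:  %d\n", ISSET(pa_pagesz, 8192UL) != 0);	/* 1 */
	printf("64k: %d\n", ISSET(pa_pagesz, 65536UL) != 0);	/* 1 */
	printf("4k:  %d\n", ISSET(pa_pagesz, 4096UL) != 0);	/* 0 */

	return (0);
}

it prints 0xffffe001, with the 8k and 64k tests true and the 4k test false, which matches the description above.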