Hi!

I'd like to improve Henning's newly landed HFSC code by making it grow
the class-pointer array automatically, so setups (like mine) with
thousands of classes work out of the box.

As Henning pointed out on icb, the current "take a guess or do a
linear search" algorithm is good enough, and it'll probably still be
good enough if (when) we go parallel (see
  http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table/
for example), so I only added the autosizing.  The beginning of struct
hfsc_if is now nice and compact (one cache line), and the sizes are
picked so the allocation is page aligned and uses all the space in
those pages.  (Who wants more TLB misses/page faults in their
forwarding path?)

This means about 500 classes by default on amd64, and twice that on i386.
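
To spell out the arithmetic, here's a quick userland sanity check of
the sizing math (not part of the diff; it assumes 8-byte pointers,
4 KB pages and a 64-byte struct hfsc_if, i.e. the "one cache line"
case from above):

    #include <stdio.h>

    #define PAGE_SIZE 4096

    static unsigned
    new_amount(unsigned old, unsigned header_bytes)
    {
            /* Header size in pointer-sized slots, rounded up. */
            unsigned header = (header_bytes + sizeof(void *) - 1) /
                sizeof(void *);
            unsigned was_pages = (header + old) * sizeof(void *) / PAGE_SIZE;
            unsigned n = (was_pages + 1) * PAGE_SIZE / sizeof(void *);

            return (n - header);
    }

    int
    main(void)
    {
            printf("%u\n", new_amount(0, 64));    /* 504: first page */
            printf("%u\n", new_amount(504, 64));  /* 1016: after one grow */
            return (0);
    }

The header eats 8 of the 512 pointer slots in the first page, hence
"about 500".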

Does anyone know whether (and how) malloc(9) guarantees page-aligned
addresses for sizes that are multiples of a page?  Should I use a
different mechanism?
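
If malloc(9) can't guarantee that, I suppose km_alloc(9) would, since
it hands out whole pages and is therefore page aligned by construction.
Something like the following, if I read uvm_km right (untested, names
from memory):

    #include <uvm/uvm_extern.h>

    void *v;

    v = km_alloc(sz, &kv_any, &kp_dirty, &kd_waitok);
    /* ... */
    km_free(v, sz, &kv_any, &kp_dirty);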

I'm aware that changing "i % HFSC_MAX_CLASSES" to "i % hif->hif_allocated"
turns the AND into a load plus a real modulo operation (which on my Atom
is only about 1.8x slower than the AND), but hif_allocated isn't used
anywhere else in those paths, so compilers can schedule the load as they
wish and it may even execute in parallel inside the ALU.
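
Just to illustrate (throwaway names, not part of the diff): with a
power-of-two constant the compiler can strength-reduce the modulo to
an AND, while a run-time divisor needs a real division:

    u_int
    slot_fixed(u_int qid)
    {
            return (qid % 64);              /* becomes qid & 63 */
    }

    u_int
    slot_grown(u_int qid, u_int allocated)
    {
            return (qid % allocated);       /* load + real div */
    }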

Tested on amd64.  With tens of thousands of queues our routers
practically move to Siberia, serving interrupt time.  Don't worry,
though; I've got a possible speedup for that as well! :-)

OKs?  Comments?  Can haz committed?
--
Martin Pelikan


Index: net/hfsc.c
===================================================================
RCS file: /cvs/src/sys/net/hfsc.c,v
retrieving revision 1.1
diff -u -p -r1.1 hfsc.c
--- net/hfsc.c  12 Oct 2013 11:39:17 -0000      1.1
+++ net/hfsc.c  17 Oct 2013 22:30:01 -0000
@@ -128,15 +128,71 @@ hfsc_microuptime(void)
            HFSC_CLK_SHIFT);
 }
 
+/*
+ * We had @old slots allocated and we'd like to grow the table.  The new
+ * structure will be exactly one page larger, i.e. with 8-byte pointers
+ * and 4 KB pages roughly 500 more classes.  Returns the new number of
+ * slots, so that every page is 100% utilized.
+ */
+static inline u_int
+hfsc_new_amount(u_int old)
+{
+       /* Round up in case the header isn't a whole number of pointers. */
+       const u_int header = (sizeof(struct hfsc_if) + sizeof(void *) - 1)
+           / sizeof(void *);
+       u_int was_pages = (header + old) * sizeof(void *) / PAGE_SIZE;
+       u_int n = ((was_pages + 1) * PAGE_SIZE) / sizeof(void *);
+
+       return (n - header);
+}
+
+static struct hfsc_if *
+hfsc_grow(struct hfsc_if *old)
+{
+       u_int i;
+       struct hfsc_if *n;
+       const u_int slots = hfsc_new_amount(old->hif_allocated);
+       const size_t sz = slots * sizeof(void *) + sizeof *n;
+
+       KASSERT((sz & (PAGE_SIZE - 1)) == 0);
+
+       n = malloc(sz, M_DEVBUF, M_WAITOK | M_ZERO);
+       n->hif_next = old->hif_next;
+       n->hif_rootclass = old->hif_rootclass;
+       n->hif_defaultclass = old->hif_defaultclass;
+       n->hif_pollcache = old->hif_pollcache;
+       n->hif_eligible = old->hif_eligible;
+
+       memcpy(n->hif_class_tbl, old->hif_class_tbl,
+           old->hif_allocated * sizeof(void *));
+       for (i = 0; i < old->hif_allocated; ++i)
+               if (n->hif_class_tbl[i])
+                       n->hif_class_tbl[i]->cl_hif = n;
+
+       n->hif_allocated = slots;
+       n->hif_classes = old->hif_classes;
+       n->hif_packets = old->hif_packets;
+       n->hif_classid = old->hif_classid;
+
+       old->hif_ifq->ifq_hfsc = n;
+       n->hif_ifq = old->hif_ifq;
+       free(old, M_DEVBUF);
+       return (n);
+}
+
 int
 hfsc_attach(struct ifnet *ifp)
 {
        struct hfsc_if *hif;
+       const size_t sz = sizeof *hif + hfsc_new_amount(0) * sizeof(void *);
+
+       KASSERT((sz & (PAGE_SIZE - 1)) == 0);
 
        if (ifp->if_snd.ifq_hfsc != NULL)
                return (0);
 
-       hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK|M_ZERO);
+       hif = malloc(sz, M_DEVBUF, M_WAITOK | M_ZERO);
+       hif->hif_allocated = hfsc_new_amount(0);
        hif->hif_eligible = hfsc_ellist_alloc();
        hif->hif_ifq = (struct ifqueue *)&ifp->if_snd; /* XXX cast temp */
        ifp->if_snd.ifq_hfsc = hif;
@@ -253,8 +309,8 @@ hfsc_class_create(struct hfsc_if *hif, s
        struct hfsc_class *cl, *p;
        int i, s;
 
-       if (hif->hif_classes >= HFSC_MAX_CLASSES)
-               return (NULL);
+       if (hif->hif_classes >= hif->hif_allocated)
+               hif = hfsc_grow(hif);
 
        cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_WAITOK|M_ZERO);
        cl->cl_q = malloc(sizeof(struct hfsc_classq), M_DEVBUF,
@@ -330,16 +386,16 @@ hfsc_class_create(struct hfsc_if *hif, s
         * the lower bits of qid is free, use this slot.  otherwise,
         * use the first free slot.
         */
-       i = qid % HFSC_MAX_CLASSES;
+       i = qid % hif->hif_allocated;
        if (hif->hif_class_tbl[i] == NULL)
                hif->hif_class_tbl[i] = cl;
        else {
-               for (i = 0; i < HFSC_MAX_CLASSES; i++)
+               for (i = 0; i < hif->hif_allocated; i++)
                        if (hif->hif_class_tbl[i] == NULL) {
                                hif->hif_class_tbl[i] = cl;
                                break;
                        }
-               if (i == HFSC_MAX_CLASSES) {
+               if (i == hif->hif_allocated) {
                        splx(s);
                        goto err_ret;
                }
@@ -389,6 +445,7 @@ int
 hfsc_class_destroy(struct hfsc_class *cl)
 {
        int i, s;
+       u_int lim = (cl == NULL) ? 0 : cl->cl_hif->hif_allocated;
 
        if (cl == NULL)
                return (0);
@@ -414,7 +471,7 @@ hfsc_class_destroy(struct hfsc_class *cl
                } while ((p = p->cl_siblings) != NULL);
        }
 
-       for (i = 0; i < HFSC_MAX_CLASSES; i++)
+       for (i = 0; i < lim; i++)
                if (cl->cl_hif->hif_class_tbl[i] == cl) {
                        cl->cl_hif->hif_class_tbl[i] = NULL;
                        break;
@@ -1454,10 +1511,10 @@ hfsc_clh2cph(struct hfsc_if *hif, u_int3
         * first, try the slot corresponding to the lower bits of the handle.
         * if it does not match, do the linear table search.
         */
-       i = chandle % HFSC_MAX_CLASSES;
+       i = chandle % hif->hif_allocated;
        if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
                return (cl);
-       for (i = 0; i < HFSC_MAX_CLASSES; i++)
+       for (i = 0; i < hif->hif_allocated; i++)
                if ((cl = hif->hif_class_tbl[i]) != NULL &&
                    cl->cl_handle == chandle)
                        return (cl);
Index: net/hfsc.h
===================================================================
RCS file: /cvs/src/sys/net/hfsc.h,v
retrieving revision 1.1
diff -u -p -r1.1 hfsc.h
--- net/hfsc.h  12 Oct 2013 11:39:17 -0000      1.1
+++ net/hfsc.h  17 Oct 2013 22:30:01 -0000
@@ -55,7 +55,6 @@ struct hfsc_sc {
 
 /* special class handles */
 #define        HFSC_NULLCLASS_HANDLE   0
-#define        HFSC_MAX_CLASSES        64
 
 /* service curve types */
 #define        HFSC_REALTIMESC         1
@@ -234,14 +233,15 @@ struct hfsc_if {
        struct ifqueue          *hif_ifq;       /* backpointer to ifq */
        struct hfsc_class       *hif_rootclass;         /* root class */
        struct hfsc_class       *hif_defaultclass;      /* default class */
-       struct hfsc_class       *hif_class_tbl[HFSC_MAX_CLASSES];
        struct hfsc_class       *hif_pollcache; /* cache for poll operation */
 
+       u_int   hif_allocated;                  /* slots in hif_class_tbl */
        u_int   hif_classes;                    /* # of classes in the tree */
        u_int   hif_packets;                    /* # of packets in the tree */
        u_int   hif_classid;                    /* class id sequence number */
 
-       hfsc_ellist_t *hif_eligible;                    /* eligible list */
+       hfsc_ellist_t *hif_eligible;            /* eligible list */
+       struct hfsc_class       *hif_class_tbl[0];  /* slots for classes */
 };
 
 #define HFSC_CLK_SHIFT         8
