When forwarding a lot of traffic with 10G interfaces contention on the
NET_LOCK() is "visible".  Each time you type "ifconfig" you can go grab
a coffee...

The problem has a name: pf_purge_thread().  This thread is created by
default and run even if you don't have PF enabled.  Every `hz' it wakes
up, grab the lock and go to sleep. 

Since the execution of this thread is serialized with the `softnet' task,
it makes more sense to execute it in the same context.  This reduce the
NET_LOCK() contention and implicitly preempt the packet processing.

Diff below improves the situation with PF disabled, I didn't test with
PF enabled.

Index: net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1037
diff -u -p -r1.1037 pf.c
--- net/pf.c    4 Jul 2017 14:10:15 -0000       1.1037
+++ net/pf.c    18 Jul 2017 13:28:10 -0000
@@ -121,6 +121,10 @@ u_char                      pf_tcp_secret[16];
 int                     pf_tcp_secret_init;
 int                     pf_tcp_iss_off;
 
+int             pf_npurge;
+struct task     pf_purge_task = TASK_INITIALIZER(pf_purge, &pf_npurge);
+struct timeout  pf_purge_to = TIMEOUT_INITIALIZER(pf_purge_timeout, NULL);
+
 enum pf_test_status {
        PF_TEST_FAIL = -1,
        PF_TEST_OK,
@@ -1200,36 +1204,44 @@ pf_purge_expired_rules(void)
 }
 
 void
-pf_purge_thread(void *v)
+pf_purge_timeout(void *unused)
 {
-       int nloops = 0, s;
-
-       for (;;) {
-               tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
-
-               NET_LOCK(s);
+       task_add(softnettq, &pf_purge_task);
+}
 
-               PF_LOCK();
-               /* process a fraction of the state table every second */
-               pf_purge_expired_states(1 + (pf_status.states
-                   / pf_default_rule.timeout[PFTM_INTERVAL]));
+void
+pf_purge(void *xnloops)
+{
+       int *nloops = xnloops;
+       int s;
 
-               /* purge other expired types every PFTM_INTERVAL seconds */
-               if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
-                       pf_purge_expired_src_nodes(0);
-                       pf_purge_expired_rules();
-               }
-               PF_UNLOCK();
-               /*
-                * Fragments don't require PF_LOCK(), they use their own lock.
-                */
-               if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
-                       pf_purge_expired_fragments();
-                       nloops = 0;
-               }
+       KERNEL_LOCK();
+       NET_LOCK(s);
 
-               NET_UNLOCK(s);
+       PF_LOCK();
+       /* process a fraction of the state table every second */
+       pf_purge_expired_states(1 + (pf_status.states
+           / pf_default_rule.timeout[PFTM_INTERVAL]));
+
+       /* purge other expired types every PFTM_INTERVAL seconds */
+       if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
+               pf_purge_expired_src_nodes(0);
+               pf_purge_expired_rules();
+       }
+       PF_UNLOCK();
+
+       /*
+        * Fragments don't require PF_LOCK(), they use their own lock.
+        */
+       if ((*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
+               pf_purge_expired_fragments();
+               *nloops = 0;
        }
+       NET_UNLOCK(s);
+       KERNEL_UNLOCK();
+
+       if (pf_status.running)
+               timeout_add(&pf_purge_to, 1 * hz);
 }
 
 int32_t
Index: net/pf_ioctl.c
===================================================================
RCS file: /cvs/src/sys/net/pf_ioctl.c,v
retrieving revision 1.318
diff -u -p -r1.318 pf_ioctl.c
--- net/pf_ioctl.c      5 Jul 2017 11:40:17 -0000       1.318
+++ net/pf_ioctl.c      18 Jul 2017 13:29:39 -0000
@@ -231,16 +231,6 @@ pfattach(int num)
 
        /* XXX do our best to avoid a conflict */
        pf_status.hostid = arc4random();
-
-       /* require process context to purge states, so perform in a thread */
-       kthread_create_deferred(pf_thread_create, NULL);
-}
-
-void
-pf_thread_create(void *v)
-{
-       if (kthread_create(pf_purge_thread, NULL, NULL, "pfpurge"))
-               panic("pfpurge thread");
 }
 
 int
@@ -1027,6 +1017,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
                                pf_status.stateid = time_second;
                                pf_status.stateid = pf_status.stateid << 32;
                        }
+                       timeout_add(&pf_purge_to, 1 * hz);
                        pf_create_queues();
                        DPFPRINTF(LOG_NOTICE, "pf: started");
                }
@@ -2291,7 +2282,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
                            pf_default_rule_new.timeout[i];
                        if (pf_default_rule.timeout[i] == PFTM_INTERVAL &&
                            pf_default_rule.timeout[i] < old)
-                               wakeup(pf_purge_thread);
+                               task_add(softnettq, &pf_purge_task);
                }
                pfi_xcommit();
                pf_trans_set_commit();
Index: net/pfvar.h
===================================================================
RCS file: /cvs/src/sys/net/pfvar.h,v
retrieving revision 1.460
diff -u -p -r1.460 pfvar.h
--- net/pfvar.h 28 Jun 2017 19:30:24 -0000      1.460
+++ net/pfvar.h 18 Jul 2017 13:30:23 -0000
@@ -1647,7 +1647,8 @@ extern int                         
pf_tbladdr_setup(struct pf
 extern void                     pf_tbladdr_remove(struct pf_addr_wrap *);
 extern void                     pf_tbladdr_copyout(struct pf_addr_wrap *);
 extern void                     pf_calc_skip_steps(struct pf_rulequeue *);
-extern void                     pf_purge_thread(void *);
+extern void                     pf_purge_timeout(void *);
+extern void                     pf_purge(void *);
 extern void                     pf_purge_expired_src_nodes();
 extern void                     pf_purge_expired_states(u_int32_t);
 extern void                     pf_purge_expired_rules();
@@ -1826,6 +1827,8 @@ const struct pfq_ops
                *pf_queue_manager(struct pf_queuespec *);
 
 extern struct pf_status        pf_status;
+extern struct task     pf_purge_task;
+extern struct timeout  pf_purge_to;
 extern struct pool     pf_frent_pl, pf_frag_pl;
 
 struct pf_pool_limit {

Reply via email to