When forwarding a lot of traffic with 10G interfaces contention on the NET_LOCK() is "visible". Each time you type "ifconfig" you can go grab a coffee...
The problem has a name: pf_purge_thread(). This thread is created by default and run even if you don't have PF enabled. Every `hz' it wakes up, grab the lock and go to sleep. Since the execution of this thread is serialized with the `softnet' task, it makes more sense to execute it in the same context. This reduce the NET_LOCK() contention and implicitly preempt the packet processing. Diff below improves the situation with PF disabled, I didn't test with PF enabled. Index: net/pf.c =================================================================== RCS file: /cvs/src/sys/net/pf.c,v retrieving revision 1.1037 diff -u -p -r1.1037 pf.c --- net/pf.c 4 Jul 2017 14:10:15 -0000 1.1037 +++ net/pf.c 18 Jul 2017 13:28:10 -0000 @@ -121,6 +121,10 @@ u_char pf_tcp_secret[16]; int pf_tcp_secret_init; int pf_tcp_iss_off; +int pf_npurge; +struct task pf_purge_task = TASK_INITIALIZER(pf_purge, &pf_npurge); +struct timeout pf_purge_to = TIMEOUT_INITIALIZER(pf_purge_timeout, NULL); + enum pf_test_status { PF_TEST_FAIL = -1, PF_TEST_OK, @@ -1200,36 +1204,44 @@ pf_purge_expired_rules(void) } void -pf_purge_thread(void *v) +pf_purge_timeout(void *unused) { - int nloops = 0, s; - - for (;;) { - tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz); - - NET_LOCK(s); + task_add(softnettq, &pf_purge_task); +} - PF_LOCK(); - /* process a fraction of the state table every second */ - pf_purge_expired_states(1 + (pf_status.states - / pf_default_rule.timeout[PFTM_INTERVAL])); +void +pf_purge(void *xnloops) +{ + int *nloops = xnloops; + int s; - /* purge other expired types every PFTM_INTERVAL seconds */ - if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { - pf_purge_expired_src_nodes(0); - pf_purge_expired_rules(); - } - PF_UNLOCK(); - /* - * Fragments don't require PF_LOCK(), they use their own lock. - */ - if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { - pf_purge_expired_fragments(); - nloops = 0; - } + KERNEL_LOCK(); + NET_LOCK(s); - NET_UNLOCK(s); + PF_LOCK(); + /* process a fraction of the state table every second */ + pf_purge_expired_states(1 + (pf_status.states + / pf_default_rule.timeout[PFTM_INTERVAL])); + + /* purge other expired types every PFTM_INTERVAL seconds */ + if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) { + pf_purge_expired_src_nodes(0); + pf_purge_expired_rules(); + } + PF_UNLOCK(); + + /* + * Fragments don't require PF_LOCK(), they use their own lock. + */ + if ((*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) { + pf_purge_expired_fragments(); + *nloops = 0; } + NET_UNLOCK(s); + KERNEL_UNLOCK(); + + if (pf_status.running) + timeout_add(&pf_purge_to, 1 * hz); } int32_t Index: net/pf_ioctl.c =================================================================== RCS file: /cvs/src/sys/net/pf_ioctl.c,v retrieving revision 1.318 diff -u -p -r1.318 pf_ioctl.c --- net/pf_ioctl.c 5 Jul 2017 11:40:17 -0000 1.318 +++ net/pf_ioctl.c 18 Jul 2017 13:29:39 -0000 @@ -231,16 +231,6 @@ pfattach(int num) /* XXX do our best to avoid a conflict */ pf_status.hostid = arc4random(); - - /* require process context to purge states, so perform in a thread */ - kthread_create_deferred(pf_thread_create, NULL); -} - -void -pf_thread_create(void *v) -{ - if (kthread_create(pf_purge_thread, NULL, NULL, "pfpurge")) - panic("pfpurge thread"); } int @@ -1027,6 +1017,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a pf_status.stateid = time_second; pf_status.stateid = pf_status.stateid << 32; } + timeout_add(&pf_purge_to, 1 * hz); pf_create_queues(); DPFPRINTF(LOG_NOTICE, "pf: started"); } @@ -2291,7 +2282,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a pf_default_rule_new.timeout[i]; if (pf_default_rule.timeout[i] == PFTM_INTERVAL && pf_default_rule.timeout[i] < old) - wakeup(pf_purge_thread); + task_add(softnettq, &pf_purge_task); } pfi_xcommit(); pf_trans_set_commit(); Index: net/pfvar.h =================================================================== RCS file: /cvs/src/sys/net/pfvar.h,v retrieving revision 1.460 diff -u -p -r1.460 pfvar.h --- net/pfvar.h 28 Jun 2017 19:30:24 -0000 1.460 +++ net/pfvar.h 18 Jul 2017 13:30:23 -0000 @@ -1647,7 +1647,8 @@ extern int pf_tbladdr_setup(struct pf extern void pf_tbladdr_remove(struct pf_addr_wrap *); extern void pf_tbladdr_copyout(struct pf_addr_wrap *); extern void pf_calc_skip_steps(struct pf_rulequeue *); -extern void pf_purge_thread(void *); +extern void pf_purge_timeout(void *); +extern void pf_purge(void *); extern void pf_purge_expired_src_nodes(); extern void pf_purge_expired_states(u_int32_t); extern void pf_purge_expired_rules(); @@ -1826,6 +1827,8 @@ const struct pfq_ops *pf_queue_manager(struct pf_queuespec *); extern struct pf_status pf_status; +extern struct task pf_purge_task; +extern struct timeout pf_purge_to; extern struct pool pf_frent_pl, pf_frag_pl; struct pf_pool_limit {