On 30/05/17(Tue) 20:31, Alexandr Nedvedicky wrote:
> Hello Martin,
> 
> </snip>
> > >  		rw_exit_write(&netlock);
> > >  		export_pflow(cur);
> > >  		rw_enter_write(&netlock);
> > > +		rw_enter_write(&pf_lock);
> > >  	}
> > 
> > This is not needed, you're not diffing against the latest version of
> > net/pf.c.
> 
> indeed my tree is old by couple hours.
> 
> > > +extern struct rwlock	pf_lock;
> > > +
> > > +#define PF_LOCK(s)	do {			\
> > > +		NET_ASSERT_LOCKED();		\
> > > +		rw_enter_write(&pf_lock);	\
> > > +		s = splsoftnet();		\
> > > +	} while (0)
> > 
> > There's no need for splsoftnet()/splx() nor splsoftassert().
> 
> O.K. removed, the 'int spl;' at pf.c is also gone now.
> 
> thank you for looking at my changes. updated diff is further below
Could you make two definitions for the lock?  It doesn't make sense to
enable them by default for now.  I'd like to see your diff committed now
with empty defines and an easy way to enable it.  That means ok mpi@ if
the defines do not take/release locks by default.
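Roughly what I have in mind, as an untested sketch; "WITH_PF_LOCK" is only
a placeholder name for whatever compile-time knob we end up picking:

	#ifdef WITH_PF_LOCK
	extern struct rwlock	pf_lock;

	/* Real thing: serialize pf(4) under its own write lock. */
	#define PF_LOCK()	do {			\
			NET_ASSERT_LOCKED();		\
			rw_enter_write(&pf_lock);	\
		} while (0)

	#define PF_UNLOCK()	do {			\
			PF_ASSERT_LOCKED();		\
			rw_exit_write(&pf_lock);	\
		} while (0)
	#else
	/* Default for now: compile to nothing, pf keeps relying on the
	 * net lock alone. */
	#define PF_LOCK()	do { } while (0)
	#define PF_UNLOCK()	do { } while (0)
	#endif	/* WITH_PF_LOCK */

The PF_ASSERT_LOCKED()/PF_ASSERT_UNLOCKED() macros would need the same
treatment, otherwise they would fire on a kernel built without the lock.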
> 
> regards
> sasha
> --------8<---------------8<---------------8<------------------8<--------
> diff -r 85b6b6ce74cd .hgtags
> --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
> +++ b/.hgtags	Tue May 30 20:27:43 2017 +0200
> @@ -0,0 +1,1 @@
> +d545881e2652dbc0c057691a39a095bce92f441f pf-lock.baseline
> diff -r 85b6b6ce74cd src/sys/net/pf.c
> --- a/src/sys/net/pf.c	Tue May 30 20:11:44 2017 +0200
> +++ b/src/sys/net/pf.c	Tue May 30 20:27:43 2017 +0200
> @@ -923,7 +923,7 @@ int
>  pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
>      struct pf_state_key **sks, struct pf_state *s)
>  {
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  	s->kif = kif;
>  	if (*skw == *sks) {
> @@ -1186,7 +1186,7 @@ pf_purge_expired_rules(void)
>  {
>  	struct pf_rule	*r;
>  
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  	if (SLIST_EMPTY(&pf_rule_gcl))
>  		return;
> @@ -1207,6 +1207,7 @@ pf_purge_thread(void *v)
>  		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
>  
>  		NET_LOCK(s);
> +		PF_LOCK();
>  
>  		/* process a fraction of the state table every second */
>  		pf_purge_expired_states(1 + (pf_status.states
> @@ -1214,13 +1215,20 @@ pf_purge_thread(void *v)
>  
>  		/* purge other expired types every PFTM_INTERVAL seconds */
>  		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
> -			pf_purge_expired_fragments();
>  			pf_purge_expired_src_nodes(0);
>  			pf_purge_expired_rules();
> +		}
> +
> +		PF_UNLOCK();
> +		NET_UNLOCK(s);
> +
> +		/*
> +		 * Fragments don't require PF_LOCK(), they use their own mutex.
> +		 */
> +		if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
> +			pf_purge_expired_fragments();
>  			nloops = 0;
>  		}
> -
> -		NET_UNLOCK(s);
>  	}
>  }
>  
> @@ -1267,7 +1275,7 @@ pf_purge_expired_src_nodes(void)
>  {
>  	struct pf_src_node	*cur, *next;
>  
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
>  		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
> @@ -1303,7 +1311,7 @@ pf_src_tree_remove_state(struct pf_state
>  void
>  pf_remove_state(struct pf_state *cur)
>  {
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  	/* handle load balancing related tasks */
>  	pf_postprocess_addr(cur);
> @@ -1320,7 +1328,6 @@ pf_remove_state(struct pf_state *cur)
>  	}
>  	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
>  #if NPFLOW > 0
> -	if (cur->state_flags & PFSTATE_PFLOW)
>  		export_pflow(cur);
>  #endif	/* NPFLOW > 0 */
>  #if NPFSYNC > 0
> @@ -1350,7 +1357,7 @@ pf_free_state(struct pf_state *cur)
>  {
>  	struct pf_rule_item *ri;
>  
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  #if NPFSYNC > 0
>  	if (pfsync_state_in_use(cur))
> @@ -1386,7 +1393,7 @@ pf_purge_expired_states(u_int32_t maxche
>  	static struct pf_state	*cur = NULL;
>  	struct pf_state		*next;
>  
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  	while (maxcheck--) {
>  		/* wrap to start of list when we hit the end */
> @@ -3146,13 +3153,13 @@ pf_socket_lookup(struct pf_pdesc *pd)
>  	case IPPROTO_TCP:
>  		sport = pd->hdr.tcp.th_sport;
>  		dport = pd->hdr.tcp.th_dport;
> -		NET_ASSERT_LOCKED();
> +		PF_ASSERT_LOCKED();
>  		tb = &tcbtable;
>  		break;
>  	case IPPROTO_UDP:
>  		sport = pd->hdr.udp.uh_sport;
>  		dport = pd->hdr.udp.uh_dport;
> -		NET_ASSERT_LOCKED();
> +		PF_ASSERT_LOCKED();
>  		tb = &udbtable;
>  		break;
>  	default:
> @@ -6673,6 +6680,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  	/* if packet sits in reassembly queue, return without error */
>  	if (pd.m == NULL)
>  		return PF_PASS;
> +
>  	if (action != PF_PASS) {
>  #if NPFLOG > 0
>  		pd.pflog |= PF_LOG_FORCE;
> @@ -6692,6 +6700,9 @@ pf_test(sa_family_t af, int fwdir, struc
>  	}
>  	pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED;
>  
> +	/* lock the look-up/write section of pf_test() */
> +	PF_LOCK();
> +
>  	switch (pd.virtual_proto) {
>  
>  	case PF_VPROTO_FRAGMENT: {
> @@ -6711,7 +6722,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  			REASON_SET(&reason, PFRES_NORM);
>  			DPFPRINTF(LOG_NOTICE,
>  			    "dropping IPv6 packet with ICMPv4 payload");
> -			goto done;
> +			goto unlock;
>  		}
>  		action = pf_test_state_icmp(&pd, &s, &reason);
>  		if (action == PF_PASS || action == PF_AFRT) {
> @@ -6736,7 +6747,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  			REASON_SET(&reason, PFRES_NORM);
>  			DPFPRINTF(LOG_NOTICE,
>  			    "dropping IPv4 packet with ICMPv6 payload");
> -			goto done;
> +			goto unlock;
>  		}
>  		action = pf_test_state_icmp(&pd, &s, &reason);
>  		if (action == PF_PASS || action == PF_AFRT) {
> @@ -6761,7 +6772,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  			pqid = 1;
>  		action = pf_normalize_tcp(&pd);
>  		if (action == PF_DROP)
> -			goto done;
> +			goto unlock;
>  		}
>  		action = pf_test_state(&pd, &s, &reason);
>  		if (action == PF_PASS || action == PF_AFRT) {
> @@ -6788,6 +6799,15 @@ pf_test(sa_family_t af, int fwdir, struc
>  		break;
>  	}
>  
> +unlock:
> +	PF_UNLOCK();
> +
> +	/*
> +	 * At the moment, we rely on NET_LOCK() to prevent removal of items
> +	 * we've collected above ('s', 'r', 'anchor' and 'ruleset'). They'll
> +	 * have to be refcounted when NET_LOCK() is gone.
> +	 */
> +
>  done:
>  	if (action != PF_DROP) {
>  		if (s) {
> @@ -6991,6 +7011,7 @@ done:
>  	}
>  
>  	*m0 = pd.m;
> +
>  	return (action);
>  }
>  
> diff -r 85b6b6ce74cd src/sys/net/pf_ioctl.c
> --- a/src/sys/net/pf_ioctl.c	Tue May 30 20:11:44 2017 +0200
> +++ b/src/sys/net/pf_ioctl.c	Tue May 30 20:27:43 2017 +0200
> @@ -129,6 +129,8 @@ struct {
>  TAILQ_HEAD(pf_tags, pf_tagname)	pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags),
>  	pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
>  
> +struct rwlock		 pf_lock = RWLOCK_INITIALIZER("pf_lock");
> +
>  #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
>  #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
>  #endif
> @@ -999,6 +1001,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
>  	}
>  
>  	NET_LOCK(s);
> +	PF_LOCK();
>  	switch (cmd) {
>  
>  	case DIOCSTART:
> @@ -2452,6 +2455,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
>  		break;
>  	}
>  fail:
> +	PF_UNLOCK();
>  	NET_UNLOCK(s);
>  	return (error);
>  }
> diff -r 85b6b6ce74cd src/sys/net/pf_norm.c
> --- a/src/sys/net/pf_norm.c	Tue May 30 20:11:44 2017 +0200
> +++ b/src/sys/net/pf_norm.c	Tue May 30 20:27:43 2017 +0200
> @@ -39,6 +39,7 @@
>  #include <sys/time.h>
>  #include <sys/pool.h>
>  #include <sys/syslog.h>
> +#include <sys/mutex.h>
>  
>  #include <net/if.h>
>  #include <net/if_var.h>
> @@ -135,6 +136,12 @@ struct pool		 pf_frent_pl, pf_frag_pl;
>  struct pool		 pf_state_scrub_pl;
>  int			 pf_nfrents;
>  
> +struct mutex		 pf_frag_mtx;
> +
> +#define PF_FRAG_LOCK_INIT()	mtx_init(&pf_frag_mtx, IPL_SOFTNET)
> +#define PF_FRAG_LOCK()		mtx_enter(&pf_frag_mtx)
> +#define PF_FRAG_UNLOCK()	mtx_leave(&pf_frag_mtx)
> +
>  void
>  pf_normalize_init(void)
>  {
> @@ -149,6 +156,8 @@ pf_normalize_init(void)
>  	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
>  
>  	TAILQ_INIT(&pf_fragqueue);
> +
> +	PF_FRAG_LOCK_INIT();
>  }
>  
>  static __inline int
> @@ -176,15 +185,18 @@ pf_purge_expired_fragments(void)
>  	struct pf_fragment	*frag;
>  	int32_t			 expire;
>  
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_UNLOCKED();
>  
>  	expire = time_uptime - pf_default_rule.timeout[PFTM_FRAG];
> +
> +	PF_FRAG_LOCK();
>  	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
>  		if (frag->fr_timeout > expire)
>  			break;
>  		DPFPRINTF(LOG_NOTICE, "expiring %d(%p)", frag->fr_id, frag);
>  		pf_free_fragment(frag);
>  	}
> +	PF_FRAG_UNLOCK();
>  }
>  
>  /*
> @@ -533,14 +545,19 @@ pf_reassemble(struct mbuf **m0, int dir,
>  	key.fr_id = ip->ip_id;
>  	key.fr_direction = dir;
>  
> -	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
> +	PF_FRAG_LOCK();
> +	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
> +		PF_FRAG_UNLOCK();
>  		return (PF_DROP);
> +	}
>  
>  	/* The mbuf is part of the fragment entry, no direct free or access */
>  	m = *m0 = NULL;
>  
> -	if (!pf_isfull_fragment(frag))
> +	if (!pf_isfull_fragment(frag)) {
> +		PF_FRAG_UNLOCK();
>  		return (PF_PASS);	/* drop because *m0 is NULL, no error */
> +	}
>  
>  	/* We have all the data */
>  	frent = TAILQ_FIRST(&frag->fr_queue);
> @@ -564,6 +581,7 @@ pf_reassemble(struct mbuf **m0, int dir,
>  	ip->ip_off &= ~(IP_MF|IP_OFFMASK);
>  
>  	if (hdrlen + total > IP_MAXPACKET) {
> +		PF_FRAG_UNLOCK();
>  		DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
>  		ip->ip_len = 0;
>  		REASON_SET(reason, PFRES_SHORT);
>  		/* PF_DROP requires a valid mbuf *m0 in pf_test() */
>  		return (PF_DROP);
>  	}
>  
> +	PF_FRAG_UNLOCK();
>  	DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip->ip_len));
>  	return (PF_PASS);
>  }
> 
> @@ -610,14 +629,19 @@ pf_reassemble6(struct mbuf **m0, struct
>  	key.fr_id = fraghdr->ip6f_ident;
>  	key.fr_direction = dir;
>  
> -	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
> +	PF_FRAG_LOCK();
> +	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
> +		PF_FRAG_UNLOCK();
>  		return (PF_DROP);
> +	}
>  
>  	/* The mbuf is part of the fragment entry, no direct free or access */
>  	m = *m0 = NULL;
>  
> -	if (!pf_isfull_fragment(frag))
> +	if (!pf_isfull_fragment(frag)) {
> +		PF_FRAG_UNLOCK();
>  		return (PF_PASS);	/* drop because *m0 is NULL, no error */
> +	}
>  
>  	/* We have all the data */
>  	extoff = frent->fe_extoff;
> @@ -671,17 +695,20 @@ pf_reassemble6(struct mbuf **m0, struct
>  	ip6->ip6_nxt = proto;
>  
>  	if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
> +		PF_FRAG_UNLOCK();
>  		DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
>  		ip6->ip6_plen = 0;
>  		REASON_SET(reason, PFRES_SHORT);
>  		/* PF_DROP requires a valid mbuf *m0 in pf_test6() */
>  		return (PF_DROP);
>  	}
> +	PF_FRAG_UNLOCK();
>  
>  	DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip6->ip6_plen));
>  	return (PF_PASS);
>  
>  fail:
> +	PF_FRAG_UNLOCK();
>  	REASON_SET(reason, PFRES_MEMORY);
>  	/* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later */
>  	return (PF_DROP);
> diff -r 85b6b6ce74cd src/sys/net/pfvar_priv.h
> --- a/src/sys/net/pfvar_priv.h	Tue May 30 20:11:44 2017 +0200
> +++ b/src/sys/net/pfvar_priv.h	Tue May 30 20:27:43 2017 +0200
> @@ -37,6 +37,10 @@
>  
>  #ifdef _KERNEL
>  
> +#include <sys/rwlock.h>
> +
> +extern struct rwlock	pf_lock;
> +
>  struct pf_pdesc {
>  	struct {
>  		int	 done;
> @@ -94,6 +98,29 @@ struct pf_pdesc {
>  	} hdr;
>  };
>  
> +extern struct rwlock	pf_lock;
> +
> +#define PF_LOCK()	do {			\
> +		NET_ASSERT_LOCKED();		\
> +		rw_enter_write(&pf_lock);	\
> +	} while (0)
> +
> +#define PF_UNLOCK()	do {			\
> +		PF_ASSERT_LOCKED();		\
> +		rw_exit_write(&pf_lock);	\
> +	} while (0)
> +
> +#define PF_ASSERT_LOCKED()	do {			\
> +		if (rw_status(&pf_lock) != RW_WRITE)	\
> +			splassert_fail(RW_WRITE,	\
> +			    rw_status(&pf_lock),__func__);\
> +	} while (0)
> +
> +#define PF_ASSERT_UNLOCKED()	do {			\
> +		if (rw_status(&pf_lock) == RW_WRITE)	\
> +			splassert_fail(0, rw_status(&pf_lock), __func__);\
> +	} while (0)
> +
>  #endif /* _KERNEL */
>  
>  #endif /* _NET_PFVAR_PRIV_H_ */
> --------8<---------------8<---------------8<------------------8<--------
> 