On Mon, Jul 19, 2021 at 10:56:32PM +0200, Alexander Bluhm wrote: > On Mon, Jul 19, 2021 at 08:02:30PM +0300, Vitaliy Makkoveev wrote: > > I mean the case when ip_local() called by ip_ours(). Unfortunately, I'm > > not familiar with PPTP but it looks affected because it don't use tcp or > > udp as transport but encapsulates them into ip frames. Sorry for noise > > if I'm wrong. > > > > +ip_ours(struct mbuf **mp, int *offp, int nxt, int af) > > +{ > > + /* We are already in a IPv4/IPv6 local deliver loop. */ > > + if (af != AF_UNSPEC) > > + return ip_local(mp, offp, nxt, af); > > + > > + niq_enqueue(&ipintrq, *mp); > > + *mp = NULL; > > + return IPPROTO_DONE; > > +} > > The af != AF_UNSPEC case already has the exclusive net lock. > ipv4_input() sets AF_UNSPEC, the other case is for IP in IP header. > The latter is called from ipintr(). > > I can put a NET_ASSERT_WLOCKED() into ip_local(). Still running a > full regress with that. >
Thanks! I was wrong about PPTP case, but for PPPOE case pipex(4) and pppoe(4) are accessible with shared netlock through (*ifp->if_input) -> ether_input(). I like to turn pipex(4) statistics into per-CPU counters and introduce mutex(9) for MPPE stuff. This should be enough for parallelisation. > bluhm > > Index: net/if.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v > retrieving revision 1.642 > diff -u -p -r1.642 if.c > --- net/if.c 30 Jun 2021 13:23:33 -0000 1.642 > +++ net/if.c 19 Jul 2021 14:51:31 -0000 > @@ -109,6 +109,10 @@ > #include <netinet6/ip6_var.h> > #endif > > +#ifdef IPSEC > +#include <netinet/ip_ipsp.h> > +#endif > + > #ifdef MPLS > #include <netmpls/mpls.h> > #endif > @@ -238,7 +242,7 @@ int ifq_congestion; > > int netisr; > > -#define NET_TASKQ 1 > +#define NET_TASKQ 4 > struct taskq *nettqmp[NET_TASKQ]; > > struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL); > @@ -815,6 +819,7 @@ void > if_input_process(struct ifnet *ifp, struct mbuf_list *ml) > { > struct mbuf *m; > + int exclusive_lock = 0; > > if (ml_empty(ml)) > return; > @@ -834,10 +839,25 @@ if_input_process(struct ifnet *ifp, stru > * to PF globals, pipex globals, unicast and multicast addresses > * lists and the socket layer. > */ > - NET_LOCK(); > + > + /* > + * XXXSMP IPsec data structures are not ready to be > + * accessed by multiple Network threads in parallel. > + */ > + if (ipsec_in_use) > + exclusive_lock = 1; > + if (exclusive_lock) > + NET_LOCK(); > + else > + NET_RLOCK_IN_SOFTNET(); > + > while ((m = ml_dequeue(ml)) != NULL) > (*ifp->if_input)(ifp, m); > - NET_UNLOCK(); > + > + if (exclusive_lock) > + NET_UNLOCK(); > + else > + NET_RUNLOCK_IN_SOFTNET(); > } > > void > @@ -895,6 +915,12 @@ if_netisr(void *unused) > KERNEL_UNLOCK(); > } > #endif > + if (n & (1 << NETISR_IP)) > + ipintr(); > +#ifdef INET6 > + if (n & (1 << NETISR_IPV6)) > + ip6intr(); > +#endif > #if NPPP > 0 > if (n & (1 << NETISR_PPP)) { > KERNEL_LOCK(); > @@ -3311,17 +3337,14 @@ unhandled_af(int af) > panic("unhandled af %d", af); > } > > -/* > - * XXXSMP This tunable is here to work around the fact that IPsec > - * globals aren't ready to be accessed by multiple threads in > - * parallel. > - */ > -int nettaskqs = NET_TASKQ; > - > struct taskq * > net_tq(unsigned int ifindex) > { > struct taskq *t = NULL; > + static int nettaskqs; > + > + if (nettaskqs == 0) > + nettaskqs = min(NET_TASKQ, ncpus); > > t = nettqmp[ifindex % nettaskqs]; > > Index: net/if_ethersubr.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v > retrieving revision 1.275 > diff -u -p -r1.275 if_ethersubr.c > --- net/if_ethersubr.c 7 Jul 2021 20:19:01 -0000 1.275 > +++ net/if_ethersubr.c 19 Jul 2021 14:32:48 -0000 > @@ -222,7 +222,10 @@ ether_resolve(struct ifnet *ifp, struct > > switch (af) { > case AF_INET: > + KERNEL_LOCK(); > + /* XXXSMP there is a MP race in arpresolve() */ > error = arpresolve(ifp, rt, m, dst, eh->ether_dhost); > + KERNEL_UNLOCK(); > if (error) > return (error); > eh->ether_type = htons(ETHERTYPE_IP); > @@ -245,7 +248,10 @@ ether_resolve(struct ifnet *ifp, struct > break; > #ifdef INET6 > case AF_INET6: > + KERNEL_LOCK(); > + /* XXXSMP there is a MP race in nd6_resolve() */ > error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost); > + KERNEL_UNLOCK(); > if (error) > return (error); > eh->ether_type = htons(ETHERTYPE_IPV6); > Index: net/ifq.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/ifq.c,v > retrieving revision 1.44 > diff -u -p -r1.44 ifq.c > --- net/ifq.c 9 Jul 2021 01:22:05 -0000 1.44 > +++ net/ifq.c 19 Jul 2021 14:32:48 -0000 > @@ -243,7 +243,7 @@ void > ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx) > { > ifq->ifq_if = ifp; > - ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */ > + ifq->ifq_softnet = net_tq(ifp->if_index + idx); > ifq->ifq_softc = NULL; > > mtx_init(&ifq->ifq_mtx, IPL_NET); > @@ -620,7 +620,7 @@ void > ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx) > { > ifiq->ifiq_if = ifp; > - ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */ > + ifiq->ifiq_softnet = net_tq(ifp->if_index + idx); > ifiq->ifiq_softc = NULL; > > mtx_init(&ifiq->ifiq_mtx, IPL_NET); > Index: net/netisr.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/netisr.h,v > retrieving revision 1.55 > diff -u -p -r1.55 netisr.h > --- net/netisr.h 5 Jan 2021 20:43:36 -0000 1.55 > +++ net/netisr.h 19 Jul 2021 14:32:48 -0000 > @@ -41,8 +41,10 @@ > * interrupt used for scheduling the network code to calls > * on the lowest level routine of each protocol. > */ > +#define NETISR_IP 2 /* same as AF_INET */ > #define NETISR_PFSYNC 5 /* for pfsync "immediate" tx */ > #define NETISR_ARP 18 /* same as AF_LINK */ > +#define NETISR_IPV6 24 /* same as AF_INET6 */ > #define NETISR_PPP 28 /* for PPP processing */ > #define NETISR_BRIDGE 29 /* for bridge processing */ > #define NETISR_SWITCH 31 /* for switch dataplane */ > @@ -57,6 +59,8 @@ extern int netisr; /* scheduling bits > extern struct task if_input_task_locked; > > void arpintr(void); > +void ipintr(void); > +void ip6intr(void); > void pppintr(void); > void bridgeintr(void); > void switchintr(void); > Index: net/pfkeyv2.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfkeyv2.c,v > retrieving revision 1.218 > diff -u -p -r1.218 pfkeyv2.c > --- net/pfkeyv2.c 14 Jul 2021 22:39:26 -0000 1.218 > +++ net/pfkeyv2.c 19 Jul 2021 14:48:34 -0000 > @@ -2019,14 +2019,6 @@ pfkeyv2_send(struct socket *so, void *me > } > TAILQ_INSERT_HEAD(&ipsec_policy_head, ipo, ipo_list); > ipsec_in_use++; > - /* > - * XXXSMP IPsec data structures are not ready to be > - * accessed by multiple Network threads in parallel, > - * so force all packets to be processed by the first > - * one. > - */ > - extern int nettaskqs; > - nettaskqs = 1; > } else { > ipo->ipo_last_searched = ipo->ipo_flags = 0; > } > Index: netinet/ip_input.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v > retrieving revision 1.363 > diff -u -p -r1.363 ip_input.c > --- netinet/ip_input.c 21 Jun 2021 22:09:14 -0000 1.363 > +++ netinet/ip_input.c 19 Jul 2021 20:32:15 -0000 > @@ -130,6 +130,8 @@ const struct sysctl_bounded_args ipctl_v > { IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX }, > }; > > +struct niqueue ipintrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IP); > + > struct pool ipqent_pool; > struct pool ipq_pool; > > @@ -143,6 +145,7 @@ static struct mbuf_queue ipsendraw_mq; > extern struct niqueue arpinq; > > int ip_ours(struct mbuf **, int *, int, int); > +int ip_local(struct mbuf **, int *, int, int); > int ip_dooptions(struct mbuf *, struct ifnet *); > int in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **); > > @@ -230,6 +233,43 @@ ip_init(void) > } > > /* > + * Enqueue packet for local delivery. Queuing is used as a boundary > + * between the network layer (input/forward path) running with shared > + * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively. > + */ > +int > +ip_ours(struct mbuf **mp, int *offp, int nxt, int af) > +{ > + /* We are already in a IPv4/IPv6 local deliver loop. */ > + if (af != AF_UNSPEC) > + return ip_local(mp, offp, nxt, af); > + > + niq_enqueue(&ipintrq, *mp); > + *mp = NULL; > + return IPPROTO_DONE; > +} > + > +/* > + * Dequeue and process locally delivered packets. > + */ > +void > +ipintr(void) > +{ > + struct mbuf *m; > + int off, nxt; > + > + while ((m = niq_dequeue(&ipintrq)) != NULL) { > +#ifdef DIAGNOSTIC > + if ((m->m_flags & M_PKTHDR) == 0) > + panic("ipintr no HDR"); > +#endif > + off = 0; > + nxt = ip_local(&m, &off, IPPROTO_IPV4, AF_UNSPEC); > + KASSERT(nxt == IPPROTO_DONE); > + } > +} > + > +/* > * IPv4 input routine. > * > * Checksum and byte swap header. Process options. Forward or deliver. > @@ -514,7 +554,7 @@ ip_input_if(struct mbuf **mp, int *offp, > * If fragmented try to reassemble. Pass to next level. > */ > int > -ip_ours(struct mbuf **mp, int *offp, int nxt, int af) > +ip_local(struct mbuf **mp, int *offp, int nxt, int af) > { > struct mbuf *m = *mp; > struct ip *ip = mtod(m, struct ip *); > @@ -522,6 +562,8 @@ ip_ours(struct mbuf **mp, int *offp, int > struct ipqent *ipqe; > int mff, hlen; > > + NET_ASSERT_WLOCKED(); > + > hlen = ip->ip_hl << 2; > > /* > @@ -1665,7 +1707,8 @@ ip_sysctl(int *name, u_int namelen, void > newlen)); > #endif > case IPCTL_IFQUEUE: > - return (EOPNOTSUPP); > + return (sysctl_niq(name + 1, namelen - 1, > + oldp, oldlenp, newp, newlen, &ipintrq)); > case IPCTL_ARPQUEUE: > return (sysctl_niq(name + 1, namelen - 1, > oldp, oldlenp, newp, newlen, &arpinq)); > Index: netinet/ip_var.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v > retrieving revision 1.88 > diff -u -p -r1.88 ip_var.h > --- netinet/ip_var.h 30 Mar 2021 08:37:11 -0000 1.88 > +++ netinet/ip_var.h 19 Jul 2021 14:32:48 -0000 > @@ -248,7 +248,6 @@ void ip_stripoptions(struct mbuf *); > int ip_sysctl(int *, u_int, void *, size_t *, void *, size_t); > void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, > struct mbuf *); > -void ipintr(void); > int ip_input_if(struct mbuf **, int *, int, int, struct ifnet *); > int ip_deliver(struct mbuf **, int *, int, int); > void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int); > Index: netinet6/ip6_input.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v > retrieving revision 1.237 > diff -u -p -r1.237 ip6_input.c > --- netinet6/ip6_input.c 3 Jun 2021 04:47:54 -0000 1.237 > +++ netinet6/ip6_input.c 19 Jul 2021 20:31:14 -0000 > @@ -115,11 +115,14 @@ > #include <netinet/ip_carp.h> > #endif > > +struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IPV6); > + > struct cpumem *ip6counters; > > uint8_t ip6_soiikey[IP6_SOIIKEY_LEN]; > > int ip6_ours(struct mbuf **, int *, int, int); > +int ip6_local(struct mbuf **, int *, int, int); > int ip6_check_rh0hdr(struct mbuf *, int *); > int ip6_hbhchcheck(struct mbuf *, int *, int *, int *); > int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); > @@ -162,6 +165,43 @@ ip6_init(void) > ip6counters = counters_alloc(ip6s_ncounters); > } > > +/* > + * Enqueue packet for local delivery. Queuing is used as a boundary > + * between the network layer (input/forward path) running with shared > + * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively. > + */ > +int > +ip6_ours(struct mbuf **mp, int *offp, int nxt, int af) > +{ > + /* We are already in a IPv4/IPv6 local deliver loop. */ > + if (af != AF_UNSPEC) > + return ip6_local(mp, offp, nxt, af); > + > + niq_enqueue(&ip6intrq, *mp); > + *mp = NULL; > + return IPPROTO_DONE; > +} > + > +/* > + * Dequeue and process locally delivered packets. > + */ > +void > +ip6intr(void) > +{ > + struct mbuf *m; > + int off, nxt; > + > + while ((m = niq_dequeue(&ip6intrq)) != NULL) { > +#ifdef DIAGNOSTIC > + if ((m->m_flags & M_PKTHDR) == 0) > + panic("ip6intr no HDR"); > +#endif > + off = 0; > + nxt = ip6_local(&m, &off, IPPROTO_IPV6, AF_UNSPEC); > + KASSERT(nxt == IPPROTO_DONE); > + } > +} > + > void > ipv6_input(struct ifnet *ifp, struct mbuf *m) > { > @@ -544,8 +584,10 @@ ip6_input_if(struct mbuf **mp, int *offp > } > > int > -ip6_ours(struct mbuf **mp, int *offp, int nxt, int af) > +ip6_local(struct mbuf **mp, int *offp, int nxt, int af) > { > + NET_ASSERT_WLOCKED(); > + > if (ip6_hbhchcheck(*mp, offp, &nxt, NULL)) > return IPPROTO_DONE; > > @@ -1470,7 +1512,8 @@ ip6_sysctl(int *name, u_int namelen, voi > NET_UNLOCK(); > return (error); > case IPV6CTL_IFQUEUE: > - return (EOPNOTSUPP); > + return (sysctl_niq(name + 1, namelen - 1, > + oldp, oldlenp, newp, newlen, &ip6intrq)); > case IPV6CTL_SOIIKEY: > return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen)); > default: >