On Thu, Apr 22, 2021 at 08:34:35PM +0200, Hrvoje Popovski wrote: > r620-1# ppupaavnamn_iinccif::ca :u p > lpptooo(ooo0llx__lcc_faacfcafccfhhhfeef__eii_fttifet8eme2m_m2__4m > m3aamgga9iig0ci8cc,__ cc_hhc0eehxcebcc,kk ::k0 :mm ,bbm ub1ufu)fpf > pp-ll> lcc ppceup > fkfrerfererneee e e llli:ils sitptsa t m mgomoeod > ddififafiuifieieldedtd: : t :iri atitetepmem,m a > ca dodaddddrdre r = 000x > fxfStopped at ml_dequeue+0x19: movq 0x8(%rax),%rcx > TID PID UID PRFLAGS PFLAGS CPU COMMAND > 376415 75094 0 0x14000 0x200 2 softnet > *419998 20034 0 0x14000 0x200 1K softnet > ml_dequeue(fffffd83b3332b98) at ml_dequeue+0x19 > arpresolve(ffff800000087048,fffffd83b2b61b68,fffffd80cd2a2500,ffff800000031d90, > ffff800024868188) at arpresolve+0x36a > ether_resolve(ffff800000087048,fffffd80cd2a2500,ffff800000031d90,fffffd83b2b61b > 68,ffff800024868188) at ether_resolve+0x1c1 > ether_output(ffff800000087048,fffffd80cd2a2500,ffff800000031d90,fffffd83b2b61b6 > 8) at ether_output+0x2c > ip_output(fffffd80cd2a2500,0,ffff800024868400,1,0,0) at ip_output+0xa2e > ip_forward(fffffd80cd2a2500,ffff800000082048,fffffd83b2b61b68,0) at > ip_forward+0x261 > ip_input_if(ffff800024868538,ffff800024868544,4,0,ffff800000082048) at > ip_input_if+0x608 > ipv4_input(ffff800000082048,fffffd80cd2a2500) at ipv4_input+0x39 > if_input_process(ffff800000082048,ffff8000248685b8) at if_input_process+0x6f > ifiq_process(ffff800000080f00) at ifiq_process+0x69 > taskq_thread(ffff80000002f100) at taskq_thread+0x81 > end trace frame: 0x0, count: 4
Looking into ARP code it does not look MP ready. ml_dequeue() is writing next pointer without lock into mbuf. I have replaced mbuf list with mbuf queue and made la_hold_total atomic. Does that fix your panic with ARP? There are more global variables in ARP to fix. But let's see if this is an improvement. bluhm Index: net/if.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v retrieving revision 1.640 diff -u -p -r1.640 if.c --- net/if.c 26 Mar 2021 22:41:06 -0000 1.640 +++ net/if.c 23 Apr 2021 14:41:10 -0000 @@ -238,7 +238,7 @@ int ifq_congestion; int netisr; -#define NET_TASKQ 1 +#define NET_TASKQ 4 struct taskq *nettqmp[NET_TASKQ]; struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL); @@ -834,10 +834,10 @@ if_input_process(struct ifnet *ifp, stru * to PF globals, pipex globals, unicast and multicast addresses * lists and the socket layer. */ - NET_LOCK(); + NET_RLOCK_IN_SOFTNET(); while ((m = ml_dequeue(ml)) != NULL) (*ifp->if_input)(ifp, m); - NET_UNLOCK(); + NET_RUNLOCK_IN_SOFTNET(); } void @@ -895,6 +895,12 @@ if_netisr(void *unused) KERNEL_UNLOCK(); } #endif + if (n & (1 << NETISR_IP)) + ipintr(); +#ifdef INET6 + if (n & (1 << NETISR_IPV6)) + ip6intr(); +#endif #if NPPP > 0 if (n & (1 << NETISR_PPP)) { KERNEL_LOCK(); @@ -3316,12 +3322,15 @@ unhandled_af(int af) * globals aren't ready to be accessed by multiple threads in * parallel. */ -int nettaskqs = NET_TASKQ; +int nettaskqs; struct taskq * net_tq(unsigned int ifindex) { struct taskq *t = NULL; + + if (nettaskqs == 0) + nettaskqs = min(NET_TASKQ, ncpus); t = nettqmp[ifindex % nettaskqs]; Index: net/if_ethersubr.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v retrieving revision 1.274 diff -u -p -r1.274 if_ethersubr.c --- net/if_ethersubr.c 7 Mar 2021 06:02:32 -0000 1.274 +++ net/if_ethersubr.c 23 Apr 2021 16:20:01 -0000 @@ -245,7 +245,10 @@ ether_resolve(struct ifnet *ifp, struct break; #ifdef INET6 case AF_INET6: + KERNEL_LOCK(); + /* XXXSMP there is a MP race in nd6_resolve() */ error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost); + KERNEL_UNLOCK(); if (error) return (error); eh->ether_type = htons(ETHERTYPE_IPV6); Index: net/ifq.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/ifq.c,v retrieving revision 1.43 diff -u -p -r1.43 ifq.c --- net/ifq.c 20 Feb 2021 04:37:26 -0000 1.43 +++ net/ifq.c 23 Apr 2021 14:41:10 -0000 @@ -243,7 +243,7 @@ void ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx) { ifq->ifq_if = ifp; - ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */ + ifq->ifq_softnet = net_tq(ifp->if_index + idx); ifq->ifq_softc = NULL; mtx_init(&ifq->ifq_mtx, IPL_NET); @@ -617,7 +617,7 @@ void ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx) { ifiq->ifiq_if = ifp; - ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */ + ifiq->ifiq_softnet = net_tq(ifp->if_index + idx); ifiq->ifiq_softc = NULL; mtx_init(&ifiq->ifiq_mtx, IPL_NET); Index: net/netisr.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/netisr.h,v retrieving revision 1.55 diff -u -p -r1.55 netisr.h --- net/netisr.h 5 Jan 2021 20:43:36 -0000 1.55 +++ net/netisr.h 23 Apr 2021 14:41:10 -0000 @@ -41,8 +41,10 @@ * interrupt used for scheduling the network code to calls * on the lowest level routine of each protocol. */ +#define NETISR_IP 2 /* same as AF_INET */ #define NETISR_PFSYNC 5 /* for pfsync "immediate" tx */ #define NETISR_ARP 18 /* same as AF_LINK */ +#define NETISR_IPV6 24 /* same as AF_INET6 */ #define NETISR_PPP 28 /* for PPP processing */ #define NETISR_BRIDGE 29 /* for bridge processing */ #define NETISR_SWITCH 31 /* for switch dataplane */ @@ -57,6 +59,8 @@ extern int netisr; /* scheduling bits extern struct task if_input_task_locked; void arpintr(void); +void ipintr(void); +void ip6intr(void); void pppintr(void); void bridgeintr(void); void switchintr(void); Index: netinet/if_ether.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v retrieving revision 1.243 diff -u -p -r1.243 if_ether.c --- netinet/if_ether.c 24 Jun 2020 22:03:43 -0000 1.243 +++ netinet/if_ether.c 23 Apr 2021 17:03:09 -0000 @@ -67,7 +67,7 @@ struct llinfo_arp { LIST_ENTRY(llinfo_arp) la_list; struct rtentry *la_rt; /* backpointer to rtentry */ - struct mbuf_list la_ml; /* packet hold queue */ + struct mbuf_queue la_mq; /* packet hold queue */ time_t la_refreshed; /* when was refresh sent */ int la_asked; /* number of queries sent */ }; @@ -188,7 +188,7 @@ arp_rtrequest(struct ifnet *ifp, int req break; } - ml_init(&la->la_ml); + mq_init(&la->la_mq, LA_HOLD_QUEUE, IPL_SOFTNET); la->la_rt = rt; rt->rt_flags |= RTF_LLINFO; if ((rt->rt_flags & RTF_LOCAL) == 0) @@ -202,7 +202,7 @@ arp_rtrequest(struct ifnet *ifp, int req LIST_REMOVE(la, la_list); rt->rt_llinfo = NULL; rt->rt_flags &= ~RTF_LLINFO; - la_hold_total -= ml_purge(&la->la_ml); + atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq)); pool_put(&arp_pool, la); break; @@ -373,18 +373,11 @@ arpresolve(struct ifnet *ifp, struct rte * response yet. Insert mbuf in hold queue if below limit * if above the limit free the queue without queuing the new packet. */ - if (la_hold_total < LA_HOLD_TOTAL) { - struct mbuf *mh; - - if (ml_len(&la->la_ml) >= LA_HOLD_QUEUE) { - mh = ml_dequeue(&la->la_ml); - la_hold_total--; - m_freem(mh); - } - ml_enqueue(&la->la_ml, m); - la_hold_total++; + if (atomic_inc_int_nv(&la_hold_total) <= LA_HOLD_TOTAL) { + if (mq_push(&la->la_mq, m) != 0) + atomic_dec_int(&la_hold_total); } else { - la_hold_total -= ml_purge(&la->la_ml); + atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq) + 1); m_freem(m); } @@ -413,7 +406,8 @@ arpresolve(struct ifnet *ifp, struct rte rt->rt_expire += arpt_down; la->la_asked = 0; la->la_refreshed = 0; - la_hold_total -= ml_purge(&la->la_ml); + atomic_sub_int(&la_hold_total, + mq_purge(&la->la_mq)); } } } @@ -599,6 +593,7 @@ arpcache(struct ifnet *ifp, struct ether struct in_addr *spa = (struct in_addr *)ea->arp_spa; char addr[INET_ADDRSTRLEN]; struct ifnet *rifp; + struct mbuf *m; unsigned int len; int changed = 0; @@ -671,20 +666,16 @@ arpcache(struct ifnet *ifp, struct ether la->la_asked = 0; la->la_refreshed = 0; - while ((len = ml_len(&la->la_ml)) != 0) { - struct mbuf *mh; + while ((m = mq_dequeue(&la->la_mq)) != NULL) { + atomic_dec_int(&la_hold_total); + len = mq_len(&la->la_mq); - mh = ml_dequeue(&la->la_ml); - la_hold_total--; + ifp->if_output(ifp, m, rt_key(rt), rt); - ifp->if_output(ifp, mh, rt_key(rt), rt); - - if (ml_len(&la->la_ml) == len) { + /* XXXSMP we discard if other CPU enqueues */ + if (mq_len(&la->la_mq) >= len) { /* mbuf is back in queue. Discard. */ - while ((mh = ml_dequeue(&la->la_ml)) != NULL) { - la_hold_total--; - m_freem(mh); - } + atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq)); break; } } @@ -698,7 +689,7 @@ arpinvalidate(struct rtentry *rt) struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo; struct sockaddr_dl *sdl = satosdl(rt->rt_gateway); - la_hold_total -= ml_purge(&la->la_ml); + atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq)); sdl->sdl_alen = 0; la->la_asked = 0; } Index: netinet/ip_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v retrieving revision 1.356 diff -u -p -r1.356 ip_input.c --- netinet/ip_input.c 30 Mar 2021 08:37:10 -0000 1.356 +++ netinet/ip_input.c 23 Apr 2021 14:41:10 -0000 @@ -131,6 +131,8 @@ const struct sysctl_bounded_args ipctl_v { IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX }, }; +struct niqueue ipintrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IP); + struct pool ipqent_pool; struct pool ipq_pool; @@ -144,6 +146,7 @@ static struct mbuf_queue ipsendraw_mq; extern struct niqueue arpinq; int ip_ours(struct mbuf **, int *, int, int); +int ip_local(struct mbuf **, int *, int, int); int ip_dooptions(struct mbuf *, struct ifnet *); int in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **); @@ -230,6 +233,43 @@ ip_init(void) } /* + * Enqueue packet for local delivery. Queuing is used as a boundary + * between the network layer (input/forward path) running with shared + * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively. + */ +int +ip_ours(struct mbuf **mp, int *offp, int nxt, int af) +{ + /* We are already in a IPv4/IPv6 local deliver loop. */ + if (af != AF_UNSPEC) + return ip_local(mp, offp, nxt, af); + + niq_enqueue(&ipintrq, *mp); + *mp = NULL; + return IPPROTO_DONE; +} + +/* + * Dequeue and process locally delivered packets. + */ +void +ipintr(void) +{ + struct mbuf *m; + int off, nxt; + + while ((m = niq_dequeue(&ipintrq)) != NULL) { +#ifdef DIAGNOSTIC + if ((m->m_flags & M_PKTHDR) == 0) + panic("ipintr no HDR"); +#endif + off = 0; + nxt = ip_local(&m, &off, IPPROTO_IPV4, AF_UNSPEC); + KASSERT(nxt == IPPROTO_DONE); + } +} + +/* * IPv4 input routine. * * Checksum and byte swap header. Process options. Forward or deliver. @@ -488,7 +528,7 @@ ip_input_if(struct mbuf **mp, int *offp, * If fragmented try to reassemble. Pass to next level. */ int -ip_ours(struct mbuf **mp, int *offp, int nxt, int af) +ip_local(struct mbuf **mp, int *offp, int nxt, int af) { struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); @@ -1639,7 +1679,8 @@ ip_sysctl(int *name, u_int namelen, void newlen)); #endif case IPCTL_IFQUEUE: - return (EOPNOTSUPP); + return (sysctl_niq(name + 1, namelen - 1, + oldp, oldlenp, newp, newlen, &ipintrq)); case IPCTL_ARPQUEUE: return (sysctl_niq(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, &arpinq)); Index: netinet/ip_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v retrieving revision 1.88 diff -u -p -r1.88 ip_var.h --- netinet/ip_var.h 30 Mar 2021 08:37:11 -0000 1.88 +++ netinet/ip_var.h 23 Apr 2021 16:39:18 -0000 @@ -248,7 +248,6 @@ void ip_stripoptions(struct mbuf *); int ip_sysctl(int *, u_int, void *, size_t *, void *, size_t); void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, struct mbuf *); -void ipintr(void); int ip_input_if(struct mbuf **, int *, int, int, struct ifnet *); int ip_deliver(struct mbuf **, int *, int, int); void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int); Index: netinet6/ip6_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v retrieving revision 1.232 diff -u -p -r1.232 ip6_input.c --- netinet6/ip6_input.c 10 Mar 2021 10:21:49 -0000 1.232 +++ netinet6/ip6_input.c 23 Apr 2021 14:41:10 -0000 @@ -115,11 +115,14 @@ #include <netinet/ip_carp.h> #endif +struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IPV6); + struct cpumem *ip6counters; uint8_t ip6_soiikey[IP6_SOIIKEY_LEN]; int ip6_ours(struct mbuf **, int *, int, int); +int ip6_local(struct mbuf **, int *, int, int); int ip6_check_rh0hdr(struct mbuf *, int *); int ip6_hbhchcheck(struct mbuf *, int *, int *, int *); int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); @@ -162,6 +165,43 @@ ip6_init(void) ip6counters = counters_alloc(ip6s_ncounters); } +/* + * Enqueue packet for local delivery. Queuing is used as a boundary + * between the network layer (input/forward path) running with shared + * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively. + */ +int +ip6_ours(struct mbuf **mp, int *offp, int nxt, int af) +{ + /* We are already in a IPv4/IPv6 local deliver loop. */ + if (af != AF_UNSPEC) + return ip6_local(mp, offp, nxt, af); + + niq_enqueue(&ip6intrq, *mp); + *mp = NULL; + return IPPROTO_DONE; +} + +/* + * Dequeue and process locally delivered packets. + */ +void +ip6intr(void) +{ + struct mbuf *m; + int off, nxt; + + while ((m = niq_dequeue(&ip6intrq)) != NULL) { +#ifdef DIAGNOSTIC + if ((m->m_flags & M_PKTHDR) == 0) + panic("ip6intr no HDR"); +#endif + off = 0; + nxt = ip6_local(&m, &off, IPPROTO_IPV6, AF_UNSPEC); + KASSERT(nxt == IPPROTO_DONE); + } +} + void ipv6_input(struct ifnet *ifp, struct mbuf *m) { @@ -526,7 +566,7 @@ ip6_input_if(struct mbuf **mp, int *offp } int -ip6_ours(struct mbuf **mp, int *offp, int nxt, int af) +ip6_local(struct mbuf **mp, int *offp, int nxt, int af) { if (ip6_hbhchcheck(*mp, offp, &nxt, NULL)) return IPPROTO_DONE; @@ -1452,7 +1492,8 @@ ip6_sysctl(int *name, u_int namelen, voi NET_UNLOCK(); return (error); case IPV6CTL_IFQUEUE: - return (EOPNOTSUPP); + return (sysctl_niq(name + 1, namelen - 1, + oldp, oldlenp, newp, newlen, &ip6intrq)); case IPV6CTL_SOIIKEY: return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen)); default: