On Thu, Apr 22, 2021 at 08:34:35PM +0200, Hrvoje Popovski wrote:
> r620-1# ppupaavnamn_iinccif::ca :u p
> lpptooo(ooo0llx__lcc_faacfcafccfhhhfeef__eii_fttifet8eme2m_m2__4m
> m3aamgga9iig0ci8cc,__ cc_hhc0eehxcebcc,kk ::k0  :mm ,bbm ub1ufu)fpf
> pp-ll>  lcc  ppceup
>   fkfrerfererneee e e llli:ils sitptsa t m mgomoeod
> ddififafiuifieieldedtd: : t :iri atitetepmem,m   a
> ca dodaddddrdre r = 000x
> fxfStopped at      ml_dequeue+0x19:        movq    0x8(%rax),%rcx
>     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
>  376415  75094      0     0x14000      0x200    2  softnet
> *419998  20034      0     0x14000      0x200    1K softnet
> ml_dequeue(fffffd83b3332b98) at ml_dequeue+0x19
> arpresolve(ffff800000087048,fffffd83b2b61b68,fffffd80cd2a2500,ffff800000031d90,
> ffff800024868188) at arpresolve+0x36a
> ether_resolve(ffff800000087048,fffffd80cd2a2500,ffff800000031d90,fffffd83b2b61b
> 68,ffff800024868188) at ether_resolve+0x1c1
> ether_output(ffff800000087048,fffffd80cd2a2500,ffff800000031d90,fffffd83b2b61b6
> 8) at ether_output+0x2c
> ip_output(fffffd80cd2a2500,0,ffff800024868400,1,0,0) at ip_output+0xa2e
> ip_forward(fffffd80cd2a2500,ffff800000082048,fffffd83b2b61b68,0) at
> ip_forward+0x261
> ip_input_if(ffff800024868538,ffff800024868544,4,0,ffff800000082048) at
> ip_input_if+0x608
> ipv4_input(ffff800000082048,fffffd80cd2a2500) at ipv4_input+0x39
> if_input_process(ffff800000082048,ffff8000248685b8) at if_input_process+0x6f
> ifiq_process(ffff800000080f00) at ifiq_process+0x69
> taskq_thread(ffff80000002f100) at taskq_thread+0x81
> end trace frame: 0x0, count: 4

Looking into ARP code it does not look MP ready.  ml_dequeue() is
writing next pointer without lock into mbuf.  I have replaced mbuf
list with mbuf queue and made la_hold_total atomic.  Does that fix
your panic with ARP?

There are more global variables in ARP to fix.  But let's see if
this is an improvement.

bluhm

Index: net/if.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
retrieving revision 1.640
diff -u -p -r1.640 if.c
--- net/if.c    26 Mar 2021 22:41:06 -0000      1.640
+++ net/if.c    23 Apr 2021 14:41:10 -0000
@@ -238,7 +238,7 @@ int ifq_congestion;

 int             netisr;

-#define        NET_TASKQ       1
+#define        NET_TASKQ       4
 struct taskq   *nettqmp[NET_TASKQ];

 struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
@@ -834,10 +834,10 @@ if_input_process(struct ifnet *ifp, stru
         * to PF globals, pipex globals, unicast and multicast addresses
         * lists and the socket layer.
         */
-       NET_LOCK();
+       NET_RLOCK_IN_SOFTNET();
        while ((m = ml_dequeue(ml)) != NULL)
                (*ifp->if_input)(ifp, m);
-       NET_UNLOCK();
+       NET_RUNLOCK_IN_SOFTNET();
 }

 void
@@ -895,6 +895,12 @@ if_netisr(void *unused)
                        KERNEL_UNLOCK();
                }
 #endif
+               if (n & (1 << NETISR_IP))
+                       ipintr();
+#ifdef INET6
+               if (n & (1 << NETISR_IPV6))
+                       ip6intr();
+#endif
 #if NPPP > 0
                if (n & (1 << NETISR_PPP)) {
                        KERNEL_LOCK();
@@ -3316,12 +3322,15 @@ unhandled_af(int af)
  * globals aren't ready to be accessed by multiple threads in
  * parallel.
  */
-int             nettaskqs = NET_TASKQ;
+int             nettaskqs;

 struct taskq *
 net_tq(unsigned int ifindex)
 {
        struct taskq *t = NULL;
+
+       if (nettaskqs == 0)
+               nettaskqs = min(NET_TASKQ, ncpus);

        t = nettqmp[ifindex % nettaskqs];

Index: net/if_ethersubr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.274
diff -u -p -r1.274 if_ethersubr.c
--- net/if_ethersubr.c  7 Mar 2021 06:02:32 -0000       1.274
+++ net/if_ethersubr.c  23 Apr 2021 16:20:01 -0000
@@ -245,7 +245,10 @@ ether_resolve(struct ifnet *ifp, struct
                break;
 #ifdef INET6
        case AF_INET6:
+               KERNEL_LOCK();
+               /* XXXSMP there is a MP race in nd6_resolve() */
                error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
+               KERNEL_UNLOCK();
                if (error)
                        return (error);
                eh->ether_type = htons(ETHERTYPE_IPV6);
Index: net/ifq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/ifq.c,v
retrieving revision 1.43
diff -u -p -r1.43 ifq.c
--- net/ifq.c   20 Feb 2021 04:37:26 -0000      1.43
+++ net/ifq.c   23 Apr 2021 14:41:10 -0000
@@ -243,7 +243,7 @@ void
 ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
 {
        ifq->ifq_if = ifp;
-       ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */
+       ifq->ifq_softnet = net_tq(ifp->if_index + idx);
        ifq->ifq_softc = NULL;

        mtx_init(&ifq->ifq_mtx, IPL_NET);
@@ -617,7 +617,7 @@ void
 ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
 {
        ifiq->ifiq_if = ifp;
-       ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
+       ifiq->ifiq_softnet = net_tq(ifp->if_index + idx);
        ifiq->ifiq_softc = NULL;

        mtx_init(&ifiq->ifiq_mtx, IPL_NET);
Index: net/netisr.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/netisr.h,v
retrieving revision 1.55
diff -u -p -r1.55 netisr.h
--- net/netisr.h        5 Jan 2021 20:43:36 -0000       1.55
+++ net/netisr.h        23 Apr 2021 14:41:10 -0000
@@ -41,8 +41,10 @@
  * interrupt used for scheduling the network code to calls
  * on the lowest level routine of each protocol.
  */
+#define        NETISR_IP       2               /* same as AF_INET */
 #define        NETISR_PFSYNC   5               /* for pfsync "immediate" tx */
 #define        NETISR_ARP      18              /* same as AF_LINK */
+#define        NETISR_IPV6     24              /* same as AF_INET6 */
 #define        NETISR_PPP      28              /* for PPP processing */
 #define        NETISR_BRIDGE   29              /* for bridge processing */
 #define        NETISR_SWITCH   31              /* for switch dataplane */
@@ -57,6 +59,8 @@ extern int    netisr;                 /* scheduling bits
 extern struct task if_input_task_locked;

 void   arpintr(void);
+void   ipintr(void);
+void   ip6intr(void);
 void   pppintr(void);
 void   bridgeintr(void);
 void   switchintr(void);
Index: netinet/if_ether.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.243
diff -u -p -r1.243 if_ether.c
--- netinet/if_ether.c  24 Jun 2020 22:03:43 -0000      1.243
+++ netinet/if_ether.c  23 Apr 2021 17:03:09 -0000
@@ -67,7 +67,7 @@
 struct llinfo_arp {
        LIST_ENTRY(llinfo_arp)   la_list;
        struct rtentry          *la_rt;         /* backpointer to rtentry */
-       struct mbuf_list         la_ml;         /* packet hold queue */
+       struct mbuf_queue        la_mq;         /* packet hold queue */
        time_t                   la_refreshed;  /* when was refresh sent */
        int                      la_asked;      /* number of queries sent */
 };
@@ -188,7 +188,7 @@ arp_rtrequest(struct ifnet *ifp, int req
                        break;
                }

-               ml_init(&la->la_ml);
+               mq_init(&la->la_mq, LA_HOLD_QUEUE, IPL_SOFTNET);
                la->la_rt = rt;
                rt->rt_flags |= RTF_LLINFO;
                if ((rt->rt_flags & RTF_LOCAL) == 0)
@@ -202,7 +202,7 @@ arp_rtrequest(struct ifnet *ifp, int req
                LIST_REMOVE(la, la_list);
                rt->rt_llinfo = NULL;
                rt->rt_flags &= ~RTF_LLINFO;
-               la_hold_total -= ml_purge(&la->la_ml);
+               atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
                pool_put(&arp_pool, la);
                break;

@@ -373,18 +373,11 @@ arpresolve(struct ifnet *ifp, struct rte
         * response yet. Insert mbuf in hold queue if below limit
         * if above the limit free the queue without queuing the new packet.
         */
-       if (la_hold_total < LA_HOLD_TOTAL) {
-               struct mbuf *mh;
-
-               if (ml_len(&la->la_ml) >= LA_HOLD_QUEUE) {
-                       mh = ml_dequeue(&la->la_ml);
-                       la_hold_total--;
-                       m_freem(mh);
-               }
-               ml_enqueue(&la->la_ml, m);
-               la_hold_total++;
+       if (atomic_inc_int_nv(&la_hold_total) <= LA_HOLD_TOTAL) {
+               if (mq_push(&la->la_mq, m) != 0)
+                       atomic_dec_int(&la_hold_total);
        } else {
-               la_hold_total -= ml_purge(&la->la_ml);
+               atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq) + 1);
                m_freem(m);
        }

@@ -413,7 +406,8 @@ arpresolve(struct ifnet *ifp, struct rte
                                rt->rt_expire += arpt_down;
                                la->la_asked = 0;
                                la->la_refreshed = 0;
-                               la_hold_total -= ml_purge(&la->la_ml);
+                               atomic_sub_int(&la_hold_total,
+                                   mq_purge(&la->la_mq));
                        }
                }
        }
@@ -599,6 +593,7 @@ arpcache(struct ifnet *ifp, struct ether
        struct in_addr *spa = (struct in_addr *)ea->arp_spa;
        char addr[INET_ADDRSTRLEN];
        struct ifnet *rifp;
+       struct mbuf *m;
        unsigned int len;
        int changed = 0;

@@ -671,20 +666,16 @@ arpcache(struct ifnet *ifp, struct ether

        la->la_asked = 0;
        la->la_refreshed = 0;
-       while ((len = ml_len(&la->la_ml)) != 0) {
-               struct mbuf *mh;
+       while ((m = mq_dequeue(&la->la_mq)) != NULL) {
+               atomic_dec_int(&la_hold_total);
+               len = mq_len(&la->la_mq);

-               mh = ml_dequeue(&la->la_ml);
-               la_hold_total--;
+               ifp->if_output(ifp, m, rt_key(rt), rt);

-               ifp->if_output(ifp, mh, rt_key(rt), rt);
-
-               if (ml_len(&la->la_ml) == len) {
+               /* XXXSMP we discard if other CPU enqueues */
+               if (mq_len(&la->la_mq) >= len) {
                        /* mbuf is back in queue. Discard. */
-                       while ((mh = ml_dequeue(&la->la_ml)) != NULL) {
-                               la_hold_total--;
-                               m_freem(mh);
-                       }
+                       atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
                        break;
                }
        }
@@ -698,7 +689,7 @@ arpinvalidate(struct rtentry *rt)
        struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
        struct sockaddr_dl *sdl = satosdl(rt->rt_gateway);

-       la_hold_total -= ml_purge(&la->la_ml);
+       atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
        sdl->sdl_alen = 0;
        la->la_asked = 0;
 }
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.356
diff -u -p -r1.356 ip_input.c
--- netinet/ip_input.c  30 Mar 2021 08:37:10 -0000      1.356
+++ netinet/ip_input.c  23 Apr 2021 14:41:10 -0000
@@ -131,6 +131,8 @@ const struct sysctl_bounded_args ipctl_v
        { IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
 };

+struct niqueue ipintrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IP);
+
 struct pool ipqent_pool;
 struct pool ipq_pool;

@@ -144,6 +146,7 @@ static struct mbuf_queue    ipsendraw_mq;
 extern struct niqueue          arpinq;

 int    ip_ours(struct mbuf **, int *, int, int);
+int    ip_local(struct mbuf **, int *, int, int);
 int    ip_dooptions(struct mbuf *, struct ifnet *);
 int    in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);

@@ -230,6 +233,43 @@ ip_init(void)
 }

 /*
+ * Enqueue packet for local delivery.  Queuing is used as a boundary
+ * between the network layer (input/forward path) running with shared
+ * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
+ */
+int
+ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
+{
+       /* We are already in a IPv4/IPv6 local deliver loop. */
+       if (af != AF_UNSPEC)
+               return ip_local(mp, offp, nxt, af);
+
+       niq_enqueue(&ipintrq, *mp);
+       *mp = NULL;
+       return IPPROTO_DONE;
+}
+
+/*
+ * Dequeue and process locally delivered packets.
+ */
+void
+ipintr(void)
+{
+       struct mbuf *m;
+       int off, nxt;
+
+       while ((m = niq_dequeue(&ipintrq)) != NULL) {
+#ifdef DIAGNOSTIC
+               if ((m->m_flags & M_PKTHDR) == 0)
+                       panic("ipintr no HDR");
+#endif
+               off = 0;
+               nxt = ip_local(&m, &off, IPPROTO_IPV4, AF_UNSPEC);
+               KASSERT(nxt == IPPROTO_DONE);
+       }
+}
+
+/*
  * IPv4 input routine.
  *
  * Checksum and byte swap header.  Process options. Forward or deliver.
@@ -488,7 +528,7 @@ ip_input_if(struct mbuf **mp, int *offp,
  * If fragmented try to reassemble.  Pass to next level.
  */
 int
-ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
+ip_local(struct mbuf **mp, int *offp, int nxt, int af)
 {
        struct mbuf *m = *mp;
        struct ip *ip = mtod(m, struct ip *);
@@ -1639,7 +1679,8 @@ ip_sysctl(int *name, u_int namelen, void
                    newlen));
 #endif
        case IPCTL_IFQUEUE:
-               return (EOPNOTSUPP);
+               return (sysctl_niq(name + 1, namelen - 1,
+                   oldp, oldlenp, newp, newlen, &ipintrq));
        case IPCTL_ARPQUEUE:
                return (sysctl_niq(name + 1, namelen - 1,
                    oldp, oldlenp, newp, newlen, &arpinq));
Index: netinet/ip_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.88
diff -u -p -r1.88 ip_var.h
--- netinet/ip_var.h    30 Mar 2021 08:37:11 -0000      1.88
+++ netinet/ip_var.h    23 Apr 2021 16:39:18 -0000
@@ -248,7 +248,6 @@ void         ip_stripoptions(struct mbuf *);
 int     ip_sysctl(int *, u_int, void *, size_t *, void *, size_t);
 void    ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
            struct mbuf *);
-void    ipintr(void);
 int     ip_input_if(struct mbuf **, int *, int, int, struct ifnet *);
 int     ip_deliver(struct mbuf **, int *, int, int);
 void    ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int);
Index: netinet6/ip6_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.232
diff -u -p -r1.232 ip6_input.c
--- netinet6/ip6_input.c        10 Mar 2021 10:21:49 -0000      1.232
+++ netinet6/ip6_input.c        23 Apr 2021 14:41:10 -0000
@@ -115,11 +115,14 @@
 #include <netinet/ip_carp.h>
 #endif

+struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IPV6);
+
 struct cpumem *ip6counters;

 uint8_t ip6_soiikey[IP6_SOIIKEY_LEN];

 int ip6_ours(struct mbuf **, int *, int, int);
+int ip6_local(struct mbuf **, int *, int, int);
 int ip6_check_rh0hdr(struct mbuf *, int *);
 int ip6_hbhchcheck(struct mbuf *, int *, int *, int *);
 int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
@@ -162,6 +165,43 @@ ip6_init(void)
        ip6counters = counters_alloc(ip6s_ncounters);
 }

+/*
+ * Enqueue packet for local delivery.  Queuing is used as a boundary
+ * between the network layer (input/forward path) running with shared
+ * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
+ */
+int
+ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
+{
+       /* We are already in a IPv4/IPv6 local deliver loop. */
+       if (af != AF_UNSPEC)
+               return ip6_local(mp, offp, nxt, af);
+
+       niq_enqueue(&ip6intrq, *mp);
+       *mp = NULL;
+       return IPPROTO_DONE;
+}
+
+/*
+ * Dequeue and process locally delivered packets.
+ */
+void
+ip6intr(void)
+{
+       struct mbuf *m;
+       int off, nxt;
+
+       while ((m = niq_dequeue(&ip6intrq)) != NULL) {
+#ifdef DIAGNOSTIC
+               if ((m->m_flags & M_PKTHDR) == 0)
+                       panic("ip6intr no HDR");
+#endif
+               off = 0;
+               nxt = ip6_local(&m, &off, IPPROTO_IPV6, AF_UNSPEC);
+               KASSERT(nxt == IPPROTO_DONE);
+       }
+}
+
 void
 ipv6_input(struct ifnet *ifp, struct mbuf *m)
 {
@@ -526,7 +566,7 @@ ip6_input_if(struct mbuf **mp, int *offp
 }

 int
-ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
+ip6_local(struct mbuf **mp, int *offp, int nxt, int af)
 {
        if (ip6_hbhchcheck(*mp, offp, &nxt, NULL))
                return IPPROTO_DONE;
@@ -1452,7 +1492,8 @@ ip6_sysctl(int *name, u_int namelen, voi
                NET_UNLOCK();
                return (error);
        case IPV6CTL_IFQUEUE:
-               return (EOPNOTSUPP);
+               return (sysctl_niq(name + 1, namelen - 1,
+                   oldp, oldlenp, newp, newlen, &ip6intrq));
        case IPV6CTL_SOIIKEY:
                return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen));
        default:

Reply via email to