On Mon, Jul 19, 2021 at 10:56:32PM +0200, Alexander Bluhm wrote:
> On Mon, Jul 19, 2021 at 08:02:30PM +0300, Vitaliy Makkoveev wrote:
> > I mean the case when ip_local() called by ip_ours(). Unfortunately, I'm
> > not familiar with PPTP but it looks affected because it don't use tcp or
> > udp as transport but encapsulates them into ip frames. Sorry for noise
> > if I'm wrong.
> > 
> >  +ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
> >  +{
> >  +  /* We are already in a IPv4/IPv6 local deliver loop. */
> >  +  if (af != AF_UNSPEC)
> >  +          return ip_local(mp, offp, nxt, af);
> >  +
> >  +  niq_enqueue(&ipintrq, *mp);
> >  +  *mp = NULL;
> >  +  return IPPROTO_DONE;
> >  +}
> 
> The af != AF_UNSPEC case already has the exclusive net lock.
> ipv4_input() sets AF_UNSPEC, the other case is for IP in IP header.
> The latter is called from ipintr().
> 
> I can put a NET_ASSERT_WLOCKED() into ip_local().  Still running a
> full regress with that.
> 

Thanks!

I was wrong about PPTP case, but for PPPOE case pipex(4) and pppoe(4)
are accessible with shared netlock through (*ifp->if_input) ->
ether_input().

I like to turn pipex(4) statistics into per-CPU counters and introduce
mutex(9) for MPPE stuff. This should be enough for parallelisation.

> bluhm
> 
> Index: net/if.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
> retrieving revision 1.642
> diff -u -p -r1.642 if.c
> --- net/if.c  30 Jun 2021 13:23:33 -0000      1.642
> +++ net/if.c  19 Jul 2021 14:51:31 -0000
> @@ -109,6 +109,10 @@
>  #include <netinet6/ip6_var.h>
>  #endif
>  
> +#ifdef IPSEC
> +#include <netinet/ip_ipsp.h>
> +#endif
> +
>  #ifdef MPLS
>  #include <netmpls/mpls.h>
>  #endif
> @@ -238,7 +242,7 @@ int       ifq_congestion;
>  
>  int           netisr;
>  
> -#define      NET_TASKQ       1
> +#define      NET_TASKQ       4
>  struct taskq *nettqmp[NET_TASKQ];
>  
>  struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
> @@ -815,6 +819,7 @@ void
>  if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
>  {
>       struct mbuf *m;
> +     int exclusive_lock = 0;
>  
>       if (ml_empty(ml))
>               return;
> @@ -834,10 +839,25 @@ if_input_process(struct ifnet *ifp, stru
>        * to PF globals, pipex globals, unicast and multicast addresses
>        * lists and the socket layer.
>        */
> -     NET_LOCK();
> +
> +     /*
> +      * XXXSMP IPsec data structures are not ready to be
> +      * accessed by multiple Network threads in parallel.
> +      */
> +     if (ipsec_in_use)
> +             exclusive_lock = 1;
> +     if (exclusive_lock)
> +             NET_LOCK();
> +     else
> +             NET_RLOCK_IN_SOFTNET();
> +
>       while ((m = ml_dequeue(ml)) != NULL)
>               (*ifp->if_input)(ifp, m);
> -     NET_UNLOCK();
> +
> +     if (exclusive_lock)
> +             NET_UNLOCK();
> +     else
> +             NET_RUNLOCK_IN_SOFTNET();
>  }
>  
>  void
> @@ -895,6 +915,12 @@ if_netisr(void *unused)
>                       KERNEL_UNLOCK();
>               }
>  #endif
> +             if (n & (1 << NETISR_IP))
> +                     ipintr();
> +#ifdef INET6
> +             if (n & (1 << NETISR_IPV6))
> +                     ip6intr();
> +#endif
>  #if NPPP > 0
>               if (n & (1 << NETISR_PPP)) {
>                       KERNEL_LOCK();
> @@ -3311,17 +3337,14 @@ unhandled_af(int af)
>       panic("unhandled af %d", af);
>  }
>  
> -/*
> - * XXXSMP This tunable is here to work around the fact that IPsec
> - * globals aren't ready to be accessed by multiple threads in
> - * parallel.
> - */
> -int           nettaskqs = NET_TASKQ;
> -
>  struct taskq *
>  net_tq(unsigned int ifindex)
>  {
>       struct taskq *t = NULL;
> +     static int nettaskqs;
> +
> +     if (nettaskqs == 0)
> +             nettaskqs = min(NET_TASKQ, ncpus);
>  
>       t = nettqmp[ifindex % nettaskqs];
>  
> Index: net/if_ethersubr.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
> retrieving revision 1.275
> diff -u -p -r1.275 if_ethersubr.c
> --- net/if_ethersubr.c        7 Jul 2021 20:19:01 -0000       1.275
> +++ net/if_ethersubr.c        19 Jul 2021 14:32:48 -0000
> @@ -222,7 +222,10 @@ ether_resolve(struct ifnet *ifp, struct 
>  
>       switch (af) {
>       case AF_INET:
> +             KERNEL_LOCK();
> +             /* XXXSMP there is a MP race in arpresolve() */
>               error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
> +             KERNEL_UNLOCK();
>               if (error)
>                       return (error);
>               eh->ether_type = htons(ETHERTYPE_IP);
> @@ -245,7 +248,10 @@ ether_resolve(struct ifnet *ifp, struct 
>               break;
>  #ifdef INET6
>       case AF_INET6:
> +             KERNEL_LOCK();
> +             /* XXXSMP there is a MP race in nd6_resolve() */
>               error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
> +             KERNEL_UNLOCK();
>               if (error)
>                       return (error);
>               eh->ether_type = htons(ETHERTYPE_IPV6);
> Index: net/ifq.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/ifq.c,v
> retrieving revision 1.44
> diff -u -p -r1.44 ifq.c
> --- net/ifq.c 9 Jul 2021 01:22:05 -0000       1.44
> +++ net/ifq.c 19 Jul 2021 14:32:48 -0000
> @@ -243,7 +243,7 @@ void
>  ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
>  {
>       ifq->ifq_if = ifp;
> -     ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */
> +     ifq->ifq_softnet = net_tq(ifp->if_index + idx);
>       ifq->ifq_softc = NULL;
>  
>       mtx_init(&ifq->ifq_mtx, IPL_NET);
> @@ -620,7 +620,7 @@ void
>  ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
>  {
>       ifiq->ifiq_if = ifp;
> -     ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
> +     ifiq->ifiq_softnet = net_tq(ifp->if_index + idx);
>       ifiq->ifiq_softc = NULL;
>  
>       mtx_init(&ifiq->ifiq_mtx, IPL_NET);
> Index: net/netisr.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/netisr.h,v
> retrieving revision 1.55
> diff -u -p -r1.55 netisr.h
> --- net/netisr.h      5 Jan 2021 20:43:36 -0000       1.55
> +++ net/netisr.h      19 Jul 2021 14:32:48 -0000
> @@ -41,8 +41,10 @@
>   * interrupt used for scheduling the network code to calls
>   * on the lowest level routine of each protocol.
>   */
> +#define      NETISR_IP       2               /* same as AF_INET */
>  #define      NETISR_PFSYNC   5               /* for pfsync "immediate" tx */
>  #define      NETISR_ARP      18              /* same as AF_LINK */
> +#define      NETISR_IPV6     24              /* same as AF_INET6 */
>  #define      NETISR_PPP      28              /* for PPP processing */
>  #define      NETISR_BRIDGE   29              /* for bridge processing */
>  #define      NETISR_SWITCH   31              /* for switch dataplane */
> @@ -57,6 +59,8 @@ extern int  netisr;                 /* scheduling bits 
>  extern struct task if_input_task_locked;
>  
>  void arpintr(void);
> +void ipintr(void);
> +void ip6intr(void);
>  void pppintr(void);
>  void bridgeintr(void);
>  void switchintr(void);
> Index: net/pfkeyv2.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfkeyv2.c,v
> retrieving revision 1.218
> diff -u -p -r1.218 pfkeyv2.c
> --- net/pfkeyv2.c     14 Jul 2021 22:39:26 -0000      1.218
> +++ net/pfkeyv2.c     19 Jul 2021 14:48:34 -0000
> @@ -2019,14 +2019,6 @@ pfkeyv2_send(struct socket *so, void *me
>                       }
>                       TAILQ_INSERT_HEAD(&ipsec_policy_head, ipo, ipo_list);
>                       ipsec_in_use++;
> -                     /*
> -                      * XXXSMP IPsec data structures are not ready to be
> -                      * accessed by multiple Network threads in parallel,
> -                      * so force all packets to be processed by the first
> -                      * one.
> -                      */
> -                     extern int nettaskqs;
> -                     nettaskqs = 1;
>               } else {
>                       ipo->ipo_last_searched = ipo->ipo_flags = 0;
>               }
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.363
> diff -u -p -r1.363 ip_input.c
> --- netinet/ip_input.c        21 Jun 2021 22:09:14 -0000      1.363
> +++ netinet/ip_input.c        19 Jul 2021 20:32:15 -0000
> @@ -130,6 +130,8 @@ const struct sysctl_bounded_args ipctl_v
>       { IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
>  };
>  
> +struct niqueue ipintrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IP);
> +
>  struct pool ipqent_pool;
>  struct pool ipq_pool;
>  
> @@ -143,6 +145,7 @@ static struct mbuf_queue  ipsendraw_mq;
>  extern struct niqueue                arpinq;
>  
>  int  ip_ours(struct mbuf **, int *, int, int);
> +int  ip_local(struct mbuf **, int *, int, int);
>  int  ip_dooptions(struct mbuf *, struct ifnet *);
>  int  in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);
>  
> @@ -230,6 +233,43 @@ ip_init(void)
>  }
>  
>  /*
> + * Enqueue packet for local delivery.  Queuing is used as a boundary
> + * between the network layer (input/forward path) running with shared
> + * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
> + */
> +int
> +ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
> +{
> +     /* We are already in a IPv4/IPv6 local deliver loop. */
> +     if (af != AF_UNSPEC)
> +             return ip_local(mp, offp, nxt, af);
> +
> +     niq_enqueue(&ipintrq, *mp);
> +     *mp = NULL;
> +     return IPPROTO_DONE;
> +}
> +
> +/*
> + * Dequeue and process locally delivered packets.
> + */
> +void
> +ipintr(void)
> +{
> +     struct mbuf *m;
> +     int off, nxt;
> +
> +     while ((m = niq_dequeue(&ipintrq)) != NULL) {
> +#ifdef DIAGNOSTIC
> +             if ((m->m_flags & M_PKTHDR) == 0)
> +                     panic("ipintr no HDR");
> +#endif
> +             off = 0;
> +             nxt = ip_local(&m, &off, IPPROTO_IPV4, AF_UNSPEC);
> +             KASSERT(nxt == IPPROTO_DONE);
> +     }
> +}
> +
> +/*
>   * IPv4 input routine.
>   *
>   * Checksum and byte swap header.  Process options. Forward or deliver.
> @@ -514,7 +554,7 @@ ip_input_if(struct mbuf **mp, int *offp,
>   * If fragmented try to reassemble.  Pass to next level.
>   */
>  int
> -ip_ours(struct mbuf **mp, int *offp, int nxt, int af)
> +ip_local(struct mbuf **mp, int *offp, int nxt, int af)
>  {
>       struct mbuf *m = *mp;
>       struct ip *ip = mtod(m, struct ip *);
> @@ -522,6 +562,8 @@ ip_ours(struct mbuf **mp, int *offp, int
>       struct ipqent *ipqe;
>       int mff, hlen;
>  
> +     NET_ASSERT_WLOCKED();
> +
>       hlen = ip->ip_hl << 2;
>  
>       /*
> @@ -1665,7 +1707,8 @@ ip_sysctl(int *name, u_int namelen, void
>                   newlen));
>  #endif
>       case IPCTL_IFQUEUE:
> -             return (EOPNOTSUPP);
> +             return (sysctl_niq(name + 1, namelen - 1,
> +                 oldp, oldlenp, newp, newlen, &ipintrq));
>       case IPCTL_ARPQUEUE:
>               return (sysctl_niq(name + 1, namelen - 1,
>                   oldp, oldlenp, newp, newlen, &arpinq));
> Index: netinet/ip_var.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
> retrieving revision 1.88
> diff -u -p -r1.88 ip_var.h
> --- netinet/ip_var.h  30 Mar 2021 08:37:11 -0000      1.88
> +++ netinet/ip_var.h  19 Jul 2021 14:32:48 -0000
> @@ -248,7 +248,6 @@ void       ip_stripoptions(struct mbuf *);
>  int   ip_sysctl(int *, u_int, void *, size_t *, void *, size_t);
>  void  ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
>           struct mbuf *);
> -void  ipintr(void);
>  int   ip_input_if(struct mbuf **, int *, int, int, struct ifnet *);
>  int   ip_deliver(struct mbuf **, int *, int, int);
>  void  ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int);
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
> retrieving revision 1.237
> diff -u -p -r1.237 ip6_input.c
> --- netinet6/ip6_input.c      3 Jun 2021 04:47:54 -0000       1.237
> +++ netinet6/ip6_input.c      19 Jul 2021 20:31:14 -0000
> @@ -115,11 +115,14 @@
>  #include <netinet/ip_carp.h>
>  #endif
>  
> +struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IPV6);
> +
>  struct cpumem *ip6counters;
>  
>  uint8_t ip6_soiikey[IP6_SOIIKEY_LEN];
>  
>  int ip6_ours(struct mbuf **, int *, int, int);
> +int ip6_local(struct mbuf **, int *, int, int);
>  int ip6_check_rh0hdr(struct mbuf *, int *);
>  int ip6_hbhchcheck(struct mbuf *, int *, int *, int *);
>  int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
> @@ -162,6 +165,43 @@ ip6_init(void)
>       ip6counters = counters_alloc(ip6s_ncounters);
>  }
>  
> +/*
> + * Enqueue packet for local delivery.  Queuing is used as a boundary
> + * between the network layer (input/forward path) running with shared
> + * NET_RLOCK_IN_SOFTNET() and the transport layer needing it exclusively.
> + */
> +int
> +ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
> +{
> +     /* We are already in a IPv4/IPv6 local deliver loop. */
> +     if (af != AF_UNSPEC)
> +             return ip6_local(mp, offp, nxt, af);
> +
> +     niq_enqueue(&ip6intrq, *mp);
> +     *mp = NULL;
> +     return IPPROTO_DONE;
> +}
> +
> +/*
> + * Dequeue and process locally delivered packets.
> + */
> +void
> +ip6intr(void)
> +{
> +     struct mbuf *m;
> +     int off, nxt;
> +
> +     while ((m = niq_dequeue(&ip6intrq)) != NULL) {
> +#ifdef DIAGNOSTIC
> +             if ((m->m_flags & M_PKTHDR) == 0)
> +                     panic("ip6intr no HDR");
> +#endif
> +             off = 0;
> +             nxt = ip6_local(&m, &off, IPPROTO_IPV6, AF_UNSPEC);
> +             KASSERT(nxt == IPPROTO_DONE);
> +     }
> +}
> +
>  void
>  ipv6_input(struct ifnet *ifp, struct mbuf *m)
>  {
> @@ -544,8 +584,10 @@ ip6_input_if(struct mbuf **mp, int *offp
>  }
>  
>  int
> -ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
> +ip6_local(struct mbuf **mp, int *offp, int nxt, int af)
>  {
> +     NET_ASSERT_WLOCKED();
> +
>       if (ip6_hbhchcheck(*mp, offp, &nxt, NULL))
>               return IPPROTO_DONE;
>  
> @@ -1470,7 +1512,8 @@ ip6_sysctl(int *name, u_int namelen, voi
>               NET_UNLOCK();
>               return (error);
>       case IPV6CTL_IFQUEUE:
> -             return (EOPNOTSUPP);
> +             return (sysctl_niq(name + 1, namelen - 1,
> +                 oldp, oldlenp, newp, newlen, &ip6intrq));
>       case IPV6CTL_SOIIKEY:
>               return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen));
>       default:
> 

Reply via email to