ping...

On Fri, Nov 04, 2022 at 10:04:35PM +0300, Vitaliy Makkoveev wrote:
> Each pflow(4) interface has an associated socket, referenced as sc->so.
> We set this socket in pflowioctl(), which is called with both the
> kernel and net locks held. The pflow_output_process() task dereferences
> sc->so under the kernel lock. But sosend(), called further down from
> pflow_output_process(), has sleep points introduced by solock(), so the
> kernel lock protection doesn't work as expected, and a concurrent
> pflowioctl() could replace sc->so.
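> 
> For illustration, the problematic interleaving looks roughly like this
> (a simplified sketch, not the exact code):
> 
>         /* pflow_output_process() task */
>         KERNEL_LOCK();
>         pflow_sendout_mbuf(sc, m);
>             /* ... which ends up in sosend(sc->so, ...); solock()
>              * can sleep there, and the kernel lock is released
>              * across the sleep, so pflowioctl() may run and
>              * replace sc->so under us. */
>         KERNEL_UNLOCK();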
> 
> The diff below introduces a per pflow(4) instance `sc_lock' rwlock(9)
> to protect sc->so. Since solock() on udp(4) sockets uses the net lock
> as its backend, `sc_lock' must be taken first. This slightly extends
> the net lock relocking within pflowioctl().
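> 
> The resulting lock order is `sc_lock' before the net lock. Paths that
> enter with the net lock already held therefore have to relock, roughly
> (a sketch of the pattern used in the diff):
> 
>         /* pflowioctl(), entered with the net lock held */
>         NET_UNLOCK();
>         rw_enter_read(&sc->sc_lock);    /* sc_lock first ... */
>         NET_LOCK();                     /* ... then the net lock again */
>         /* sc->so is now stable */
>         rw_exit_read(&sc->sc_lock);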
> 
> Also, pflow_sendout_mbuf(), called by pflow_output_process(), now runs
> without the kernel lock held, so the MP-safe counters_pkt(9) is used
> instead of the manual `if_opackets' increment.
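> 
> With per-CPU interface counters the output path no longer needs any
> serialization for the statistics; the sketch below assumes
> if_counters_alloc() was done at attach time, as in the diff:
> 
>         /* pflow_clone_create() */
>         if_counters_alloc(ifp);
> 
>         /* pflow_sendout_mbuf(), no kernel lock required */
>         counters_pkt(sc->sc_if.if_counters, ifc_opackets, ifc_obytes,
>             m->m_pkthdr.len);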
> 
> Since if_detach() does some ifnet destruction, it can't be called
> before the pflow_output_process() task has finished, otherwise we
> introduce a use after free on the interface counters. On the other
> hand, we need to prevent pflowioctl() from rescheduling the
> pflow_output_process() task. The `sc_dying' flag is introduced for
> that.
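> 
> Roughly, the intended ordering on destroy is (sketch):
> 
>         /* pflow_clone_destroy() */
>         NET_LOCK();
>         sc->sc_dying = 1;       /* pflowioctl() returns ENXIO from now
>                                    on, so the output task can no longer
>                                    be rescheduled */
>         NET_UNLOCK();
>         /* ... let a pending pflow_output_process() run to completion ... */
>         if_detach(ifp);         /* safe: counters no longer used */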
> 
> Hrvoje, could you test this diff please?
> 
> Index: sys/net/if_pflow.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_pflow.c,v
> retrieving revision 1.96
> diff -u -p -r1.96 if_pflow.c
> --- sys/net/if_pflow.c        12 Aug 2022 16:38:50 -0000      1.96
> +++ sys/net/if_pflow.c        4 Nov 2022 18:23:27 -0000
> @@ -132,11 +132,11 @@ pflow_output_process(void *arg)
>       struct mbuf *m;
>  
>       mq_delist(&sc->sc_outputqueue, &ml);
> -     KERNEL_LOCK();
> +     rw_enter_read(&sc->sc_lock);
>       while ((m = ml_dequeue(&ml)) != NULL) {
>               pflow_sendout_mbuf(sc, m);
>       }
> -     KERNEL_UNLOCK();
> +     rw_exit_read(&sc->sc_lock);
>  }
>  
>  int
> @@ -146,6 +146,7 @@ pflow_clone_create(struct if_clone *ifc,
>       struct pflow_softc      *pflowif;
>  
>       pflowif = malloc(sizeof(*pflowif), M_DEVBUF, M_WAITOK|M_ZERO);
> +     rw_init(&pflowif->sc_lock, "pflowlk");
>       MGET(pflowif->send_nam, M_WAIT, MT_SONAME);
>       pflowif->sc_version = PFLOW_PROTO_DEFAULT;
>  
> @@ -254,6 +255,7 @@ pflow_clone_create(struct if_clone *ifc,
>       mq_init(&pflowif->sc_outputqueue, 8192, IPL_SOFTNET);
>       pflow_setmtu(pflowif, ETHERMTU);
>       pflow_init_timeouts(pflowif);
> +     if_counters_alloc(ifp);
>       if_attach(ifp);
>       if_alloc_sadl(ifp);
>  
> @@ -275,6 +277,7 @@ pflow_clone_destroy(struct ifnet *ifp)
>       error = 0;
>  
>       NET_LOCK();
> +     sc->sc_dying = 1;
>       SLIST_REMOVE(&pflowif_list, sc, pflow_softc, sc_next);
>       NET_UNLOCK();
>  
> @@ -475,10 +478,17 @@ pflowioctl(struct ifnet *ifp, u_long cmd
>       struct pflowreq          pflowr;
>       int                      error;
>  
> +     if (sc->sc_dying)
> +             return ENXIO;
> +
>       switch (cmd) {
>       case SIOCSIFADDR:
>       case SIOCSIFDSTADDR:
>       case SIOCSIFFLAGS:
> +             /* XXXSMP: enforce lock order */
> +             NET_UNLOCK();
> +             rw_enter_read(&sc->sc_lock);
> +             NET_LOCK();
>               if ((ifp->if_flags & IFF_UP) && sc->so != NULL) {
>                       ifp->if_flags |= IFF_RUNNING;
>                       sc->sc_gcounter=pflowstats.pflow_flows;
> @@ -487,6 +497,7 @@ pflowioctl(struct ifnet *ifp, u_long cmd
>                               pflow_sendout_ipfix_tmpl(sc);
>               } else
>                       ifp->if_flags &= ~IFF_RUNNING;
> +             rw_exit_read(&sc->sc_lock);
>               break;
>       case SIOCSIFMTU:
>               if (ifr->ifr_mtu < PFLOW_MINMTU)
> @@ -523,10 +534,13 @@ pflowioctl(struct ifnet *ifp, u_long cmd
>  
>               /* XXXSMP breaks atomicity */
>               NET_UNLOCK();
> +             rw_enter_write(&sc->sc_lock);
>               error = pflow_set(sc, &pflowr);
>               NET_LOCK();
> -             if (error != 0)
> +             if (error != 0) {
> +                     rw_exit_write(&sc->sc_lock);
>                       return (error);
> +             }
>  
>               if ((ifp->if_flags & IFF_UP) && sc->so != NULL) {
>                       ifp->if_flags |= IFF_RUNNING;
> @@ -535,6 +549,7 @@ pflowioctl(struct ifnet *ifp, u_long cmd
>                               pflow_sendout_ipfix_tmpl(sc);
>               } else
>                       ifp->if_flags &= ~IFF_RUNNING;
> +             rw_exit_write(&sc->sc_lock);
>  
>               break;
>  
> @@ -1196,8 +1211,8 @@ pflow_sendout_ipfix_tmpl(struct pflow_so
>  int
>  pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m)
>  {
> -     sc->sc_if.if_opackets++;
> -     sc->sc_if.if_obytes += m->m_pkthdr.len;
> +     counters_pkt(sc->sc_if.if_counters,
> +                 ifc_opackets, ifc_obytes, m->m_pkthdr.len);
>  
>       if (sc->so == NULL) {
>               m_freem(m);
> Index: sys/net/if_pflow.h
> ===================================================================
> RCS file: /cvs/src/sys/net/if_pflow.h,v
> retrieving revision 1.18
> diff -u -p -r1.18 if_pflow.h
> --- sys/net/if_pflow.h        12 Aug 2022 16:38:50 -0000      1.18
> +++ sys/net/if_pflow.h        4 Nov 2022 18:23:27 -0000
> @@ -169,7 +169,16 @@ struct pflow_ipfix_flow6 {
>  
>  #ifdef _KERNEL
>  
> +/*
> + * Locks used to protect struct members and global data
> + *       N       net lock
> + *       p       this pflow_softc's `sc_lock'
> + */
> +
>  struct pflow_softc {
> +     struct rwlock            sc_lock;
> +
> +     int                      sc_dying;      /* N */
>       struct ifnet             sc_if;
>  
>       unsigned int             sc_count;
> @@ -185,7 +194,7 @@ struct pflow_softc {
>       struct timeout           sc_tmo_tmpl;
>       struct mbuf_queue        sc_outputqueue;
>       struct task              sc_outputtask;
> -     struct socket           *so;
> +     struct socket           *so;            /* [p] */
>       struct mbuf             *send_nam;
>       struct sockaddr         *sc_flowsrc;
>       struct sockaddr         *sc_flowdst;
> 
