The diff below is a first step towards running the em(4) interrupt
handler without grabbing the kernel lock.  It runs the rx completion
path without the lock, which is the important bit to be able to test
the work that has been going on to make the network stack run on
multiple CPUs.  But the handler still grabs the kernel lock for tx
completions, rx ring refills and everything else.  Which means that it
will still grab the lock every time it runs, which may affect how it
interacts with the network stack.

The locking strategy is centered around the if_rxring interface.  The
tricky bit is to prevent the code that brings down the interface from
frees the data structures accessed by the interrupt handler while it
is running.  The interrupt handler's rx completion path checks if
there are any filled ring slots before checking them and holds the
lock while doing so.  The code that frees the rx ring now explicitly
calls

    if_rxr_init(&sc->rx_ring, 0, 0);

which clears the number of filled ring slots, so after the
initialization call completes, we can be sure the rx completion code
doesn't run again until we refill the ring.

The nice side effect if resetting the if_rxring structure this way is
that if you bring down the interface, systat mbuf correctly shows the
number of allocated mbufs on the ring as 0.  But it also resets the
water marks, which may or may not be a good idea.

ok?


Index: if_em.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.c,v
retrieving revision 1.299
diff -u -p -r1.299 if_em.c
--- if_em.c     24 Jun 2015 09:40:54 -0000      1.299
+++ if_em.c     17 Aug 2015 09:24:58 -0000
@@ -353,6 +353,8 @@ em_attach(struct device *parent, struct 
        sc = (struct em_softc *)self;
        sc->osdep.em_pa = *pa;
 
+       mtx_init(&sc->rx_mtx, IPL_NET);
+
        timeout_set(&sc->timer_handle, em_local_timer, sc);
        timeout_set(&sc->tx_fifo_timer_handle, em_82547_move_tail, sc);
 
@@ -922,6 +924,8 @@ em_intr(void *arg)
                refill = 1;
        }
 
+       KERNEL_LOCK();
+
        /* Link status change */
        if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
                sc->hw.get_link_status = 1;
@@ -942,6 +946,8 @@ em_intr(void *arg)
                E1000_WRITE_REG(&sc->hw, RDT, sc->last_rx_desc_filled);
        }
 
+       KERNEL_UNLOCK();
+
        return (1);
 }
 
@@ -1698,8 +1704,8 @@ em_allocate_pci_resources(struct em_soft
        sc->hw.back = &sc->osdep;
 
        intrstr = pci_intr_string(pc, ih);
-       sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET, em_intr, sc,
-                                             sc->sc_dv.dv_xname);
+       sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
+           em_intr, sc, sc->sc_dv.dv_xname);
        if (sc->sc_intrhand == NULL) {
                printf(": couldn't establish interrupt");
                if (intrstr != NULL)
@@ -2413,6 +2419,8 @@ em_txeof(struct em_softc *sc)
        if (sc->num_tx_desc_avail == sc->num_tx_desc)
                return;
 
+       KERNEL_LOCK();
+
        num_avail = sc->num_tx_desc_avail;
        first = sc->next_tx_to_clean;
        tx_desc = &sc->tx_desc_base[first];
@@ -2494,6 +2502,8 @@ em_txeof(struct em_softc *sc)
                ifp->if_timer = EM_TX_TIMEOUT;
 
        sc->num_tx_desc_avail = num_avail;
+
+       KERNEL_UNLOCK();
 }
 
 /*********************************************************************
@@ -2743,6 +2753,10 @@ em_free_receive_structures(struct em_sof
 
        INIT_DEBUGOUT("free_receive_structures: begin");
 
+       mtx_enter(&sc->rx_mtx);
+       if_rxr_init(&sc->rx_ring, 0, 0);
+       mtx_leave(&sc->rx_mtx);
+
        if (sc->rx_buffer_area != NULL) {
                rx_buffer = sc->rx_buffer_area;
                for (i = 0; i < sc->num_rx_desc; i++, rx_buffer++) {
@@ -2825,6 +2839,8 @@ em_rxfill(struct em_softc *sc)
        int post = 0;
        int i;
 
+       mtx_enter(&sc->rx_mtx);
+
        i = sc->last_rx_desc_filled;
 
        for (slots = if_rxr_get(&sc->rx_ring, sc->num_rx_desc);
@@ -2841,6 +2857,8 @@ em_rxfill(struct em_softc *sc)
 
        if_rxr_put(&sc->rx_ring, slots);
 
+       mtx_leave(&sc->rx_mtx);
+
        return (post);
 }
 
@@ -2856,6 +2874,7 @@ em_rxeof(struct em_softc *sc)
 {
        struct ifnet        *ifp = &sc->interface_data.ac_if;
        struct mbuf_list    ml = MBUF_LIST_INITIALIZER();
+       struct mbuf_list    free_ml = MBUF_LIST_INITIALIZER();
        struct mbuf         *m;
        u_int8_t            accept_frame = 0;
        u_int8_t            eop = 0;
@@ -2867,8 +2886,11 @@ em_rxeof(struct em_softc *sc)
        struct em_buffer    *pkt;
        u_int8_t            status;
 
-       if (if_rxr_inuse(&sc->rx_ring) == 0)
+       mtx_enter(&sc->rx_mtx);
+       if (if_rxr_inuse(&sc->rx_ring) == 0) {
+               mtx_leave(&sc->rx_mtx);
                return;
+       }
 
        i = sc->next_rx_desc_to_check;
 
@@ -2988,12 +3010,12 @@ em_rxeof(struct em_softc *sc)
                        sc->dropped_pkts++;
 
                        if (sc->fmp != NULL) {
-                               m_freem(sc->fmp);
+                               ml_enqueue(&free_ml, sc->fmp);
                                sc->fmp = NULL;
                                sc->lmp = NULL;
                        }
 
-                       m_freem(m);
+                       ml_enqueue(&free_ml, m);
                }
 
                /* Advance our pointers to the next descriptor. */
@@ -3005,9 +3027,14 @@ em_rxeof(struct em_softc *sc)
            0, sizeof(*desc) * sc->num_rx_desc,
            BUS_DMASYNC_PREREAD);
 
-       if_input(ifp, &ml);
-
        sc->next_rx_desc_to_check = i;
+
+       mtx_leave(&sc->rx_mtx);
+
+       MBUF_LIST_FOREACH(&free_ml, m)
+               m_freem(m);
+
+       if_input(ifp, &ml);
 }
 
 /*********************************************************************
Index: if_em.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.h,v
retrieving revision 1.54
diff -u -p -r1.54 if_em.h
--- if_em.h     26 Dec 2014 05:46:32 -0000      1.54
+++ if_em.h     17 Aug 2015 09:24:58 -0000
@@ -373,6 +373,7 @@ struct em_softc {
        struct em_dma_alloc     rxdma;          /* bus_dma glue for rx desc */
        struct em_rx_desc       *rx_desc_base;
        struct if_rxring        rx_ring;
+       struct mutex            rx_mtx;
        u_int32_t               next_rx_desc_to_check;
        u_int32_t               last_rx_desc_filled;
        u_int32_t               rx_buffer_len;

Reply via email to