> Date: Fri, 16 Oct 2015 14:13:52 +0200
> From: Martin Pieuchot <m...@openbsd.org>
> 
> On 08/10/15(Thu) 20:49, Mark Kettenis wrote:
> > > Date: Wed, 30 Sep 2015 14:30:11 +0200 (CEST)
> > > From: Mark Kettenis <mark.kette...@xs4all.nl>
> > > 
> > > Since people seemed to like my diff for em(4), here is one for ix(4).
> > > In addition to unlocking the rx completion path, this one also uses
> > > intr_barrier() and removes the rx mutex that claudio@ put in recently.
> > > 
> > > I don't have any ix(4) hardware.  So the only guarantee is that this
> > > compiles ;).
> > 
> > Based on the experience with em(4), here is an updated diff.
> 
> What's happening to this?  Mark did you get any test report?
Got a report from Hrvoje Popovski that this triggers OACTIVE a lot and
sometimes even gets stuck completely.

> > Index: if_ix.c
> > ===================================================================
> > RCS file: /home/cvs/src/sys/dev/pci/if_ix.c,v
> > retrieving revision 1.125
> > diff -u -p -r1.125 if_ix.c
> > --- if_ix.c	11 Sep 2015 12:09:10 -0000	1.125
> > +++ if_ix.c	8 Oct 2015 18:43:36 -0000
> > @@ -210,8 +210,6 @@ ixgbe_attach(struct device *parent, stru
> >  	sc->osdep.os_sc = sc;
> >  	sc->osdep.os_pa = *pa;
> >  
> > -	mtx_init(&sc->rx_mtx, IPL_NET);
> > -
> >  	/* Set up the timer callout */
> >  	timeout_set(&sc->timer, ixgbe_local_timer, sc);
> >  	timeout_set(&sc->rx_refill, ixgbe_rxrefill, sc);
> > @@ -529,10 +527,7 @@ ixgbe_watchdog(struct ifnet * ifp)
> >  
> >  	/*
> >  	 * The timer is set to 5 every time ixgbe_start() queues a packet.
> > -	 * Then ixgbe_txeof() keeps resetting to 5 as long as it cleans at
> > -	 * least one descriptor.
> > -	 * Finally, anytime all descriptors are clean the timer is
> > -	 * set to 0.
> > +	 * Anytime all descriptors are clean the timer is set to 0.
> >  	 */
> >  	for (i = 0; i < sc->num_queues; i++, txr++) {
> >  		if (txr->watchdog_timer == 0 || --txr->watchdog_timer)
> > @@ -864,7 +859,7 @@ ixgbe_intr(void *arg)
> >  	struct tx_ring *txr = sc->tx_rings;
> >  	struct ixgbe_hw *hw = &sc->hw;
> >  	uint32_t reg_eicr;
> > -	int i, refill = 0, was_active = 0;
> > +	int i, refill = 0;
> >  
> >  	reg_eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
> >  	if (reg_eicr == 0) {
> > @@ -874,11 +869,8 @@
> >  
> >  	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
> >  		ixgbe_rxeof(que);
> > -		refill = 1;
> > -
> > -		if (ISSET(ifp->if_flags, IFF_OACTIVE))
> > -			was_active = 1;
> >  		ixgbe_txeof(txr);
> > +		refill = 1;
> >  	}
> >  
> >  	if (refill) {
> > @@ -894,6 +886,8 @@
> >  	if (reg_eicr & IXGBE_EICR_LSC) {
> >  		KERNEL_LOCK();
> >  		ixgbe_update_link_status(sc);
> > +		if (!IFQ_IS_EMPTY(&ifp->if_snd))
> > +			ixgbe_start(ifp);
> >  		KERNEL_UNLOCK();
> >  	}
> >  
> > @@ -915,12 +909,6 @@
> >  		}
> >  	}
> >  
> > -	if (was_active) {
> > -		KERNEL_LOCK();
> > -		ixgbe_start(ifp);
> > -		KERNEL_UNLOCK();
> > -	}
> > -
> >  	/* Check for fan failure */
> >  	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
> >  	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
> > @@ -1063,17 +1051,9 @@ ixgbe_encap(struct tx_ring *txr, struct
> >  		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
> >  #endif
> >  
> > -	/*
> > -	 * Force a cleanup if number of TX descriptors
> > -	 * available is below the threshold. If it fails
> > -	 * to get above, then abort transmit.
> > -	 */
> > -	if (txr->tx_avail <= IXGBE_TX_CLEANUP_THRESHOLD) {
> > -		ixgbe_txeof(txr);
> > -		/* Make sure things have improved */
> > -		if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
> > -			return (ENOBUFS);
> > -	}
> > +	/* Check that we have at least the minimal number of TX descriptors. */
> > +	if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
> > +		return (ENOBUFS);
> >  
> >  	/*
> >  	 * Important to capture the first descriptor
> > @@ -1137,8 +1117,6 @@ ixgbe_encap(struct tx_ring *txr, struct
> >  
> >  	txd->read.cmd_type_len |=
> >  	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
> > -	txr->tx_avail -= map->dm_nsegs;
> > -	txr->next_avail_desc = i;
> >  
> >  	txbuf->m_head = m_head;
> >  	/*
> > @@ -1156,6 +1134,11 @@ ixgbe_encap(struct tx_ring *txr, struct
> >  	txbuf = &txr->tx_buffers[first];
> >  	txbuf->eop_index = last;
> >  
> > +	membar_producer();
> > +
> > +	atomic_sub_int(&txr->tx_avail, map->dm_nsegs);
> > +	txr->next_avail_desc = i;
> > +
> >  	++txr->tx_packets;
> >  	return (0);
> >  
> > @@ -1323,6 +1306,10 @@ ixgbe_stop(void *arg)
> >  	/* reprogram the RAR[0] in case user changed it. */
> >  	ixgbe_set_rar(&sc->hw, 0, sc->hw.mac.addr, 0, IXGBE_RAH_AV);
> >  
> > +	intr_barrier(sc->tag);
> > +
> > +	KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
> > +
> >  	/* Should we really clear all structures on stop? */
> >  	ixgbe_free_transmit_structures(sc);
> >  	ixgbe_free_receive_structures(sc);
> > @@ -2203,11 +2190,13 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr,
> >  	tx_buffer->m_head = NULL;
> >  	tx_buffer->eop_index = -1;
> >  
> > +	membar_producer();
> > +
> >  	/* We've consumed the first desc, adjust counters */
> >  	if (++ctxd == sc->num_tx_desc)
> >  		ctxd = 0;
> >  	txr->next_avail_desc = ctxd;
> > -	--txr->tx_avail;
> > +	atomic_dec_int(&txr->tx_avail);
> >  
> >  	return (0);
> >  }
> > @@ -2321,10 +2310,12 @@ ixgbe_tso_setup(struct tx_ring *txr, str
> >  
> >  	TXD->seqnum_seed = htole32(0);
> >  
> > +	membar_producer();
> > +
> >  	if (++ctxd == sc->num_tx_desc)
> >  		ctxd = 0;
> >  
> > -	txr->tx_avail--;
> > +	atomic_dec_int(&txr->tx_avail);
> >  	txr->next_avail_desc = ctxd;
> >  	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
> >  	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
> > @@ -2346,6 +2337,7 @@ ixgbe_txeof(struct tx_ring *txr)
> >  	struct ix_softc *sc = txr->sc;
> >  	struct ifnet *ifp = &sc->arpcom.ac_if;
> >  	uint32_t first, last, done, processed;
> > +	uint32_t num_avail;
> >  	struct ixgbe_tx_buf *tx_buffer;
> >  	struct ixgbe_legacy_tx_desc *tx_desc, *eop_desc;
> >  
> > @@ -2357,23 +2349,19 @@ ixgbe_txeof(struct tx_ring *txr)
> >  		return FALSE;
> >  	}
> >  
> > -	KERNEL_LOCK();
> > +	membar_consumer();
> >  
> >  	processed = 0;
> >  	first = txr->next_to_clean;
> >  	/* was the txt queue cleaned up in the meantime */
> > -	if (txr->tx_buffers == NULL) {
> > -		KERNEL_UNLOCK();
> > +	if (txr->tx_buffers == NULL)
> >  		return FALSE;
> > -	}
> >  	tx_buffer = &txr->tx_buffers[first];
> >  	/* For cleanup we just use legacy struct */
> >  	tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
> >  	last = tx_buffer->eop_index;
> > -	if (last == -1) {
> > -		KERNEL_UNLOCK();
> > +	if (last == -1)
> >  		return FALSE;
> > -	}
> >  	eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
> >  
> >  	/*
> > @@ -2395,7 +2383,6 @@ ixgbe_txeof(struct tx_ring *txr)
> >  			tx_desc->upper.data = 0;
> >  			tx_desc->lower.data = 0;
> >  			tx_desc->buffer_addr = 0;
> > -			++txr->tx_avail;
> >  			++processed;
> >  
> >  			if (tx_buffer->m_head) {
> > @@ -2437,31 +2424,24 @@ ixgbe_txeof(struct tx_ring *txr)
> >  
> >  	txr->next_to_clean = first;
> >  
> > +	num_avail = atomic_add_int_nv(&txr->tx_avail, processed);
> > +
> > +	/* All clean, turn off the timer */
> > +	if (num_avail == sc->num_tx_desc)
> > +		ifp->if_timer = 0;
> > +
> >  	/*
> >  	 * If we have enough room, clear IFF_OACTIVE to tell the stack that
> > -	 * it is OK to send packets.  If there are no pending descriptors,
> > -	 * clear the timeout. Otherwise, if some descriptors have been freed,
> > -	 * restart the timeout.
> > +	 * it is OK to send packets.
> >  	 */
> > -	if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD) {
> > -		ifp->if_flags &= ~IFF_OACTIVE;
> > -
> > -		/* If all are clean turn off the timer */
> > -		if (txr->tx_avail == sc->num_tx_desc) {
> > -			ifp->if_timer = 0;
> > -			txr->watchdog_timer = 0;
> > -			KERNEL_UNLOCK();
> > -			return FALSE;
> > -		}
> > -		/* Some were cleaned, so reset timer */
> > -		else if (processed) {
> > -			ifp->if_timer = IXGBE_TX_TIMEOUT;
> > -			txr->watchdog_timer = IXGBE_TX_TIMEOUT;
> > -		}
> > +	if (ISSET(ifp->if_flags, IFF_OACTIVE) &&
> > +	    num_avail > IXGBE_TX_OP_THRESHOLD) {
> > +		KERNEL_LOCK();
> > +		CLR(ifp->if_flags, IFF_OACTIVE);
> > +		ixgbe_start(ifp);
> > +		KERNEL_UNLOCK();
> >  	}
> >  
> > -	KERNEL_UNLOCK();
> > -
> >  	return TRUE;
> >  }
> >  
> > @@ -2611,8 +2591,6 @@ ixgbe_rxfill(struct rx_ring *rxr)
> >  	u_int slots;
> >  	int i;
> >  
> > -	mtx_enter(&sc->rx_mtx);
> > -
> >  	i = rxr->last_desc_filled;
> >  	for (slots = if_rxr_get(&rxr->rx_ring, sc->num_rx_desc);
> >  	    slots > 0; slots--) {
> > @@ -2628,8 +2606,6 @@ ixgbe_rxfill(struct rx_ring *rxr)
> >  
> >  	if_rxr_put(&rxr->rx_ring, slots);
> >  
> > -	mtx_leave(&sc->rx_mtx);
> > -
> >  	return (post);
> >  }
> >  
> > @@ -2797,10 +2773,8 @@ ixgbe_free_receive_structures(struct ix_
> >  	struct rx_ring *rxr;
> >  	int i;
> >  
> > -	mtx_enter(&sc->rx_mtx);
> >  	for (i = 0, rxr = sc->rx_rings; i < sc->num_queues; i++, rxr++)
> >  		if_rxr_init(&rxr->rx_ring, 0, 0);
> > -	mtx_leave(&sc->rx_mtx);
> >  
> >  	for (i = 0, rxr = sc->rx_rings; i < sc->num_queues; i++, rxr++)
> >  		ixgbe_free_receive_buffers(rxr);
> > @@ -2854,7 +2828,6 @@ ixgbe_rxeof(struct ix_queue *que)
> >  	struct rx_ring *rxr = que->rxr;
> >  	struct ifnet *ifp = &sc->arpcom.ac_if;
> >  	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
> > -	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
> >  	struct mbuf *mp, *sendmp;
> >  	uint8_t eop = 0;
> >  	uint16_t len, vtag;
> > @@ -2867,7 +2840,6 @@ ixgbe_rxeof(struct ix_queue *que)
> >  	if (!ISSET(ifp->if_flags, IFF_RUNNING))
> >  		return FALSE;
> >  
> > -	mtx_enter(&sc->rx_mtx);
> >  	i = rxr->next_to_check;
> >  	while (if_rxr_inuse(&rxr->rx_ring) > 0) {
> >  		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
> > @@ -2902,11 +2874,11 @@ ixgbe_rxeof(struct ix_queue *que)
> >  			sc->dropped_pkts++;
> >  
> >  			if (rxbuf->fmp) {
> > -				ml_enqueue(&free_ml, rxbuf->fmp);
> > +				m_freem(rxbuf->fmp);
> >  				rxbuf->fmp = NULL;
> >  			}
> >  
> > -			ml_enqueue(&free_ml, mp);
> > +			m_freem(mp);
> >  			rxbuf->buf = NULL;
> >  			goto next_desc;
> >  		}
> > @@ -2984,10 +2956,6 @@ next_desc:
> >  			i = 0;
> >  	}
> >  	rxr->next_to_check = i;
> > -	mtx_leave(&sc->rx_mtx);
> > -
> > -	while ((mp = ml_dequeue(&free_ml)))
> > -		m_freem(mp);
> >  
> >  	if_input(ifp, &ml);
> >  
> > Index: if_ix.h
> > ===================================================================
> > RCS file: /home/cvs/src/sys/dev/pci/if_ix.h,v
> > retrieving revision 1.29
> > diff -u -p -r1.29 if_ix.h
> > --- if_ix.h	11 Sep 2015 13:02:28 -0000	1.29
> > +++ if_ix.h	30 Sep 2015 12:17:09 -0000
> > @@ -77,10 +77,9 @@
> >  #define IXGBE_TX_TIMEOUT		5	/* set to 5 seconds */
> >  
> >  /*
> > - * This parameters control when the driver calls the routine to reclaim
> > - * transmit descriptors.
> > + * This parameter controls the minimum number of available transmit
> > + * descriptors needed before we attempt transmission of a packet.
> >   */
> > -#define IXGBE_TX_CLEANUP_THRESHOLD	(sc->num_tx_desc / 16)
> >  #define IXGBE_TX_OP_THRESHOLD		(sc->num_tx_desc / 32)
> >  
> >  #define IXGBE_MAX_FRAME_SIZE	9216
> > @@ -170,7 +169,7 @@ struct tx_ring {
> >  	union ixgbe_adv_tx_desc *tx_base;
> >  	struct ixgbe_tx_buf *tx_buffers;
> >  	struct ixgbe_dma_alloc txdma;
> > -	volatile uint16_t tx_avail;
> > +	volatile uint32_t tx_avail;
> >  	uint32_t next_avail_desc;
> >  	uint32_t next_to_clean;
> >  	enum {
> > @@ -277,7 +276,6 @@ struct ix_softc {
> >  	 * Receive rings:
> >  	 *	Allocated at run time, an array of rings.
> >  	 */
> > -	struct mutex rx_mtx;
> >  	struct rx_ring *rx_rings;
> >  	uint64_t que_mask;
> >  	int num_rx_desc;
> > 
> > 
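
For reference, here is a minimal userland sketch of the lockless tx_avail
accounting the diff relies on.  It is not driver code: the kernel's
membar_producer()/membar_consumer() and atomic_sub_int()/atomic_add_int_nv()
are loosely mapped onto C11 release/acquire operations, and ring_encap(),
ring_txeof() and the constants are made-up names.

/*
 * Standalone illustration (not driver code) of the producer/consumer
 * ordering used for txr->tx_avail in the diff above.  C11 atomics stand
 * in for membar_producer()/membar_consumer() and atomic_*_int().
 */
#include <stdatomic.h>
#include <stdio.h>

#define NUM_DESC	256
#define OP_THRESHOLD	(NUM_DESC / 32)

struct ring {
	_Atomic unsigned int tx_avail;	/* free descriptors */
	unsigned int next_avail;	/* producer index */
	unsigned int next_to_clean;	/* consumer index */
};

/* Producer (start/encap path): write descriptors, then publish the count. */
int
ring_encap(struct ring *r, unsigned int nsegs)
{
	if (atomic_load_explicit(&r->tx_avail, memory_order_relaxed) <=
	    OP_THRESHOLD)
		return (-1);		/* caller would set IFF_OACTIVE */

	/* ... fill hardware descriptors starting at r->next_avail ... */
	r->next_avail = (r->next_avail + nsegs) % NUM_DESC;

	/*
	 * membar_producer() analogue: the descriptor writes become
	 * visible before the updated count does.
	 */
	atomic_fetch_sub_explicit(&r->tx_avail, nsegs, memory_order_release);
	return (0);
}

/* Consumer (interrupt path): reclaim descriptors, return the new count. */
unsigned int
ring_txeof(struct ring *r, unsigned int processed)
{
	/*
	 * membar_consumer() analogue: order our reads after the
	 * producer's published writes.
	 */
	atomic_thread_fence(memory_order_acquire);

	/* ... walk completed descriptors from r->next_to_clean ... */
	r->next_to_clean = (r->next_to_clean + processed) % NUM_DESC;

	/* atomic_add_int_nv() analogue: add and return the new value. */
	return (atomic_fetch_add_explicit(&r->tx_avail, processed,
	    memory_order_release) + processed);
}

int
main(void)
{
	struct ring r = { NUM_DESC, 0, 0 };

	if (ring_encap(&r, 4) == 0)
		printf("tx_avail after txeof: %u\n", ring_txeof(&r, 4));
	return (0);
}

The sketch only covers the descriptor accounting; it does not model the
IFF_OACTIVE handshake with ixgbe_start(), which is presumably where the
reported hang lives.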