> Date: Fri, 16 Oct 2015 14:13:52 +0200
> From: Martin Pieuchot <m...@openbsd.org>
> 
> On 08/10/15(Thu) 20:49, Mark Kettenis wrote:
> > > Date: Wed, 30 Sep 2015 14:30:11 +0200 (CEST)
> > > From: Mark Kettenis <mark.kette...@xs4all.nl>
> > > 
> > > Since people seemed to like my diff for em(4), here is one for ix(4).
> > > In addition to unlocking the rx completion path, this one also uses
> > > intr_barrier() and removes the rx mutex that claudio@ put in recently.
> > > 
> > > I don't have any ix(4) hardware.  So the only guarantee is that this
> > > compiles ;).
> > 
> > Based on the experience with em(4), here is an updated diff.
> 
> What's happening to this?  Mark did you get any test report?

Got a report from Hrvoje Popovski that this triggers OACTIVE a lot and
sometimes even gets stuck completely.

> > Index: if_ix.c
> > ===================================================================
> > RCS file: /home/cvs/src/sys/dev/pci/if_ix.c,v
> > retrieving revision 1.125
> > diff -u -p -r1.125 if_ix.c
> > --- if_ix.c 11 Sep 2015 12:09:10 -0000      1.125
> > +++ if_ix.c 8 Oct 2015 18:43:36 -0000
> > @@ -210,8 +210,6 @@ ixgbe_attach(struct device *parent, stru
> >     sc->osdep.os_sc = sc;
> >     sc->osdep.os_pa = *pa;
> >  
> > -   mtx_init(&sc->rx_mtx, IPL_NET);
> > -
> >     /* Set up the timer callout */
> >     timeout_set(&sc->timer, ixgbe_local_timer, sc);
> >     timeout_set(&sc->rx_refill, ixgbe_rxrefill, sc);
> > @@ -529,10 +527,7 @@ ixgbe_watchdog(struct ifnet * ifp)
> >  
> >     /*
> >      * The timer is set to 5 every time ixgbe_start() queues a packet.
> > -    * Then ixgbe_txeof() keeps resetting to 5 as long as it cleans at
> > -    * least one descriptor.
> > -    * Finally, anytime all descriptors are clean the timer is
> > -    * set to 0.
> > +    * Anytime all descriptors are clean the timer is set to 0.
> >      */
> >     for (i = 0; i < sc->num_queues; i++, txr++) {
> >             if (txr->watchdog_timer == 0 || --txr->watchdog_timer)
> > @@ -864,7 +859,7 @@ ixgbe_intr(void *arg)
> >     struct tx_ring  *txr = sc->tx_rings;
> >     struct ixgbe_hw *hw = &sc->hw;
> >     uint32_t         reg_eicr;
> > -   int              i, refill = 0, was_active = 0;
> > +   int              i, refill = 0;
> >  
> >     reg_eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
> >     if (reg_eicr == 0) {
> > @@ -874,11 +869,8 @@ ixgbe_intr(void *arg)
> >  
> >     if (ISSET(ifp->if_flags, IFF_RUNNING)) {
> >             ixgbe_rxeof(que);
> > -           refill = 1;
> > -
> > -           if (ISSET(ifp->if_flags, IFF_OACTIVE))
> > -                   was_active = 1;
> >             ixgbe_txeof(txr);
> > +           refill = 1;
> >     }
> >  
> >     if (refill) {
> > @@ -894,6 +886,8 @@ ixgbe_intr(void *arg)
> >     if (reg_eicr & IXGBE_EICR_LSC) {
> >             KERNEL_LOCK();
> >             ixgbe_update_link_status(sc);
> > +           if (!IFQ_IS_EMPTY(&ifp->if_snd))
> > +                   ixgbe_start(ifp);
> >             KERNEL_UNLOCK();
> >     }
> >  
> > @@ -915,12 +909,6 @@ ixgbe_intr(void *arg)
> >             }
> >     }
> >  
> > -   if (was_active) {
> > -           KERNEL_LOCK();
> > -           ixgbe_start(ifp);
> > -           KERNEL_UNLOCK();
> > -   }
> > -
> >     /* Check for fan failure */
> >     if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
> >         (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
> > @@ -1063,17 +1051,9 @@ ixgbe_encap(struct tx_ring *txr, struct 
> >             cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
> >  #endif
> >  
> > -   /*
> > -    * Force a cleanup if number of TX descriptors
> > -    * available is below the threshold. If it fails
> > -    * to get above, then abort transmit.
> > -    */
> > -   if (txr->tx_avail <= IXGBE_TX_CLEANUP_THRESHOLD) {
> > -           ixgbe_txeof(txr);
> > -           /* Make sure things have improved */
> > -           if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
> > -                   return (ENOBUFS);
> > -   }
> > +   /* Check that we have at least the minimal number of TX descriptors. */
> > +   if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
> > +           return (ENOBUFS);
> >  
> >     /*
> >      * Important to capture the first descriptor
> > @@ -1137,8 +1117,6 @@ ixgbe_encap(struct tx_ring *txr, struct 
> >  
> >     txd->read.cmd_type_len |=
> >         htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
> > -   txr->tx_avail -= map->dm_nsegs;
> > -   txr->next_avail_desc = i;
> >  
> >     txbuf->m_head = m_head;
> >     /*
> > @@ -1156,6 +1134,11 @@ ixgbe_encap(struct tx_ring *txr, struct 
> >     txbuf = &txr->tx_buffers[first];
> >     txbuf->eop_index = last;
> >  
> > +   membar_producer();
> > +
> > +   atomic_sub_int(&txr->tx_avail, map->dm_nsegs);
> > +   txr->next_avail_desc = i;
> > +
> >     ++txr->tx_packets;
> >     return (0);
> >  
> > @@ -1323,6 +1306,10 @@ ixgbe_stop(void *arg)
> >     /* reprogram the RAR[0] in case user changed it. */
> >     ixgbe_set_rar(&sc->hw, 0, sc->hw.mac.addr, 0, IXGBE_RAH_AV);
> >  
> > +   intr_barrier(sc->tag);
> > +
> > +   KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
> > +
> >     /* Should we really clear all structures on stop? */
> >     ixgbe_free_transmit_structures(sc);
> >     ixgbe_free_receive_structures(sc);
> > @@ -2203,11 +2190,13 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, 
> >     tx_buffer->m_head = NULL;
> >     tx_buffer->eop_index = -1;
> >  
> > +   membar_producer();
> > +
> >     /* We've consumed the first desc, adjust counters */
> >     if (++ctxd == sc->num_tx_desc)
> >             ctxd = 0;
> >     txr->next_avail_desc = ctxd;
> > -   --txr->tx_avail;
> > +   atomic_dec_int(&txr->tx_avail);
> >  
> >     return (0);
> >  }
> > @@ -2321,10 +2310,12 @@ ixgbe_tso_setup(struct tx_ring *txr, str
> >  
> >     TXD->seqnum_seed = htole32(0);
> >  
> > +   membar_producer();
> > +
> >     if (++ctxd == sc->num_tx_desc)
> >             ctxd = 0;
> >  
> > -   txr->tx_avail--;
> > +   atomic_dec_int(&txr->tx_avail);
> >     txr->next_avail_desc = ctxd;
> >     *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
> >     *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
> > @@ -2346,6 +2337,7 @@ ixgbe_txeof(struct tx_ring *txr)
> >     struct ix_softc                 *sc = txr->sc;
> >     struct ifnet                    *ifp = &sc->arpcom.ac_if;
> >     uint32_t                         first, last, done, processed;
> > +   uint32_t                         num_avail;
> >     struct ixgbe_tx_buf             *tx_buffer;
> >     struct ixgbe_legacy_tx_desc *tx_desc, *eop_desc;
> >  
> > @@ -2357,23 +2349,19 @@ ixgbe_txeof(struct tx_ring *txr)
> >             return FALSE;
> >     }
> >  
> > -   KERNEL_LOCK();
> > +   membar_consumer();
> >  
> >     processed = 0;
> >     first = txr->next_to_clean;
> >     /* was the txt queue cleaned up in the meantime */
> > -   if (txr->tx_buffers == NULL) {
> > -           KERNEL_UNLOCK();
> > +   if (txr->tx_buffers == NULL)
> >             return FALSE;
> > -   }
> >     tx_buffer = &txr->tx_buffers[first];
> >     /* For cleanup we just use legacy struct */
> >     tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
> >     last = tx_buffer->eop_index;
> > -   if (last == -1) {
> > -           KERNEL_UNLOCK();
> > +   if (last == -1)
> >             return FALSE;
> > -   }
> >     eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
> >  
> >     /*
> > @@ -2395,7 +2383,6 @@ ixgbe_txeof(struct tx_ring *txr)
> >                     tx_desc->upper.data = 0;
> >                     tx_desc->lower.data = 0;
> >                     tx_desc->buffer_addr = 0;
> > -                   ++txr->tx_avail;
> >                     ++processed;
> >  
> >                     if (tx_buffer->m_head) {
> > @@ -2437,31 +2424,24 @@ ixgbe_txeof(struct tx_ring *txr)
> >  
> >     txr->next_to_clean = first;
> >  
> > +   num_avail = atomic_add_int_nv(&txr->tx_avail, processed);
> > +
> > +   /* All clean, turn off the timer */
> > +   if (num_avail == sc->num_tx_desc)
> > +           ifp->if_timer = 0;
> > +
> >     /*
> >      * If we have enough room, clear IFF_OACTIVE to tell the stack that
> > -    * it is OK to send packets. If there are no pending descriptors,
> > -    * clear the timeout. Otherwise, if some descriptors have been freed,
> > -    * restart the timeout.
> > +    * it is OK to send packets.
> >      */
> > -   if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD) {
> > -           ifp->if_flags &= ~IFF_OACTIVE;
> > -
> > -           /* If all are clean turn off the timer */
> > -           if (txr->tx_avail == sc->num_tx_desc) {
> > -                   ifp->if_timer = 0;
> > -                   txr->watchdog_timer = 0;
> > -                   KERNEL_UNLOCK();
> > -                   return FALSE;
> > -           }
> > -           /* Some were cleaned, so reset timer */
> > -           else if (processed) {
> > -                   ifp->if_timer = IXGBE_TX_TIMEOUT;
> > -                   txr->watchdog_timer = IXGBE_TX_TIMEOUT;
> > -           }
> > +   if (ISSET(ifp->if_flags, IFF_OACTIVE) &&
> > +       num_avail > IXGBE_TX_OP_THRESHOLD) {
> > +           KERNEL_LOCK();
> > +           CLR(ifp->if_flags, IFF_OACTIVE);
> > +           ixgbe_start(ifp);
> > +           KERNEL_UNLOCK();
> >     }
> >  
> > -   KERNEL_UNLOCK();
> > -
> >     return TRUE;
> >  }
> >  
> > @@ -2611,8 +2591,6 @@ ixgbe_rxfill(struct rx_ring *rxr)
> >     u_int            slots;
> >     int              i;
> >  
> > -   mtx_enter(&sc->rx_mtx);
> > -
> >     i = rxr->last_desc_filled;
> >     for (slots = if_rxr_get(&rxr->rx_ring, sc->num_rx_desc);
> >         slots > 0; slots--) {
> > @@ -2628,8 +2606,6 @@ ixgbe_rxfill(struct rx_ring *rxr)
> >  
> >     if_rxr_put(&rxr->rx_ring, slots);
> >  
> > -   mtx_leave(&sc->rx_mtx);
> > -
> >     return (post);
> >  }
> >  
> > @@ -2797,10 +2773,8 @@ ixgbe_free_receive_structures(struct ix_
> >     struct rx_ring *rxr;
> >     int             i;
> >  
> > -   mtx_enter(&sc->rx_mtx);
> >     for (i = 0, rxr = sc->rx_rings; i < sc->num_queues; i++, rxr++)
> >             if_rxr_init(&rxr->rx_ring, 0, 0);
> > -   mtx_leave(&sc->rx_mtx);
> >  
> >     for (i = 0, rxr = sc->rx_rings; i < sc->num_queues; i++, rxr++)
> >             ixgbe_free_receive_buffers(rxr);
> > @@ -2854,7 +2828,6 @@ ixgbe_rxeof(struct ix_queue *que)
> >     struct rx_ring          *rxr = que->rxr;
> >     struct ifnet            *ifp = &sc->arpcom.ac_if;
> >     struct mbuf_list         ml = MBUF_LIST_INITIALIZER();
> > -   struct mbuf_list         free_ml = MBUF_LIST_INITIALIZER();
> >     struct mbuf             *mp, *sendmp;
> >     uint8_t                  eop = 0;
> >     uint16_t                 len, vtag;
> > @@ -2867,7 +2840,6 @@ ixgbe_rxeof(struct ix_queue *que)
> >     if (!ISSET(ifp->if_flags, IFF_RUNNING))
> >             return FALSE;
> >  
> > -   mtx_enter(&sc->rx_mtx);
> >     i = rxr->next_to_check;
> >     while (if_rxr_inuse(&rxr->rx_ring) > 0) {
> >             bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
> > @@ -2902,11 +2874,11 @@ ixgbe_rxeof(struct ix_queue *que)
> >                     sc->dropped_pkts++;
> >  
> >                     if (rxbuf->fmp) {
> > -                           ml_enqueue(&free_ml, rxbuf->fmp);
> > +                           m_freem(rxbuf->fmp);
> >                             rxbuf->fmp = NULL;
> >                     }
> >  
> > -                   ml_enqueue(&free_ml, mp);
> > +                   m_freem(mp);
> >                     rxbuf->buf = NULL;
> >                     goto next_desc;
> >             }
> > @@ -2984,10 +2956,6 @@ next_desc:
> >                     i = 0;
> >     }
> >     rxr->next_to_check = i;
> > -   mtx_leave(&sc->rx_mtx);
> > -
> > -   while ((mp = ml_dequeue(&free_ml)))
> > -           m_freem(mp);
> >  
> >     if_input(ifp, &ml);
> >  
> > Index: if_ix.h
> > ===================================================================
> > RCS file: /home/cvs/src/sys/dev/pci/if_ix.h,v
> > retrieving revision 1.29
> > diff -u -p -r1.29 if_ix.h
> > --- if_ix.h 11 Sep 2015 13:02:28 -0000      1.29
> > +++ if_ix.h 30 Sep 2015 12:17:09 -0000
> > @@ -77,10 +77,9 @@
> >  #define IXGBE_TX_TIMEOUT                   5       /* set to 5 seconds */
> >  
> >  /*
> > - * This parameters control when the driver calls the routine to reclaim
> > - * transmit descriptors.
> > + * This parameter controls the minimum number of available transmit
> > + * descriptors needed before we attempt transmission of a packet.
> >   */
> > -#define IXGBE_TX_CLEANUP_THRESHOLD (sc->num_tx_desc / 16)
> >  #define IXGBE_TX_OP_THRESHOLD              (sc->num_tx_desc / 32)
> >  
> >  #define IXGBE_MAX_FRAME_SIZE       9216
> > @@ -170,7 +169,7 @@ struct tx_ring {
> >     union ixgbe_adv_tx_desc *tx_base;
> >     struct ixgbe_tx_buf     *tx_buffers;
> >     struct ixgbe_dma_alloc  txdma;
> > -   volatile uint16_t       tx_avail;
> > +   volatile uint32_t       tx_avail;
> >     uint32_t                next_avail_desc;
> >     uint32_t                next_to_clean;
> >     enum {
> > @@ -277,7 +276,6 @@ struct ix_softc {
> >      * Receive rings:
> >      *      Allocated at run time, an array of rings.
> >      */
> > -   struct mutex            rx_mtx;
> >     struct rx_ring          *rx_rings;
> >     uint64_t                que_mask;
> >     int                     num_rx_desc;
> > 
> 
> 

Reply via email to