Like ix(4), em(4) hardware doesn't provide an easy/efficient way to
guarantee alignment of protocol headers for received mbufs.  The
current code makes an attempt to m_adj() the mbuf if the maximum
hardware frame size is smaller than the cluster size.  But ever since
we changed our drivers to always accept jumbo frames if the hardware
is capable of doing so, this codepath is effectively dead on
jumbo-capable hardware.  So on jumbo-capable hardware we'll always end
up patching up the mbuf after the fact, which effectively means we'll
do a bcopy() of the entire packet on strict alignment architectures.

The diff below avoids this by taking an approach similar to ix(4).
This means that on strict alignment architectures, we'll use a larger
cluster size such that after m_adj()usting the mbuf there are at least
2048 bytes for the hardware to play with.  This means we'll waste
(quite) a bit of memory on those architectures.  But I think it makes
more sense than copying around all the data in the rx path, especially
with the direction we've taken with processing received packets in a
kernel thread that may be running on a different CPU than the
interrupt handler.  As a bonus, it simplifies the code quite a bit.

If we ever manage to fix the *8 pool diff, we can reduce the amount of
wasted memory, by adding a suitably sized mbuf pool on
strict-alignment architectures.  At least I think that would be
preferable over the dedicated driver-specific pool that dlg@ proposed
for ix(4).  That is why I specified the size of the cluster as

  #define EM_MCLBYTES           (EM_RXBUFFER_2048 + ETHER_ALIGN)

Unfortunately that means that "systat mbuf" output is a bit confusing:

IFACE             LIVELOCKS  SIZE ALIVE   LWM   HWM   CWM
System                    0   256    24           4
                             4096    14          12

...

em0                          2050    14    10   256    14


See how the mbufs used by em0 are reported as 4096 under "System", but
2050 under "em0"?  We could fix that by looking up the right pool, and
using its item size in the rxr ioctl if people think this is a problem.

ok?


Index: if_em.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.c,v
retrieving revision 1.299
diff -u -p -r1.299 if_em.c
--- if_em.c     24 Jun 2015 09:40:54 -0000      1.299
+++ if_em.c     11 Aug 2015 14:20:10 -0000
@@ -229,11 +229,6 @@ void em_disable_aspm(struct em_softc *);
 void em_txeof(struct em_softc *);
 int  em_allocate_receive_structures(struct em_softc *);
 int  em_allocate_transmit_structures(struct em_softc *);
-#ifdef __STRICT_ALIGNMENT
-void em_realign(struct em_softc *, struct mbuf *, u_int16_t *);
-#else
-#define em_realign(a, b, c) /* a, b, c */
-#endif
 int  em_rxfill(struct em_softc *);
 void em_rxeof(struct em_softc *);
 void em_receive_checksum(struct em_softc *, struct em_rx_desc *,
@@ -706,7 +701,7 @@ em_ioctl(struct ifnet *ifp, u_long comma
 
        case SIOCGIFRXR:
                error = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
-                   NULL, MCLBYTES, &sc->rx_ring);
+                   NULL, EM_MCLBYTES, &sc->rx_ring);
                break;
 
        default:
@@ -2518,14 +2513,15 @@ em_get_buf(struct em_softc *sc, int i)
                return (ENOBUFS);
        }
 
-       m = MCLGETI(NULL, M_DONTWAIT, NULL, MCLBYTES);
+       m = MCLGETI(NULL, M_DONTWAIT, NULL, EM_MCLBYTES);
        if (!m) {
                sc->mbuf_cluster_failed++;
                return (ENOBUFS);
        }
-       m->m_len = m->m_pkthdr.len = MCLBYTES;
-       if (sc->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
-               m_adj(m, ETHER_ALIGN);
+       m->m_len = m->m_pkthdr.len = EM_MCLBYTES;
+#ifdef __STRICT_ALIGNMENT
+       m_adj(m, ETHER_ALIGN);
+#endif
 
        error = bus_dmamap_load_mbuf(sc->rxtag, pkt->map, m, BUS_DMA_NOWAIT);
        if (error) {
@@ -2574,8 +2570,8 @@ em_allocate_receive_structures(struct em
 
        rx_buffer = sc->rx_buffer_area;
        for (i = 0; i < sc->num_rx_desc; i++, rx_buffer++) {
-               error = bus_dmamap_create(sc->rxtag, MCLBYTES, 1,
-                   MCLBYTES, 0, BUS_DMA_NOWAIT, &rx_buffer->map);
+               error = bus_dmamap_create(sc->rxtag, EM_MCLBYTES, 1,
+                   EM_MCLBYTES, 0, BUS_DMA_NOWAIT, &rx_buffer->map);
                if (error != 0) {
                        printf("%s: em_allocate_receive_structures: "
                            "bus_dmamap_create failed; error %u\n",
@@ -2771,53 +2767,6 @@ em_free_receive_structures(struct em_sof
        }
 }
 
-#ifdef __STRICT_ALIGNMENT
-void
-em_realign(struct em_softc *sc, struct mbuf *m, u_int16_t *prev_len_adj)
-{
-       unsigned char tmp_align_buf[ETHER_ALIGN];
-       int tmp_align_buf_len = 0;
-
-       /*
-        * The Ethernet payload is not 32-bit aligned when
-        * Jumbo packets are enabled, so on architectures with
-        * strict alignment we need to shift the entire packet
-        * ETHER_ALIGN bytes. Ugh.
-        */
-       if (sc->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
-               return;
-
-       if (*prev_len_adj > sc->align_buf_len)
-               *prev_len_adj -= sc->align_buf_len;
-       else
-               *prev_len_adj = 0;
-
-       if (m->m_len > (MCLBYTES - ETHER_ALIGN)) {
-               bcopy(m->m_data + (MCLBYTES - ETHER_ALIGN),
-                   &tmp_align_buf, ETHER_ALIGN);
-               tmp_align_buf_len = m->m_len -
-                   (MCLBYTES - ETHER_ALIGN);
-               m->m_len -= ETHER_ALIGN;
-       } 
-
-       if (m->m_len) {
-               bcopy(m->m_data, m->m_data + ETHER_ALIGN, m->m_len);
-               if (!sc->align_buf_len)
-                       m->m_data += ETHER_ALIGN;
-       }
-
-       if (sc->align_buf_len) {
-               m->m_len += sc->align_buf_len;
-               bcopy(&sc->align_buf, m->m_data, sc->align_buf_len);
-       }
-
-       if (tmp_align_buf_len) 
-               bcopy(&tmp_align_buf, &sc->align_buf, tmp_align_buf_len);
-
-       sc->align_buf_len = tmp_align_buf_len;
-}
-#endif /* __STRICT_ALIGNMENT */
-
 int
 em_rxfill(struct em_softc *sc)
 {
@@ -2944,8 +2893,6 @@ em_rxeof(struct em_softc *sc)
                if (accept_frame) {
                        /* Assign correct length to the current fragment */
                        m->m_len = len;
-
-                       em_realign(sc, m, &prev_len_adj); /* STRICT_ALIGN */
 
                        if (sc->fmp == NULL) {
                                m->m_pkthdr.len = m->m_len;
Index: if_em.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.h,v
retrieving revision 1.54
diff -u -p -r1.54 if_em.h
--- if_em.h     26 Dec 2014 05:46:32 -0000      1.54
+++ if_em.h     11 Aug 2015 14:20:10 -0000
@@ -266,6 +266,12 @@ typedef int        boolean_t;
 #define EM_RXBUFFER_8192       8192
 #define EM_RXBUFFER_16384      16384
 
+#ifdef __STRICT_ALIGNMENT
+#define EM_MCLBYTES            (EM_RXBUFFER_2048 + ETHER_ALIGN)
+#else
+#define EM_MCLBYTES            EM_RXBUFFER_2048
+#endif
+
 #define EM_MAX_SCATTER         64
 #define EM_TSO_SIZE            65535
 
@@ -323,12 +329,6 @@ struct em_softc {
        struct timeout  em_intr_enable;
        struct timeout  timer_handle;
        struct timeout  tx_fifo_timer_handle;
-
-#ifdef __STRICT_ALIGNMENT
-       /* Used for carrying forward alignment adjustments */
-       unsigned char   align_buf[ETHER_ALIGN]; /* tail of unaligned packet */
-       u_int8_t        align_buf_len;          /* bytes in tail */
-#endif /* __STRICT_ALIGNMENT */
 
        /* Info about the board itself */
        u_int32_t       part_num;

Reply via email to