Like ix(4), em(4) hardware doesn't provide an easy/efficient way to guarantee alignment of protocol headers for received mbufs. The current code makes an attempt to m_adj() the mbuf if the maximum hardware frame size is smaller than the cluster size. But ever since we changed our drivers to always accept jumbo frames if the hardware is capable of doing so, this codepath is effectively dead on jumbo-capable hardware. So on jumbo-capable hardware we'll always end up patching up the mbuf after the fact, which effectively means we'll do a bcopy() of the entire packet on strict alignment architectures.
The diff below avoids this by taking an approach similar to ix(4). This means that on strict alignment architectures, we'll use a larger cluster size such that after m_adj()usting the mbuf there are at least 2048 bytes for the hardware to play with. This means we'll waste (quite) a bit of memory on those architectures. But I think it makes more sense than copying around all the data in the rx path, especially with the direction we've taken with processing received packets in a kernel thread that may be running on a different CPU than the interrupt handler. As a bonus, it simplifies the code quite a bit. If we ever manage to fix the *8 pool diff, we can reduce the amount of wasted memory, by adding a suitably sized mbuf pool on strict-alignment architectures. At least I think that would be preferable over the dedicated driver-specific pool that dlg@ proposed for ix(4). That is why I specified the size of the cluster as #define EM_MCLBYTES (EM_RXBUFFER_2048 + ETHER_ALIGN) Unfortunately that means that "systat mbuf" output is a bit confusing: IFACE LIVELOCKS SIZE ALIVE LWM HWM CWM System 0 256 24 4 4096 14 12 ... em0 2050 14 10 256 14 See how the mbufs used by em0 are reported as 4096 under "System", but 2050 under "em0"? We could fix that by looking up the right pool, and using its item size in the rxr ioctl if people think this is a problem. ok? Index: if_em.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_em.c,v retrieving revision 1.299 diff -u -p -r1.299 if_em.c --- if_em.c 24 Jun 2015 09:40:54 -0000 1.299 +++ if_em.c 11 Aug 2015 14:20:10 -0000 @@ -229,11 +229,6 @@ void em_disable_aspm(struct em_softc *); void em_txeof(struct em_softc *); int em_allocate_receive_structures(struct em_softc *); int em_allocate_transmit_structures(struct em_softc *); -#ifdef __STRICT_ALIGNMENT -void em_realign(struct em_softc *, struct mbuf *, u_int16_t *); -#else -#define em_realign(a, b, c) /* a, b, c */ -#endif int em_rxfill(struct em_softc *); void em_rxeof(struct em_softc *); void em_receive_checksum(struct em_softc *, struct em_rx_desc *, @@ -706,7 +701,7 @@ em_ioctl(struct ifnet *ifp, u_long comma case SIOCGIFRXR: error = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data, - NULL, MCLBYTES, &sc->rx_ring); + NULL, EM_MCLBYTES, &sc->rx_ring); break; default: @@ -2518,14 +2513,15 @@ em_get_buf(struct em_softc *sc, int i) return (ENOBUFS); } - m = MCLGETI(NULL, M_DONTWAIT, NULL, MCLBYTES); + m = MCLGETI(NULL, M_DONTWAIT, NULL, EM_MCLBYTES); if (!m) { sc->mbuf_cluster_failed++; return (ENOBUFS); } - m->m_len = m->m_pkthdr.len = MCLBYTES; - if (sc->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN)) - m_adj(m, ETHER_ALIGN); + m->m_len = m->m_pkthdr.len = EM_MCLBYTES; +#ifdef __STRICT_ALIGNMENT + m_adj(m, ETHER_ALIGN); +#endif error = bus_dmamap_load_mbuf(sc->rxtag, pkt->map, m, BUS_DMA_NOWAIT); if (error) { @@ -2574,8 +2570,8 @@ em_allocate_receive_structures(struct em rx_buffer = sc->rx_buffer_area; for (i = 0; i < sc->num_rx_desc; i++, rx_buffer++) { - error = bus_dmamap_create(sc->rxtag, MCLBYTES, 1, - MCLBYTES, 0, BUS_DMA_NOWAIT, &rx_buffer->map); + error = bus_dmamap_create(sc->rxtag, EM_MCLBYTES, 1, + EM_MCLBYTES, 0, BUS_DMA_NOWAIT, &rx_buffer->map); if (error != 0) { printf("%s: em_allocate_receive_structures: " "bus_dmamap_create failed; error %u\n", @@ -2771,53 +2767,6 @@ em_free_receive_structures(struct em_sof } } -#ifdef __STRICT_ALIGNMENT -void -em_realign(struct em_softc *sc, struct mbuf *m, u_int16_t *prev_len_adj) -{ - unsigned char tmp_align_buf[ETHER_ALIGN]; - int tmp_align_buf_len = 0; - - /* - * The Ethernet payload is not 32-bit aligned when - * Jumbo packets are enabled, so on architectures with - * strict alignment we need to shift the entire packet - * ETHER_ALIGN bytes. Ugh. - */ - if (sc->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN)) - return; - - if (*prev_len_adj > sc->align_buf_len) - *prev_len_adj -= sc->align_buf_len; - else - *prev_len_adj = 0; - - if (m->m_len > (MCLBYTES - ETHER_ALIGN)) { - bcopy(m->m_data + (MCLBYTES - ETHER_ALIGN), - &tmp_align_buf, ETHER_ALIGN); - tmp_align_buf_len = m->m_len - - (MCLBYTES - ETHER_ALIGN); - m->m_len -= ETHER_ALIGN; - } - - if (m->m_len) { - bcopy(m->m_data, m->m_data + ETHER_ALIGN, m->m_len); - if (!sc->align_buf_len) - m->m_data += ETHER_ALIGN; - } - - if (sc->align_buf_len) { - m->m_len += sc->align_buf_len; - bcopy(&sc->align_buf, m->m_data, sc->align_buf_len); - } - - if (tmp_align_buf_len) - bcopy(&tmp_align_buf, &sc->align_buf, tmp_align_buf_len); - - sc->align_buf_len = tmp_align_buf_len; -} -#endif /* __STRICT_ALIGNMENT */ - int em_rxfill(struct em_softc *sc) { @@ -2944,8 +2893,6 @@ em_rxeof(struct em_softc *sc) if (accept_frame) { /* Assign correct length to the current fragment */ m->m_len = len; - - em_realign(sc, m, &prev_len_adj); /* STRICT_ALIGN */ if (sc->fmp == NULL) { m->m_pkthdr.len = m->m_len; Index: if_em.h =================================================================== RCS file: /cvs/src/sys/dev/pci/if_em.h,v retrieving revision 1.54 diff -u -p -r1.54 if_em.h --- if_em.h 26 Dec 2014 05:46:32 -0000 1.54 +++ if_em.h 11 Aug 2015 14:20:10 -0000 @@ -266,6 +266,12 @@ typedef int boolean_t; #define EM_RXBUFFER_8192 8192 #define EM_RXBUFFER_16384 16384 +#ifdef __STRICT_ALIGNMENT +#define EM_MCLBYTES (EM_RXBUFFER_2048 + ETHER_ALIGN) +#else +#define EM_MCLBYTES EM_RXBUFFER_2048 +#endif + #define EM_MAX_SCATTER 64 #define EM_TSO_SIZE 65535 @@ -323,12 +329,6 @@ struct em_softc { struct timeout em_intr_enable; struct timeout timer_handle; struct timeout tx_fifo_timer_handle; - -#ifdef __STRICT_ALIGNMENT - /* Used for carrying forward alignment adjustments */ - unsigned char align_buf[ETHER_ALIGN]; /* tail of unaligned packet */ - u_int8_t align_buf_len; /* bytes in tail */ -#endif /* __STRICT_ALIGNMENT */ /* Info about the board itself */ u_int32_t part_num;