Hi Jan, First of all, thanks for looking at this, I forgot we hadn't done offloads for ixl(4) yet.
On Mon, Oct 25, 2021 at 05:27:28PM +0200, Jan Klemkow wrote: > On Fri, Oct 22, 2021 at 03:39:01PM +0200, Hrvoje Popovski wrote: > > On 22.10.2021. 13:39, Jan Klemkow wrote: > > > Thats because, you only see this flags, if the checksum offloading is > > > enabled for "sending". I'm still working/debugging on the sending side. > > > Thus, I just send a diff with the receiving part for now. > > > > > > You can see if its working for your card with the netstat(8) statistics. > > > > > > # netstat -s | grep software-checksummed > > > > > > These counters should not raise much on the receive side if you put some > > > traffic over the interface. > > > > Thank you for explanation... > > > > I'm sending 8 tcp streams with iperf3 from some box to openbsd ixl box > > and here are results: > > > > without diff > > smc24# netstat -s | grep software-checksummed > > 5039250 input datagrams software-checksummed > > 2592718 output datagrams software-checksummed > > 2592709 packets software-checksummed > > 5039250 packets software-checksummed > > 0 input packets software-checksummed > > 0 output packets software-checksummed > > > > cca 6.12 Gbits/sec > > > > > > > > with diff > > smc24# netstat -s | grep software-checksummed > > 0 input datagrams software-checksummed > > 2956546 output datagrams software-checksummed > > 2956537 packets software-checksummed > > 0 packets software-checksummed > > 0 input packets software-checksummed > > 0 output packets software-checksummed > > > > cca 6.70 Gbits/sec > > > > are result like those expected? > > > > is forwarding testing any good for checksum offload diffs? > > Hi Hrvoje, > > Thanks a lot for you big testing efforts! > > In case of forwarding the forwarding box just checks the IPv4 header > checksum and ignores the UDP/TCP header. Your setup from one Box to > another is fine. > > Here is a new diff, which also includes send checksum offloading. > Thus, all software-checksummed numbers should stay low in both > directions. > > Could you test this diff with your ospf{6}d and NFS tests? > If you see IPv4 fragments in the ospf and NFS traffic within tcpdump(8), > your test should find the bugs pointed out by deraadt@ and claudio@. In the case of ixl(4), the driver has to tell the nic the length of each of the packet headers, so it should also be tested with vlan interfaces. I think ixl_tx_setup_offload() needs to account for outgoing vlan-tagged packets. It currently assumes the ethernet header is ETHER_HDR_LEN bytes long, which isn't always true. See ixgbe_tx_ctx_setup() (sys/dev/pci/if_ix.c) for an example of a driver that takes this into account. > > You can provoke large NFS packets with the following options on your NFS > mount point. > > server:/export /mnt nfs ro,intr,-r65536,-w65536 > > Thanks, > Jan > > Index: dev/pci/if_ixl.c > =================================================================== > RCS file: /mount/openbsd/cvs/src/sys/dev/pci/if_ixl.c,v > retrieving revision 1.75 > diff -u -p -r1.75 if_ixl.c > --- dev/pci/if_ixl.c 23 Jul 2021 00:29:14 -0000 1.75 > +++ dev/pci/if_ixl.c 25 Oct 2021 15:11:46 -0000 > @@ -82,6 +82,10 @@ > #endif > > #include <netinet/in.h> > +#include <netinet/ip.h> > +#include <netinet/ip6.h> > +#include <netinet/tcp.h> > +#include <netinet/udp.h> > #include <netinet/if_ether.h> > > #include <dev/pci/pcireg.h> > @@ -1388,6 +1392,7 @@ static int ixl_rxeof(struct ixl_softc *, > static void ixl_rxfill(struct ixl_softc *, struct ixl_rx_ring *); > static void ixl_rxrefill(void *); > static int ixl_rxrinfo(struct ixl_softc *, struct if_rxrinfo *); > +static void ixl_rx_checksum(struct mbuf *, uint64_t); > > #if NKSTAT > 0 > static void ixl_kstat_attach(struct ixl_softc *); > @@ -1942,9 +1947,9 @@ ixl_attach(struct device *parent, struct > ifp->if_capabilities = IFCAP_VLAN_MTU; > #if 0 > ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; > - ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 | > - IFCAP_CSUM_UDPv4; > #endif > + ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 | > + IFCAP_CSUM_UDPv4 | IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; > > ifmedia_init(&sc->sc_media, 0, ixl_media_change, ixl_media_status); > > @@ -2772,6 +2777,69 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm > } > > static void > +ixl_tx_setup_offload(struct mbuf *mp, uint64_t *cmd) > +{ > + uint64_t ip_hdr_len; > + int ipoff = ETHER_HDR_LEN; > + uint8_t ipproto; > + struct ip *ip; > +#ifdef INET6 > + struct ip6_hdr *ip6; > +#endif > + struct tcphdr *th; > + struct mbuf *m; > + > + switch (ntohs(mtod(mp, struct ether_header *)->ether_type)) { > + case ETHERTYPE_IP: > + if (mp->m_pkthdr.len < ETHER_HDR_LEN + sizeof(*ip)) > + return; > + m = m_getptr(mp, ETHER_HDR_LEN, &ipoff); > + KASSERT(m != NULL && m->m_len - ipoff >= sizeof(*ip)); > + ip = (struct ip *)(m->m_data + ipoff); > + > + if (mp->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT) > + *cmd |= IXL_TX_DESC_CMD_IIPT_IPV4_CSUM; > + else > + *cmd |= IXL_TX_DESC_CMD_IIPT_IPV4; > + > + ip_hdr_len = ip->ip_hl << 2; > + ipproto = ip->ip_p; > + break; > +#ifdef INET6 > + case ETHERTYPE_IPV6: > + if (mp->m_pkthdr.len < ETHER_HDR_LEN + sizeof(*ip6)) > + return; > + m = m_getptr(mp, ETHER_HDR_LEN, &ipoff); > + KASSERT(m != NULL && m->m_len - ipoff >= sizeof(*ip6)); > + ip6 = (struct ip6_hdr *)(m->m_data + ipoff); > + > + *cmd |= IXL_TX_DESC_CMD_IIPT_IPV6; > + > + ip_hdr_len = sizeof(*ip6); > + ipproto = ip6->ip6_nxt; > + break; > +#endif > + default: > + return; > + } > + > + *cmd |= (ETHER_HDR_LEN >> 1) << IXL_TX_DESC_MACLEN_SHIFT; > + *cmd |= (ip_hdr_len >> 2) << IXL_TX_DESC_IPLEN_SHIFT; > + > + if (ipproto == IPPROTO_TCP && m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) { > + th = (struct tcphdr *)(m->m_data + ipoff + ip_hdr_len); > + > + *cmd |= IXL_TX_DESC_CMD_L4T_EOFT_TCP; > + *cmd |= (uint64_t)th->th_off << IXL_TX_DESC_L4LEN_SHIFT; > + } > + > + if (ipproto == IPPROTO_UDP && m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) { > + *cmd |= IXL_TX_DESC_CMD_L4T_EOFT_UDP; > + *cmd |= (sizeof(struct udphdr) >> 2) << IXL_TX_DESC_L4LEN_SHIFT; > + } > +} > + > +static void > ixl_start(struct ifqueue *ifq) > { > struct ifnet *ifp = ifq->ifq_if; > @@ -2781,7 +2849,7 @@ ixl_start(struct ifqueue *ifq) > struct ixl_tx_map *txm; > bus_dmamap_t map; > struct mbuf *m; > - uint64_t cmd; > + uint64_t cmd, off = 0; > unsigned int prod, free, last, i; > unsigned int mask; > int post = 0; > @@ -2828,12 +2896,15 @@ ixl_start(struct ifqueue *ifq) > bus_dmamap_sync(sc->sc_dmat, map, 0, > map->dm_mapsize, BUS_DMASYNC_PREWRITE); > > + ixl_tx_setup_offload(m, &off); > + > for (i = 0; i < map->dm_nsegs; i++) { > txd = &ring[prod]; > > cmd = (uint64_t)map->dm_segs[i].ds_len << > IXL_TX_DESC_BSIZE_SHIFT; > cmd |= IXL_TX_DESC_DTYPE_DATA | IXL_TX_DESC_CMD_ICRC; > + cmd |= off; > > htolem64(&txd->addr, map->dm_segs[i].ds_addr); > htolem64(&txd->cmd, cmd); > @@ -3190,6 +3261,7 @@ ixl_rxeof(struct ixl_softc *sc, struct i > m->m_pkthdr.csum_flags |= M_FLOWID; > } > > + ixl_rx_checksum(m, word); > ml_enqueue(&ml, m); > } else { > ifp->if_ierrors++; /* XXX */ > @@ -3320,6 +3392,23 @@ ixl_rxrinfo(struct ixl_softc *sc, struct > free(ifr, M_TEMP, ixl_nqueues(sc) * sizeof(*ifr)); > > return (rv); > +} > + > +static void > +ixl_rx_checksum(struct mbuf *m, uint64_t word) > +{ > + if (!ISSET(word, IXL_RX_DESC_L3L4P)) > + return; > + > + if (ISSET(word, IXL_RX_DESC_IPE)) > + return; > + > + m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK; > + > + if (ISSET(word, IXL_RX_DESC_L4E)) > + return; > + > + m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK; > } > > static int >