Hi, I always bothered me that ip_fragment() and ip6_fragment() behave sligtly differently. Unify them and use an mlist to simplify the fragment list.
- The functions ip_fragment() and ip6_fragment() always consume the mbuf. - They free the mbuf and mbuf list in case of an error. - They care about the counter. - Adjust the code a bit to make v4 and v6 look similar. - Maybe there was an mbuf leak when pf_route6() called pf_refragment6() and it failed. Now the mbuf is always freed by ip6_fragment(). ok? bluhm Index: net/if_bridge.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_bridge.c,v retrieving revision 1.352 diff -u -p -r1.352 if_bridge.c --- net/if_bridge.c 25 Feb 2021 02:48:21 -0000 1.352 +++ net/if_bridge.c 26 Feb 2021 10:41:57 -0000 @@ -1853,7 +1853,7 @@ bridge_fragment(struct ifnet *brifp, str struct mbuf *m) { struct llc llc; - struct mbuf *m0; + struct mbuf_list ml; int error = 0; int hassnap = 0; u_int16_t etype; @@ -1911,40 +1911,32 @@ bridge_fragment(struct ifnet *brifp, str return; } - error = ip_fragment(m, ifp, ifp->if_mtu); - if (error) { - m = NULL; - goto dropit; - } + error = ip_fragment(m, &ml, ifp, ifp->if_mtu); + if (error) + return; - for (; m; m = m0) { - m0 = m->m_nextpkt; - m->m_nextpkt = NULL; - if (error == 0) { - if (hassnap) { - M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT); - if (m == NULL) { - error = ENOBUFS; - continue; - } - bcopy(&llc, mtod(m, caddr_t), - LLC_SNAPFRAMELEN); - } - M_PREPEND(m, sizeof(*eh), M_DONTWAIT); + while ((m = ml_dequeue(&ml)) != NULL) { + if (hassnap) { + M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT); if (m == NULL) { error = ENOBUFS; - continue; + break; } - bcopy(eh, mtod(m, caddr_t), sizeof(*eh)); - error = bridge_ifenqueue(brifp, ifp, m); - if (error) { - continue; - } - } else - m_freem(m); + bcopy(&llc, mtod(m, caddr_t), LLC_SNAPFRAMELEN); + } + M_PREPEND(m, sizeof(*eh), M_DONTWAIT); + if (m == NULL) { + error = ENOBUFS; + break; + } + bcopy(eh, mtod(m, caddr_t), sizeof(*eh)); + error = bridge_ifenqueue(brifp, ifp, m); + if (error) + break; } - - if (error == 0) + if (error) + ml_purge(&ml); + else ipstat_inc(ips_fragmented); return; Index: net/pf.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v retrieving revision 1.1112 diff -u -p -r1.1112 pf.c --- net/pf.c 23 Feb 2021 11:43:40 -0000 1.1112 +++ net/pf.c 26 Feb 2021 10:41:57 -0000 @@ -5969,7 +5969,8 @@ pf_rtlabel_match(struct pf_addr *addr, s void pf_route(struct pf_pdesc *pd, struct pf_state *s) { - struct mbuf *m0, *m1; + struct mbuf *m0; + struct mbuf_list ml; struct sockaddr_in *dst, sin; struct rtentry *rt = NULL; struct ip *ip; @@ -6078,23 +6079,18 @@ pf_route(struct pf_pdesc *pd, struct pf_ goto bad; } - m1 = m0; - error = ip_fragment(m0, ifp, ifp->if_mtu); - if (error) { - m0 = NULL; - goto bad; - } + error = ip_fragment(m0, &ml, ifp, ifp->if_mtu); + if (error) + goto done; - for (m0 = m1; m0; m0 = m1) { - m1 = m0->m_nextpkt; - m0->m_nextpkt = NULL; - if (error == 0) - error = ifp->if_output(ifp, m0, sintosa(dst), rt); - else - m_freem(m0); + while ((m0 = ml_dequeue(&ml)) != NULL) { + error = ifp->if_output(ifp, m0, sintosa(dst), rt); + if (error) + break; } - - if (error == 0) + if (error) + ml_purge(&ml); + else ipstat_inc(ips_fragmented); done: Index: net/pf_norm.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf_norm.c,v retrieving revision 1.221 diff -u -p -r1.221 pf_norm.c --- net/pf_norm.c 22 Feb 2021 13:04:56 -0000 1.221 +++ net/pf_norm.c 26 Feb 2021 10:49:40 -0000 @@ -966,12 +966,13 @@ int pf_refragment6(struct mbuf **m0, struct m_tag *mtag, struct sockaddr_in6 *dst, struct ifnet *ifp, struct rtentry *rt) { - struct mbuf *m = *m0, *t; + struct mbuf *m = *m0; + struct mbuf_list ml; struct pf_fragment_tag *ftag = (struct pf_fragment_tag *)(mtag + 1); u_int32_t mtu; u_int16_t hdrlen, extoff, maxlen; u_int8_t proto; - int error, action; + int error; hdrlen = ftag->ft_hdrlen; extoff = ftag->ft_extoff; @@ -1009,39 +1010,25 @@ pf_refragment6(struct mbuf **m0, struct * we drop the packet. */ mtu = hdrlen + sizeof(struct ip6_frag) + maxlen; - error = ip6_fragment(m, hdrlen, proto, mtu); - - m = (*m0)->m_nextpkt; - (*m0)->m_nextpkt = NULL; - if (error == 0) { - /* The first mbuf contains the unfragmented packet */ - m_freemp(m0); - action = PF_PASS; - } else { - /* Drop expects an mbuf to free */ + error = ip6_fragment(m, &ml, hdrlen, proto, mtu); + *m0 = NULL; /* ip6_fragment() has consumed original packet. */ + if (error) { DPFPRINTF(LOG_NOTICE, "refragment error %d", error); - action = PF_DROP; + return (PF_DROP); } - for (t = m; m; m = t) { - t = m->m_nextpkt; - m->m_nextpkt = NULL; + while ((m = ml_dequeue(&ml)) != NULL) { m->m_pkthdr.pf.flags |= PF_TAG_REFRAGMENTED; - if (error == 0) { - if (ifp == NULL) { - ip6_forward(m, NULL, 0); - } else if ((u_long)m->m_pkthdr.len <= ifp->if_mtu) { - ifp->if_output(ifp, m, sin6tosa(dst), rt); - } else { - icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, - ifp->if_mtu); - } + if (ifp == NULL) { + ip6_forward(m, NULL, 0); + } else if ((u_long)m->m_pkthdr.len <= ifp->if_mtu) { + ifp->if_output(ifp, m, sin6tosa(dst), rt); } else { - m_freem(m); + icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); } } - return (action); + return (PF_PASS); } #endif /* INET6 */ Index: netinet/ip_output.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v retrieving revision 1.367 diff -u -p -r1.367 ip_output.c --- netinet/ip_output.c 23 Feb 2021 12:14:10 -0000 1.367 +++ netinet/ip_output.c 26 Feb 2021 10:41:57 -0000 @@ -96,12 +96,12 @@ ip_output_ipsec_send(struct tdb *, struc * The mbuf opt, if present, will not be freed. */ int -ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags, +ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct ip_moptions *imo, struct inpcb *inp, u_int32_t ipsecflowinfo) { struct ip *ip; struct ifnet *ifp = NULL; - struct mbuf *m = m0; + struct mbuf_list ml; int hlen = sizeof (struct ip); int error = 0; struct route iproute; @@ -502,22 +502,18 @@ sendit: goto bad; } - error = ip_fragment(m, ifp, mtu); - if (error) { - m = m0 = NULL; - goto bad; - } + error = ip_fragment(m, &ml, ifp, mtu); + if (error) + goto done; - for (; m; m = m0) { - m0 = m->m_nextpkt; - m->m_nextpkt = NULL; - if (error == 0) - error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); - else - m_freem(m); + while ((m = ml_dequeue(&ml)) != NULL) { + error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); + if (error) + break; } - - if (error == 0) + if (error) + ml_purge(&ml); + else ipstat_inc(ips_fragmented); done: @@ -525,6 +521,7 @@ done: rtfree(ro->ro_rt); if_put(ifp); return (error); + bad: m_freem(m); goto done; @@ -651,23 +648,23 @@ ip_output_ipsec_send(struct tdb *tdb, st #endif /* IPSEC */ int -ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu) +ip_fragment(struct mbuf *m, struct mbuf_list *ml, struct ifnet *ifp, u_long mtu) { struct ip *ip, *mhip; struct mbuf *m0; int len, hlen, off; int mhlen, firstlen; - struct mbuf **mnext; - int fragments = 0; - int error = 0; + int error; + + ml_init(ml); + ml_enqueue(ml, m); ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; - len = (mtu - hlen) &~ 7; if (len < 8) { - m_freem(m); - return (EMSGSIZE); + error = EMSGSIZE; + goto bad; } /* @@ -676,7 +673,6 @@ ip_fragment(struct mbuf *m, struct ifnet */ in_proto_cksum_out(m, NULL); firstlen = len; - mnext = &m->m_nextpkt; /* * Loop through length of segment after first fragment, @@ -687,12 +683,10 @@ ip_fragment(struct mbuf *m, struct ifnet for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { MGETHDR(m, M_DONTWAIT, MT_HEADER); if (m == NULL) { - ipstat_inc(ips_odropped); error = ENOBUFS; - goto sendorfree; + goto bad; } - *mnext = m; - mnext = &m->m_nextpkt; + ml_enqueue(ml, m); m->m_data += max_linkhdr; mhip = mtod(m, struct ip *); *mhip = *ip; @@ -715,10 +709,9 @@ ip_fragment(struct mbuf *m, struct ifnet mhip->ip_off |= IP_MF; mhip->ip_len = htons((u_int16_t)(len + mhlen)); m->m_next = m_copym(m0, off, len, M_NOWAIT); - if (m->m_next == 0) { - ipstat_inc(ips_odropped); + if (m->m_next == NULL) { error = ENOBUFS; - goto sendorfree; + goto bad; } m->m_pkthdr.len = mhlen + len; m->m_pkthdr.ph_ifidx = 0; @@ -730,8 +723,6 @@ ip_fragment(struct mbuf *m, struct ifnet ipstat_inc(ips_outswcsum); mhip->ip_sum = in_cksum(m, mhlen); } - ipstat_inc(ips_ofragments); - fragments++; } /* * Update first fragment by trimming what's been copied out @@ -749,15 +740,13 @@ ip_fragment(struct mbuf *m, struct ifnet ipstat_inc(ips_outswcsum); ip->ip_sum = in_cksum(m, hlen); } -sendorfree: - if (error) { - for (m = m0; m; m = m0) { - m0 = m->m_nextpkt; - m->m_nextpkt = NULL; - m_freem(m); - } - } + ipstat_add(ips_ofragments, ml_len(ml)); + return (0); + +bad: + ipstat_inc(ips_odropped); + ml_purge(ml); return (error); } Index: netinet/ip_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v retrieving revision 1.86 diff -u -p -r1.86 ip_var.h --- netinet/ip_var.h 8 Dec 2019 11:08:22 -0000 1.86 +++ netinet/ip_var.h 26 Feb 2021 10:41:57 -0000 @@ -145,6 +145,12 @@ ipstat_inc(enum ipstat_counters c) counters_inc(ipcounters, c); } +static inline void +ipstat_add(enum ipstat_counters c, uint64_t v) +{ + counters_add(ipcounters, c, v); +} + /* * Structure attached to inpcb.ip_moptions and * passed to ip_output when IP multicast options are in use. @@ -218,7 +224,7 @@ struct inpcb; int ip_ctloutput(int, struct socket *, int, int, struct mbuf *); void ip_flush(void); -int ip_fragment(struct mbuf *, struct ifnet *, u_long); +int ip_fragment(struct mbuf *, struct mbuf_list *, struct ifnet *, u_long); void ip_freef(struct ipq *); void ip_freemoptions(struct ip_moptions *); int ip_getmoptions(int, struct ip_moptions *, struct mbuf *); Index: netinet6/ip6_id.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_id.c,v retrieving revision 1.14 diff -u -p -r1.14 ip6_id.c --- netinet6/ip6_id.c 24 Jun 2020 22:03:44 -0000 1.14 +++ netinet6/ip6_id.c 26 Feb 2021 10:41:57 -0000 @@ -83,6 +83,7 @@ #include <sys/param.h> #include <sys/kernel.h> +#include <sys/mbuf.h> #include <sys/socket.h> #include <sys/systm.h> Index: netinet6/ip6_output.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v retrieving revision 1.254 diff -u -p -r1.254 ip6_output.c --- netinet6/ip6_output.c 23 Feb 2021 11:43:41 -0000 1.254 +++ netinet6/ip6_output.c 26 Feb 2021 10:41:57 -0000 @@ -154,12 +154,12 @@ struct idgen32_ctx ip6_id_ctx; * which is rt_mtu. */ int -ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, +ip6_output(struct mbuf *m, struct ip6_pktopts *opt, struct route_in6 *ro, int flags, struct ip6_moptions *im6o, struct inpcb *inp) { struct ip6_hdr *ip6; struct ifnet *ifp = NULL; - struct mbuf *m = m0; + struct mbuf_list ml; int hlen, tlen; struct route_in6 ip6route; struct rtentry *rt = NULL; @@ -174,6 +174,7 @@ ip6_output(struct mbuf *m0, struct ip6_p struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; u_int8_t sproto = 0; + u_char nextproto; #ifdef IPSEC struct tdb *tdb = NULL; #endif /* IPSEC */ @@ -709,64 +710,47 @@ reroute: /* jumbo payload cannot be fragmented */ error = EMSGSIZE; goto bad; - } else { - u_char nextproto; -#if 0 - struct ip6ctlparam ip6cp; - u_int32_t mtu32; -#endif - - /* - * Too large for the destination or interface; - * fragment if possible. - * Must be able to put at least 8 bytes per fragment. - */ - hlen = unfragpartlen; - if (mtu > IPV6_MAXPACKET) - mtu = IPV6_MAXPACKET; - - /* - * Change the next header field of the last header in the - * unfragmentable part. - */ - if (exthdrs.ip6e_rthdr) { - nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); - *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; - } else if (exthdrs.ip6e_dest1) { - nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); - *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; - } else if (exthdrs.ip6e_hbh) { - nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); - *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; - } else { - nextproto = ip6->ip6_nxt; - ip6->ip6_nxt = IPPROTO_FRAGMENT; - } - - m0 = m; - error = ip6_fragment(m0, hlen, nextproto, mtu); - if (error) - ip6stat_inc(ip6s_odropped); } /* - * Remove leading garbages. + * Too large for the destination or interface; + * fragment if possible. + * Must be able to put at least 8 bytes per fragment. */ - m = m0->m_nextpkt; - m0->m_nextpkt = NULL; - m_freem(m0); - for (m0 = m; m; m = m0) { - m0 = m->m_nextpkt; - m->m_nextpkt = NULL; - if (error == 0) { - ip6stat_inc(ip6s_ofragments); - error = ifp->if_output(ifp, m, sin6tosa(dst), - ro->ro_rt); - } else - m_freem(m); + hlen = unfragpartlen; + if (mtu > IPV6_MAXPACKET) + mtu = IPV6_MAXPACKET; + + /* + * Change the next header field of the last header in the + * unfragmentable part. + */ + if (exthdrs.ip6e_rthdr) { + nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); + *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; + } else if (exthdrs.ip6e_dest1) { + nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); + *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; + } else if (exthdrs.ip6e_hbh) { + nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); + *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; + } else { + nextproto = ip6->ip6_nxt; + ip6->ip6_nxt = IPPROTO_FRAGMENT; } - if (error == 0) + error = ip6_fragment(m, &ml, hlen, nextproto, mtu); + if (error) + goto done; + + while ((m = ml_dequeue(&ml)) != NULL) { + error = ifp->if_output(ifp, m, sin6tosa(dst), ro->ro_rt); + if (error) + break; + } + if (error) + ml_purge(&ml); + else ip6stat_inc(ip6s_fragmented); done: @@ -776,7 +760,6 @@ done: } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) { rtfree(ro_pmtu->ro_rt); } - return (error); freehdrs: @@ -791,45 +774,48 @@ bad: } int -ip6_fragment(struct mbuf *m0, int hlen, u_char nextproto, u_long mtu) +ip6_fragment(struct mbuf *m0, struct mbuf_list *ml, int hlen, u_char nextproto, + u_long mtu) { - struct mbuf *m, **mnext, *m_frgpart; + struct mbuf *m, *m_frgpart; struct ip6_hdr *mhip6; struct ip6_frag *ip6f; u_int32_t id; int tlen, len, off; int error; - id = htonl(ip6_randomid()); - - mnext = &m0->m_nextpkt; - *mnext = NULL; + ml_init(ml); tlen = m0->m_pkthdr.len; len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; - if (len < 8) - return (EMSGSIZE); + if (len < 8) { + error = EMSGSIZE; + goto bad; + } + + id = htonl(ip6_randomid()); /* * Loop through length of segment after first fragment, - * make new header and copy data of each part and link onto - * chain. + * make new header and copy data of each part and link onto chain. */ for (off = hlen; off < tlen; off += len) { struct mbuf *mlast; - if ((m = m_gethdr(M_DONTWAIT, MT_HEADER)) == NULL) - return (ENOBUFS); - *mnext = m; - mnext = &m->m_nextpkt; + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m == NULL) { + error = ENOBUFS; + goto bad; + } + ml_enqueue(ml, m); if ((error = m_dup_pkthdr(m, m0, M_DONTWAIT)) != 0) - return (error); + goto bad; m->m_data += max_linkhdr; mhip6 = mtod(m, struct ip6_hdr *); *mhip6 = *mtod(m0, struct ip6_hdr *); m->m_len = sizeof(*mhip6); if ((error = ip6_insertfraghdr(m0, m, hlen, &ip6f)) != 0) - return (error); + goto bad; ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7)); if (off + len >= tlen) len = tlen - off; @@ -837,8 +823,10 @@ ip6_fragment(struct mbuf *m0, int hlen, ip6f->ip6f_offlg |= IP6F_MORE_FRAG; mhip6->ip6_plen = htons((u_int16_t)(len + hlen + sizeof(*ip6f) - sizeof(struct ip6_hdr))); - if ((m_frgpart = m_copym(m0, off, len, M_DONTWAIT)) == NULL) - return (ENOBUFS); + if ((m_frgpart = m_copym(m0, off, len, M_DONTWAIT)) == NULL) { + error = ENOBUFS; + goto bad; + } for (mlast = m; mlast->m_next; mlast = mlast->m_next) ; mlast->m_next = m_frgpart; @@ -848,7 +836,15 @@ ip6_fragment(struct mbuf *m0, int hlen, ip6f->ip6f_nxt = nextproto; } + ip6stat_add(ip6s_ofragments, ml_len(ml)); + m_freem(m0); return (0); + +bad: + ip6stat_inc(ip6s_odropped); + ml_purge(ml); + m_freem(m0); + return (error); } int Index: netinet6/ip6_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_var.h,v retrieving revision 1.87 diff -u -p -r1.87 ip6_var.h --- netinet6/ip6_var.h 11 Jan 2021 13:28:54 -0000 1.87 +++ netinet6/ip6_var.h 26 Feb 2021 10:41:57 -0000 @@ -322,7 +322,7 @@ void ip6_forward(struct mbuf *, struct r void ip6_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in6 *); int ip6_output(struct mbuf *, struct ip6_pktopts *, struct route_in6 *, int, struct ip6_moptions *, struct inpcb *); -int ip6_fragment(struct mbuf *, int, u_char, u_long); +int ip6_fragment(struct mbuf *, struct mbuf_list *, int, u_char, u_long); int ip6_ctloutput(int, struct socket *, int, int, struct mbuf *); int ip6_raw_ctloutput(int, struct socket *, int, int, struct mbuf *); void ip6_initpktopts(struct ip6_pktopts *);