Thank you for the comments Martin. Here is an improved diff. On Sun, Dec 23, 2018 at 02:22:45PM -0200, Martin Pieuchot wrote: > Comments below > > > Index: net/if_ethersubr.c > > =================================================================== > > RCS file: /cvs/src/sys/net/if_ethersubr.c,v > > retrieving revision 1.255 > > diff -u -p -r1.255 if_ethersubr.c > > --- net/if_ethersubr.c 12 Dec 2018 05:38:26 -0000 1.255 > > +++ net/if_ethersubr.c 18 Dec 2018 11:03:33 -0000 > > @@ -246,18 +246,28 @@ ether_resolve(struct ifnet *ifp, struct > > sizeof(eh->ether_dhost)); > > break; > > #ifdef INET6 > > +do_v6: > > case AF_INET6: > > error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost); > > if (error) > > return (error); > > break; > > #endif > > +do_v4: > > case AF_INET: > > - case AF_MPLS: > > error = arpresolve(ifp, rt, m, dst, eh->ether_dhost); > > if (error) > > return (error); > > break; > > + case AF_MPLS: > > + switch (rt->rt_gateway->sa_family) { > > + case AF_INET: > > + goto do_v4; > > +#ifdef INET6 > > + case AF_INET6: > > + goto do_v6; > > +#endif > > + } > > default: > > senderr(EHOSTUNREACH); > > } > > This makes ether_resolve() (aka ether_output()) even more spaghetti for MPLS. > > I know this is not your code, but is it possible to do better than that? > The current logic hides an encapsulation. For example would it be possible > to call ether_resolve() recursively in the AF_MPLS case? >
I removed the spaghetti-code. It is simpler but it still "hides" the encap though. > > Index: net/if_mpe.c > > =================================================================== > > RCS file: /cvs/src/sys/net/if_mpe.c,v > > retrieving revision 1.64 > > diff -u -p -r1.64 if_mpe.c > > --- net/if_mpe.c 9 Jan 2018 15:24:24 -0000 1.64 > > +++ net/if_mpe.c 18 Dec 2018 11:03:33 -0000 > > @@ -85,7 +85,7 @@ mpe_clone_create(struct if_clone *ifc, i > > mpeif->sc_unit = unit; > > ifp = &mpeif->sc_if; > > snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit); > > - ifp->if_flags = IFF_POINTOPOINT; > > + ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; > > You added this flag because the nd6 codes relies on it, right? But does > this reflect reality? > In in6_ifattach(), IFF_MULTICAST is needed to proceed : L456: if ((ifp->if_flags & IFF_MULTICAST) == 0) return (EINVAL); but it does not reflect reality as mpe(4) does nothing with multicast (no DAD, no ND) > > @@ -204,6 +214,9 @@ mpeoutput(struct ifnet *ifp, struct mbuf > > int error; > > int off; > > in_addr_t addr; > > +#ifdef INET6 > > + struct in6_addr addr6; > > +#endif > > u_int8_t op = 0; > > > > #ifdef DIAGNOSTIC > > @@ -251,6 +264,39 @@ mpeoutput(struct ifnet *ifp, struct mbuf > > m_copyback(m, sizeof(sa_family_t), sizeof(in_addr_t), > > &addr, M_NOWAIT); > > break; > > +#ifdef INET6 > > + case AF_INET6: > > + if (!rt || !(rt->rt_flags & RTF_MPLS)) { > > I know you copied past, but checking for rt != NULL should be avoided. > You should use rtisvalid(). Could you please merge the blocks related > to rtentry. I general if you can reduce the copy/paste between v4 & v6 > that would be great! > > Another style neat, we use the ISSET() macro for checking RTF_* flags ;) > fixed :) > > @@ -354,6 +400,9 @@ mpeioctl(struct ifnet *ifp, u_long cmd, > > } > > /* return with ENOTTY so that the parent handler finishes */ > > return (ENOTTY); > > + case SIOCADDMULTI: > > + case SIOCDELMULTI: > > + break; > > Why do you need these handlers? This is hiding something wrong :) > These are called when configuring IPv6 on the interface. > > Index: netinet6/icmp6.c > > =================================================================== > > RCS file: /cvs/src/sys/netinet6/icmp6.c,v > > retrieving revision 1.228 > > diff -u -p -r1.228 icmp6.c > > --- netinet6/icmp6.c 10 Dec 2018 23:00:01 -0000 1.228 > > +++ netinet6/icmp6.c 18 Dec 2018 11:03:33 -0000 > > @@ -102,6 +102,10 @@ > > #include <net/pfvar.h> > > #endif > > > > +#ifdef MPLS > > +#include <netmpls/mpls.h> > > +#endif > > Can you please find another way than spreading an #ifdef MPLS in IPv6 > code? > fixed ! Index: net/if_ethersubr.c =================================================================== RCS file: /cvs/src/sys/net/if_ethersubr.c,v retrieving revision 1.256 diff -u -p -r1.256 if_ethersubr.c --- net/if_ethersubr.c 20 Dec 2018 23:00:55 -0000 1.256 +++ net/if_ethersubr.c 24 Dec 2018 14:39:26 -0000 @@ -243,7 +243,11 @@ ether_resolve(struct ifnet *ifp, struct if (!ISSET(ifp->if_xflags, IFXF_MPLS)) senderr(ENETUNREACH); - switch (dst->sa_family) { + af = dst->sa_family; + if (af == AF_MPLS) + af = rt->rt_gateway->sa_family; + + switch (af) { case AF_LINK: if (satosdl(dst)->sdl_alen < sizeof(eh->ether_dhost)) senderr(EHOSTUNREACH); @@ -258,7 +262,6 @@ ether_resolve(struct ifnet *ifp, struct break; #endif case AF_INET: - case AF_MPLS: error = arpresolve(ifp, rt, m, dst, eh->ether_dhost); if (error) return (error); Index: net/if_mpe.c =================================================================== RCS file: /cvs/src/sys/net/if_mpe.c,v retrieving revision 1.64 diff -u -p -r1.64 if_mpe.c --- net/if_mpe.c 9 Jan 2018 15:24:24 -0000 1.64 +++ net/if_mpe.c 24 Dec 2018 14:39:26 -0000 @@ -85,7 +85,7 @@ mpe_clone_create(struct if_clone *ifc, i mpeif->sc_unit = unit; ifp = &mpeif->sc_if; snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit); - ifp->if_flags = IFF_POINTOPOINT; + ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; ifp->if_xflags = IFXF_CLONED; ifp->if_softc = mpeif; ifp->if_mtu = MPE_MTU; @@ -157,6 +157,16 @@ mpestart(struct ifnet *ifp0) sizeof(in_addr_t)); m_adj(m, sizeof(in_addr_t)); break; +#ifdef INET6 + case AF_INET6: + memset(sa, 0, sizeof(struct sockaddr_in6)); + satosin6(sa)->sin6_family = af; + satosin6(sa)->sin6_len = sizeof(struct sockaddr_in6); + bcopy(mtod(m, caddr_t), &satosin6(sa)->sin6_addr, + sizeof(struct in6_addr)); + m_adj(m, sizeof(struct in6_addr)); + break; +#endif default: m_freem(m); continue; @@ -204,6 +214,7 @@ mpeoutput(struct ifnet *ifp, struct mbuf int error; int off; in_addr_t addr; + struct in6_addr addr6; u_int8_t op = 0; #ifdef DIAGNOSTIC @@ -217,23 +228,23 @@ mpeoutput(struct ifnet *ifp, struct mbuf /* XXX assumes MPLS is always in rdomain 0 */ m->m_pkthdr.ph_rtableid = 0; + if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPLS)) { + m_freem(m); + error = ENETUNREACH; + goto out; + } + shim.shim_label = ((struct rt_mpls *)rt->rt_llinfo)->mpls_label; + shim.shim_label |= MPLS_BOS_MASK; + op = ((struct rt_mpls *)rt->rt_llinfo)->mpls_operation; + if (op != MPLS_OP_PUSH) { + m_freem(m); + error = ENETUNREACH; + goto out; + } + error = 0; switch (dst->sa_family) { case AF_INET: - if (!rt || !(rt->rt_flags & RTF_MPLS)) { - m_freem(m); - error = ENETUNREACH; - goto out; - } - shim.shim_label = - ((struct rt_mpls *)rt->rt_llinfo)->mpls_label; - shim.shim_label |= MPLS_BOS_MASK; - op = ((struct rt_mpls *)rt->rt_llinfo)->mpls_operation; - if (op != MPLS_OP_PUSH) { - m_freem(m); - error = ENETUNREACH; - goto out; - } if (mpls_mapttl_ip) { struct ip *ip; ip = mtod(m, struct ip *); @@ -251,6 +262,26 @@ mpeoutput(struct ifnet *ifp, struct mbuf m_copyback(m, sizeof(sa_family_t), sizeof(in_addr_t), &addr, M_NOWAIT); break; +#ifdef INET6 + case AF_INET6: + if (mpls_mapttl_ip) { + struct ip6_hdr *ip6; + ip6 = mtod(m, struct ip6_hdr *); + shim.shim_label |= htonl(ip6->ip6_hops) & MPLS_TTL_MASK; + } else + shim.shim_label |= htonl(mpls_defttl) & MPLS_TTL_MASK; + off = sizeof(sa_family_t) + sizeof(struct in6_addr); + M_PREPEND(m, sizeof(shim) + off, M_DONTWAIT); + if (m == NULL) { + error = ENOBUFS; + goto out; + } + *mtod(m, sa_family_t *) = AF_INET6; + addr6 = satosin6(rt->rt_gateway)->sin6_addr; + m_copyback(m, sizeof(sa_family_t), sizeof(struct in6_addr), + &addr6, M_NOWAIT); + break; +#endif default: m_freem(m); error = EPFNOSUPPORT; @@ -354,6 +385,9 @@ mpeioctl(struct ifnet *ifp, u_long cmd, } /* return with ENOTTY so that the parent handler finishes */ return (ENOTTY); + case SIOCADDMULTI: + case SIOCDELMULTI: + break; default: return (ENOTTY); } Index: netinet/icmp6.h =================================================================== RCS file: /cvs/src/sys/netinet/icmp6.h,v retrieving revision 1.47 diff -u -p -r1.47 icmp6.h --- netinet/icmp6.h 8 Aug 2017 18:15:58 -0000 1.47 +++ netinet/icmp6.h 24 Dec 2018 14:39:26 -0000 @@ -610,16 +610,17 @@ struct rtentry; struct rttimer; struct in6_multi; -void icmp6_init(void); -void icmp6_paramerror(struct mbuf *, int); -void icmp6_error(struct mbuf *, int, int, int); -int icmp6_input(struct mbuf **, int *, int, int); -void icmp6_fasttimo(void); -void icmp6_reflect(struct mbuf *, size_t); -void icmp6_prepare(struct mbuf *); -void icmp6_redirect_input(struct mbuf *, int); -void icmp6_redirect_output(struct mbuf *, struct rtentry *); -int icmp6_sysctl(int *, u_int, void *, size_t *, void *, size_t); +void icmp6_init(void); +void icmp6_paramerror(struct mbuf *, int); +struct mbuf *icmp6_do_error(struct mbuf *, int, int, int); +void icmp6_error(struct mbuf *, int, int, int); +int icmp6_input(struct mbuf **, int *, int, int); +void icmp6_fasttimo(void); +int icmp6_reflect(struct mbuf *, size_t, struct sockaddr *); +void icmp6_prepare(struct mbuf *); +void icmp6_redirect_input(struct mbuf *, int); +void icmp6_redirect_output(struct mbuf *, struct rtentry *); +int icmp6_sysctl(int *, u_int, void *, size_t *, void *, size_t); struct ip6ctlparam; void icmp6_mtudisc_update(struct ip6ctlparam *, int); Index: netinet6/icmp6.c =================================================================== RCS file: /cvs/src/sys/netinet6/icmp6.c,v retrieving revision 1.228 diff -u -p -r1.228 icmp6.c --- netinet6/icmp6.c 10 Dec 2018 23:00:01 -0000 1.228 +++ netinet6/icmp6.c 24 Dec 2018 14:39:26 -0000 @@ -231,11 +231,8 @@ icmp6_mtudisc_callback_register(void (*f LIST_INSERT_HEAD(&icmp6_mtudisc_callbacks, mc, mc_list); } -/* - * Generate an error packet of type error in response to bad IP6 packet. - */ -void -icmp6_error(struct mbuf *m, int type, int code, int param) +struct mbuf * +icmp6_do_error(struct mbuf *m, int type, int code, int param) { struct ip6_hdr *oip6, *nip6; struct icmp6_hdr *icmp6; @@ -250,8 +247,9 @@ icmp6_error(struct mbuf *m, int type, in if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); - if (m == NULL) - return; + if (m == NULL) { + return (NULL); + } } oip6 = mtod(m, struct ip6_hdr *); @@ -294,7 +292,7 @@ icmp6_error(struct mbuf *m, int type, in sizeof(*icp)); if (icp == NULL) { icmp6stat_inc(icp6s_tooshort); - return; + return (NULL); } if (icp->icmp6_type < ICMP6_ECHO_REQUEST || icp->icmp6_type == ND_REDIRECT) { @@ -334,7 +332,7 @@ icmp6_error(struct mbuf *m, int type, in m = m_pullup(m, preplen); if (m == NULL) { nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__)); - return; + return (NULL); } nip6 = mtod(m, struct ip6_hdr *); @@ -361,18 +359,33 @@ icmp6_error(struct mbuf *m, int type, in m->m_pkthdr.ph_ifidx = 0; icmp6stat_inc(icp6s_outhist + type); - icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */ - return; + return (m); freeit: /* * If we can't tell wheter or not we can generate ICMP6, free it. */ - m_freem(m); + return(m_freem(m)); } /* + * Generate an error packet of type error in response to bad IP6 packet. + */ +void +icmp6_error(struct mbuf *m, int type, int code, int param) +{ + struct mbuf *n; + + n = icmp6_do_error(m, type, code, param); + if (n != NULL) { + /* header order: IPv6 - ICMPv6 */ + if (!icmp6_reflect(n, sizeof(struct ip6_hdr), NULL)) + ip6_send(n); + } +} + +/* * Process a received ICMP6 message. */ int @@ -597,7 +610,8 @@ icmp6_input(struct mbuf **mp, int *offp, nicmp6->icmp6_code = 0; icmp6stat_inc(icp6s_reflect); icmp6stat_inc(icp6s_outhist + ICMP6_ECHO_REPLY); - icmp6_reflect(n, noff); + if (!icmp6_reflect(n, noff, NULL)) + ip6_send(n); } if (!m) goto freeit; @@ -1030,8 +1044,8 @@ icmp6_mtudisc_update(struct ip6ctlparam * Reflect the ip6 packet back to the source. * OFF points to the icmp6 header, counted from the top of the mbuf. */ -void -icmp6_reflect(struct mbuf *m, size_t off) +int +icmp6_reflect(struct mbuf *m, size_t off, struct sockaddr *sa) { struct rtentry *rt = NULL; struct ip6_hdr *ip6; @@ -1051,8 +1065,10 @@ icmp6_reflect(struct mbuf *m, size_t off goto bad; } - if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) - goto bad; + if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) { + m_freem(m); + return (ELOOP); + } rtableid = m->m_pkthdr.ph_rtableid; m_resethdr(m); m->m_pkthdr.ph_rtableid = rtableid; @@ -1071,7 +1087,7 @@ icmp6_reflect(struct mbuf *m, size_t off l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) - return; + return (EMSGSIZE); } memcpy(mtod(m, caddr_t), &nip6, sizeof(nip6)); } else /* off == sizeof(struct ip6_hdr) */ { @@ -1079,7 +1095,7 @@ icmp6_reflect(struct mbuf *m, size_t off l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) - return; + return (EMSGSIZE); } } ip6 = mtod(m, struct ip6_hdr *); @@ -1094,8 +1110,8 @@ icmp6_reflect(struct mbuf *m, size_t off ip6->ip6_dst = ip6->ip6_src; /* - * XXX: make sure to embed scope zone information, using - * already embedded IDs or the received interface (if any). + * XXX: make sure to embed scope zone information, using already + * embedded IDs or the received interface (if any). * Note that rcvif may be NULL. * TODO: scoped routing case (XXX). */ @@ -1108,38 +1124,42 @@ icmp6_reflect(struct mbuf *m, size_t off sa6_dst.sin6_len = sizeof(sa6_dst); sa6_dst.sin6_addr = t; - /* - * If the incoming packet was addressed directly to us (i.e. unicast), - * use dst as the src for the reply. - * The IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED case would be VERY rare, - * but is possible (for example) when we encounter an error while - * forwarding procedure destined to a duplicated address of ours. - */ - rt = rtalloc(sin6tosa(&sa6_dst), 0, rtableid); - if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) && - !ISSET(ifatoia6(rt->rt_ifa)->ia6_flags, - IN6_IFF_ANYCAST|IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)) { - src = &t; + if (sa == NULL) { + /* + * If the incoming packet was addressed directly to us (i.e. + * unicast), use dst as the src for the reply. The + * IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED case would be VERY rare, + * but is possible (for example) when we encounter an error + * while forwarding procedure destined to a duplicated address + * of ours. + */ + rt = rtalloc(sin6tosa(&sa6_dst), 0, rtableid); + if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) && + !ISSET(ifatoia6(rt->rt_ifa)->ia6_flags, + IN6_IFF_ANYCAST|IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)) { + src = &t; + } + rtfree(rt); + rt = NULL; + sa = sin6tosa(&sa6_src); } - rtfree(rt); - rt = NULL; if (src == NULL) { struct in6_ifaddr *ia6; /* * This case matches to multicasts, our anycast, or unicasts - * that we do not own. Select a source address based on the + * that we do not own. Select a source address based on the * source address of the erroneous packet. */ - rt = rtalloc(sin6tosa(&sa6_src), RT_RESOLVE, rtableid); + rt = rtalloc(sa, RT_RESOLVE, rtableid); if (!rtisvalid(rt)) { char addr[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "%s: source can't be determined: dst=%s\n", __func__, inet_ntop(AF_INET6, &sa6_src.sin6_addr, - addr, sizeof(addr)))); + addr, sizeof(addr)))); rtfree(rt); goto bad; } @@ -1167,13 +1187,11 @@ icmp6_reflect(struct mbuf *m, size_t off */ m->m_flags &= ~(M_BCAST|M_MCAST); - - ip6_send(m); - return; + return (0); bad: m_freem(m); - return; + return (EHOSTUNREACH); } void Index: netmpls/mpls_input.c =================================================================== RCS file: /cvs/src/sys/netmpls/mpls_input.c,v retrieving revision 1.68 diff -u -p -r1.68 mpls_input.c --- netmpls/mpls_input.c 12 Jan 2018 06:57:56 -0000 1.68 +++ netmpls/mpls_input.c 24 Dec 2018 14:39:26 -0000 @@ -36,6 +36,8 @@ #ifdef INET6 #include <netinet/ip6.h> +#include <netinet6/ip6_var.h> +#include <netinet/icmp6.h> #endif /* INET6 */ #include <netmpls/mpls.h> @@ -200,8 +202,19 @@ do_v6: #if NMPE > 0 if (ifp->if_type == IFT_MPLS) { smpls = satosmpls(rt_key(rt)); - mpe_input(m, ifp, smpls, ttl); - goto done; + if (m->m_len < sizeof(u_char) && + (m = m_pullup(m, sizeof(u_char))) == NULL) + return; + switch (*mtod(m, u_char *) >> 4) { + case IPVERSION: + mpe_input(m, ifp, smpls, ttl); + goto done; +#ifdef INET6 + case IPV6_VERSION >> 4: + mpe_input6(m, ifp, smpls, ttl); + goto done; +#endif + } } #endif if (ifp->if_type == IFT_MPLSTUNNEL) { @@ -345,6 +358,9 @@ mpls_do_error(struct mbuf *m, int type, struct icmp *icp; struct ip *ip; int nstk, error; +#ifdef INET6 + struct ip6_hdr *ip6; +#endif for (nstk = 0; nstk < MPLS_INKERNEL_LOOP_MAX; nstk++) { if (m->m_len < sizeof(*shim) && @@ -419,6 +435,28 @@ mpls_do_error(struct mbuf *m, int type, break; #ifdef INET6 case IPV6_VERSION >> 4: + m = icmp6_do_error(m, ICMP6_TIME_EXCEEDED, + ICMP6_TIME_EXCEED_TRANSIT, 0); + if (m == NULL) + return (NULL); + + /* set ip_src to something usable, based on the MPLS label */ + bzero(&sa_mpls, sizeof(sa_mpls)); + smpls = &sa_mpls; + smpls->smpls_family = AF_MPLS; + smpls->smpls_len = sizeof(*smpls); + smpls->smpls_label = shim->shim_label & MPLS_LABEL_MASK; + + error = icmp6_reflect(m, sizeof(struct ip6_hdr), + smplstosa(smpls)); + if (error) + return (NULL); + + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); + m->m_pkthdr.csum_flags |= M_ICMP_CSUM_OUT; + in6_proto_cksum_out(m, NULL); + break; #endif default: m_freem(m);