On Sat, Sep 20, 2014 at 12:33:11PM -0300, Rafael Zalamena wrote: > On Sun, Sep 14, 2014 at 11:51:07PM -0300, Rafael Zalamena wrote: > > The following patch implements the basics of the wire network interface. > > > > --- snipped --- > > I've added support for tcpdump'ing the wire interface; it will capture all > data flowing through the wire without the MPLS / VPLS labels and control > word (just like enc(4) does for IPsec). If you want to see the full packet, > capture on the interface facing your MPLS network (tcpdump needs some work to > identify the VPLS label and control word). >
Updated diff of wire(4) (with VLAN support): diff --git sys/conf/GENERIC sys/conf/GENERIC index a265eea..5f79987 100644 --- sys/conf/GENERIC +++ sys/conf/GENERIC @@ -93,6 +93,7 @@ pseudo-device systrace 1 # system call tracing device # clonable devices pseudo-device bpfilter # packet filter pseudo-device bridge # network bridging support +pseudo-device wire # pseudowire support pseudo-device carp # CARP protocol support pseudo-device gif # IPv[46] over IPv[46] tunnel (RFC1933) pseudo-device gre # GRE encapsulation interface diff --git sys/conf/files sys/conf/files index ab7af00..6af6812 100644 --- sys/conf/files +++ sys/conf/files @@ -550,6 +550,7 @@ pseudo-device tun: ifnet pseudo-device bpfilter: ifnet pseudo-device enc: ifnet pseudo-device bridge: ifnet, ether +pseudo-device wire: ifnet, ether pseudo-device vlan: ifnet, ether pseudo-device carp: ifnet, ether pseudo-device sppp: ifnet @@ -787,6 +788,7 @@ file net/if_tun.c tun needs-count file net/if_bridge.c bridge needs-count file net/bridgestp.c bridge file net/if_vlan.c vlan needs-count +file net/if_wire.c wire needs-count file net/pipex.c pipex file net/radix.c file net/radix_mpath.c !small_kernel diff --git sys/net/if_bridge.c sys/net/if_bridge.c index a70f813..8f031b7 100644 --- sys/net/if_bridge.c +++ sys/net/if_bridge.c @@ -36,6 +36,7 @@ #include "pf.h" #include "carp.h" #include "vlan.h" +#include "wire.h" #include <sys/param.h> #include <sys/systm.h> @@ -365,6 +366,11 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) /* Nothing needed */ } #endif /* NGIF */ +#if NWIRE > 0 + else if (ifs->if_type == IFT_MPLSTUNNEL) { + /* Nothing needed */ + } +#endif /* NWIRE */ else { error = EINVAL; break; diff --git sys/net/if_wire.c sys/net/if_wire.c new file mode 100644 index 0000000..9fb20af --- /dev/null +++ sys/net/if_wire.c @@ -0,0 +1,512 @@ +/* + * Copyright (c) 2014 Rafael Zalamena <rzalam...@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with 
or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "bpfilter.h" +#include "vlan.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/errno.h> + +#include <net/if.h> +#include <net/if_types.h> +#include <net/route.h> + +#include <netinet/in.h> + +#include <netinet/if_ether.h> +#include <netmpls/mpls.h> + +#if NBPFILTER > 0 +#include <net/bpf.h> +#endif /* NBPFILTER */ + +#if NVLAN > 0 +#include <net/if_vlan_var.h> +#endif + +struct wire_softc { + struct ifnet sc_if; + u_int32_t sc_flags; + u_int32_t sc_type; + struct shim_hdr sc_lshim; + struct shim_hdr sc_rshim; + struct sockaddr sc_nexthop; +}; + +void wireattach(int); +int wire_clone_create(struct if_clone *, int); +int wire_clone_destroy(struct ifnet *); +int wire_ioctl(struct ifnet *, u_long, caddr_t); +int wire_output(struct ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *); +void wire_start(struct ifnet *); +struct mbuf *wire_vlan_handle(struct mbuf *, struct wire_softc *); +int wire_labelroute(struct ifnet *, struct shim_hdr *, int); + +struct if_clone wire_cloner = + IF_CLONE_INITIALIZER("wire", wire_clone_create, wire_clone_destroy); + +/* ARGSUSED */ +void +wireattach(int n) +{ + if_clone_attach(&wire_cloner); +} + +int +wire_clone_create(struct if_clone *ifc, int unit) +{ + struct wire_softc *sc; + struct 
ifnet *ifp; + + sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT | M_ZERO); + if (sc == NULL) + return (ENOMEM); + + ifp = &sc->sc_if; + snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", + ifc->ifc_name, unit); + ifp->if_softc = sc; + ifp->if_mtu = ETHERMTU; + ifp->if_flags = IFF_POINTOPOINT; + ifp->if_ioctl = wire_ioctl; + ifp->if_output = wire_output; + ifp->if_start = wire_start; + ifp->if_type = IFT_MPLSTUNNEL; + ifp->if_hdrlen = ETHER_HDR_LEN; + IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + IFQ_SET_READY(&ifp->if_snd); + + if_attach(ifp); + if_alloc_sadl(ifp); + +#if NBPFILTER > 0 + bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN); +#endif + + return (0); +} + +int +wire_clone_destroy(struct ifnet *ifp) +{ + struct wire_softc *sc = ifp->if_softc; + + ifp->if_flags &= ~IFF_RUNNING; + + if (sc->sc_lshim.shim_label) + mpls_shim_del(&sc->sc_lshim); + + if_detach(ifp); + free(sc, M_DEVBUF, 0); + + return (0); +} + +int +wire_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct ifreq *ifr = (struct ifreq *) data; + struct wire_softc *sc = ifp->if_softc; + int error = 0; + struct ifwirereq iwr; + + switch (cmd) { + case SIOCSIFMTU: + if (ifr->ifr_mtu < MPE_MTU_MIN || + ifr->ifr_mtu > MPE_MTU_MAX) + error = EINVAL; + else + ifp->if_mtu = ifr->ifr_mtu; + break; + + case SIOCSIFFLAGS: + if ((ifp->if_flags & IFF_UP)) + ifp->if_flags |= IFF_RUNNING; + else + ifp->if_flags &= ~IFF_RUNNING; + break; + + case SIOCSETWIRECFG: + error = suser(curproc, 0); + if (error != 0) + break; + + error = copyin(ifr->ifr_data, &iwr, sizeof(iwr)); + if (error != 0) + break; + + /* Teardown all configuration if got no nexthop */ + if (satosin(&iwr.iwr_nexthop)->sin_addr.s_addr == 0) { + if (wire_labelroute(ifp, &sc->sc_lshim, 0) == 0) + memset(&sc->sc_lshim, 0, sizeof(sc->sc_lshim)); + + memset(&sc->sc_rshim, 0, sizeof(sc->sc_rshim)); + memset(&sc->sc_nexthop, 0, sizeof(sc->sc_nexthop)); + sc->sc_flags = 0; + sc->sc_type = 0; + break; + } + + /* Validate input */ + if 
(satosin(&iwr.iwr_nexthop)->sin_family != AF_INET || + iwr.iwr_lshim.shim_label > MPLS_LABEL_MAX || + iwr.iwr_lshim.shim_label <= MPLS_LABEL_RESERVED_MAX || + iwr.iwr_rshim.shim_label > MPLS_LABEL_MAX || + iwr.iwr_rshim.shim_label <= MPLS_LABEL_RESERVED_MAX) { + error = EINVAL; + break; + } + + /* Setup labels and create inbound route */ + iwr.iwr_lshim.shim_label = + htonl(iwr.iwr_lshim.shim_label << MPLS_LABEL_OFFSET); + iwr.iwr_rshim.shim_label = + htonl(iwr.iwr_rshim.shim_label << MPLS_LABEL_OFFSET); + + if (sc->sc_lshim.shim_label != iwr.iwr_lshim.shim_label) { + if (mpls_shim_lookup(&iwr.iwr_lshim) != NULL) { + error = EEXIST; + break; + } + + if (sc->sc_lshim.shim_label != 0) + wire_labelroute(ifp, &sc->sc_lshim, 0); + + error = wire_labelroute(ifp, &iwr.iwr_lshim, 1); + if (error != 0) + break; + + sc->sc_lshim.shim_label = iwr.iwr_lshim.shim_label; + } + + /* Apply configuration */ + sc->sc_flags = iwr.iwr_flags; + sc->sc_type = iwr.iwr_type; + sc->sc_rshim.shim_label = iwr.iwr_rshim.shim_label; + sc->sc_rshim.shim_label |= MPLS_BOS_MASK; + + memset(&sc->sc_nexthop, 0, sizeof(sc->sc_nexthop)); + satosin(&sc->sc_nexthop)->sin_family = + satosin(&iwr.iwr_nexthop)->sin_family; + satosin(&sc->sc_nexthop)->sin_len = + sizeof(struct sockaddr_in); + satosin(&sc->sc_nexthop)->sin_addr.s_addr = + satosin(&iwr.iwr_nexthop)->sin_addr.s_addr; + break; + + case SIOCGETWIRECFG: + iwr.iwr_flags = sc->sc_flags; + iwr.iwr_type = sc->sc_type; + iwr.iwr_lshim.shim_label = + ((ntohl(sc->sc_lshim.shim_label & MPLS_LABEL_MASK)) >> + MPLS_LABEL_OFFSET); + iwr.iwr_rshim.shim_label = + ((ntohl(sc->sc_rshim.shim_label & MPLS_LABEL_MASK)) >> + MPLS_LABEL_OFFSET); + memcpy(&iwr.iwr_nexthop, &sc->sc_nexthop, + sizeof(iwr.iwr_nexthop)); + + error = copyout(&iwr, ifr->ifr_data, sizeof(iwr)); + break; + + default: + error = ENOTTY; + break; + } + return (error); +} + +void +wire_input(struct ifnet *ifp, struct mbuf *m) +{ + struct wire_softc *sc = ifp->if_softc; + struct ether_header 
*eh; + struct shim_hdr *shim; + + if (sc->sc_type == IWR_TYPE_NONE) { + m_freem(m); + return; + } + + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.ph_rtableid = ifp->if_rdomain; + + if (sc->sc_flags & IWR_FLAG_CONTROLWORD) { + shim = mtod(m, struct shim_hdr *); + m_adj(m, MPLS_HDRLEN); + + /* + * The first 4 bits identifies that this packet is a + * control word. If the control word is configured and + * we received an IP datagram we shall drop it. + */ + if (shim->shim_label & CW_ZERO_MASK) { + ifp->if_ierrors++; + m_freem(m); + return; + } + + /* We don't support fragmentation just yet. */ + if (shim->shim_label & CW_FRAG_MASK) { + ifp->if_ierrors++; + m_freem(m); + return; + } + } + + eh = mtod(m, struct ether_header *); + m_adj(m, ETHER_HDR_LEN); + if (sc->sc_type == IWR_TYPE_ETHERNET_TAGGED) { + /* Ethernet tagged, expects at least 2 VLANs */ + if (ntohs(eh->ether_type) != ETHERTYPE_QINQ) { + ifp->if_ierrors++; + m_freem(m); + return; + } + + /* Remove dummy VLAN and update ethertype */ + if (EVL_VLANOFTAG(*mtod(m, uint16_t *)) == 0) { + m_adj(m, EVL_ENCAPLEN); + eh->ether_type = htons(ETHERTYPE_VLAN); + } + } + +#if NBPFILTER > 0 + if (sc->sc_if.if_bpf) + bpf_mtap_hdr(sc->sc_if.if_bpf, (caddr_t) eh, + ETHER_HDR_LEN, m, BPF_DIRECTION_IN, NULL); +#endif + + ifp->if_ipackets++; + + ether_input(ifp, eh, m); +} + +int +wire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, + struct rtentry *rt) +{ + int error, s; + + if ((ifp->if_flags & IFF_RUNNING) == 0) { + m_freem(m); + return (0); + } + + s = splnet(); + IFQ_ENQUEUE(&ifp->if_snd, m, NULL, error); + if (error) { + splx(s); + ifp->if_oerrors++; + return (error); + } + splx(s); + + if_start(ifp); + + return (error); +} + +/* + * This routine handles VLAN tag reinsertion in packets flowing through + * the pseudowire. Also it does the necessary modifications to the VLANs + * to respect the RFC. 
+ */ +struct mbuf * +wire_vlan_handle(struct mbuf *m, struct wire_softc *sc) +{ + struct ifvlan *ifv = NULL; + struct ifnet *ifp; + struct ether_header eh; + struct vlan_shim { + uint16_t vs_tpid; + uint16_t vs_tci; + } vs; + + /* Get VLAN softc */ + ifp = m->m_pkthdr.rcvif; + if (strncmp(ifp->if_xname, "vlan", 4) == 0 || + strncmp(ifp->if_xname, "svlan", 5) == 0) + ifv = ifp->if_softc; + + /* + * Do VLAN managing. + * + * Case ethernet (raw): + * X VLAN: Nothing + * + * NOTE: In case of raw access mode, the if_vlan will do the job + * of dropping non tagged packets for us. + */ + if (sc->sc_type == IWR_TYPE_ETHERNET) + return (m); + + /* Copy ethernet header */ + m_copydata(m, 0, sizeof(eh), (caddr_t) &eh); + + /* + * Case ethernet-tagged: + * 0 VLAN: Drop packet + * 1 VLAN: Tag packet with dummy VLAN + * >1 VLAN: Nothing + */ + if (sc->sc_type == IWR_TYPE_ETHERNET_TAGGED) { + if (ifv == NULL) { + m_freem(m); + return (NULL); + } + if (ntohs(eh.ether_type) == ETHERTYPE_QINQ) + return (m); + } + + /* Add dummy VLAN to the beginning of the packet */ + M_PREPEND(m, EVL_ENCAPLEN, M_NOWAIT); + if (m == NULL) { + ifp->if_oerrors++; + return (NULL); + } + + if (m->m_len < (EVL_ENCAPLEN + sizeof(eh)) && + (m = m_pullup(m, (EVL_ENCAPLEN + sizeof(eh)))) == NULL) { + ifp->if_oerrors++; + return (NULL); + } + + /* Fill back the ethernet header */ + memcpy(mtod(m, caddr_t), &eh, sizeof(eh)); + + /* Add the dummy tag */ + vs.vs_tpid = htons(ETHERTYPE_QINQ); + vs.vs_tci = 0; + m_copyback(m, sizeof(eh) - sizeof(eh.ether_type), sizeof(vs), &vs, M_NOWAIT); + + return (m); +} + +void +wire_start(struct ifnet *ifp) +{ + struct mbuf *m; + struct rtentry *rt; + struct wire_softc *sc = ifp->if_softc; + struct shim_hdr *shim; + struct sockaddr sa; + int s; + + /* + * XXX: lie about being MPLS, so mpls_output() get the TTL from + * the right place. 
+ */ + memcpy(&sa, &sc->sc_nexthop, sizeof(sc->sc_nexthop)); + satosin(&sa)->sin_family = AF_MPLS; + + for (;;) { + s = splnet(); + IFQ_DEQUEUE(&ifp->if_snd, m); + splx(s); + if (m == NULL) + return; + + if ((ifp->if_flags & IFF_RUNNING) == 0 || + sc->sc_rshim.shim_label == 0 || + sc->sc_type == IWR_TYPE_NONE) { + m_freem(m); + continue; + } + +#if NVLAN > 0 + m = wire_vlan_handle(m, sc); + if (m == NULL) + continue; +#else + /* Ethernet tagged doesn't work without VLANs'*/ + if (sc->sc_type == IWR_TYPE_ETHERNET_TAGGED) { + m_freem(m); + continue; + } +#endif /* NVLAN */ + +#if NBPFILTER > 0 + if (sc->sc_if.if_bpf) + bpf_mtap(sc->sc_if.if_bpf, m, BPF_DIRECTION_OUT); +#endif /* NBPFILTER */ + + rt = rtalloc(&sc->sc_nexthop, RT_REPORT, 0); + if (rt == NULL) { + m_freem(m); + continue; + } + + if (sc->sc_flags & IWR_FLAG_CONTROLWORD) { + M_PREPEND(m, sizeof(*shim), M_NOWAIT); + if (m == NULL) { + rtfree(rt); + continue; + } + + shim = mtod(m, struct shim_hdr *); + memset(shim, 0, sizeof(*shim)); + } + + M_PREPEND(m, sizeof(*shim), M_NOWAIT); + if (m == NULL) { + rtfree(rt); + continue; + } + + shim = mtod(m, struct shim_hdr *); + shim->shim_label = htonl(mpls_defttl) & MPLS_TTL_MASK; + shim->shim_label |= sc->sc_rshim.shim_label; + + /* XXX: MPLS only uses domain 0 */ + m->m_pkthdr.ph_rtableid = 0; + + mpls_output(rt->rt_ifp, m, &sa, rt); + + rtfree(rt); + } +} + +int +wire_labelroute(struct ifnet *ifp, struct shim_hdr *shim, int add) +{ + int error; + struct sockaddr_mpls smpls; + + if (add && (error = mpls_shim_add(shim)) != 0) + return (error); + + memset(&smpls, 0, sizeof(smpls)); + smpls.smpls_family = AF_MPLS; + smpls.smpls_label = shim->shim_label; + smpls.smpls_len = sizeof(smpls); + if (add) + error = rt_ifa_add(ifp->if_lladdr, RTF_MPLS | RTF_UP, + smplstosa(&smpls)); + else + error = rt_ifa_del(ifp->if_lladdr, RTF_MPLS | RTF_UP, + smplstosa(&smpls)); + + if (error != 0 || (add == 0 && error == 0)) + mpls_shim_del(shim); + + return (error); +} diff --git 
sys/netmpls/mpls.h sys/netmpls/mpls.h index 0407e99..5e9f530 100644 --- sys/netmpls/mpls.h +++ sys/netmpls/mpls.h @@ -71,6 +71,9 @@ struct shim_hdr { #define MPLS_BOS_OFFSET 8 #define MPLS_TTL_MASK __MADDR(0x000000ffU) +#define CW_ZERO_MASK __MADDR(0xf0000000U) +#define CW_FRAG_MASK __MADDR(0x00300000U) + #define MPLS_BOS_ISSET(l) (((l) & MPLS_BOS_MASK) == MPLS_BOS_MASK) /* Reserved lavel values (RFC3032) */ @@ -140,6 +143,20 @@ struct rt_mpls { &mpls_mapttl_ip6 \ } +#define IWR_TYPE_NONE 0 +#define IWR_TYPE_ETHERNET 1 +#define IWR_TYPE_ETHERNET_TAGGED 2 + +#define IWR_FLAG_CONTROLWORD 0x1 + +struct ifwirereq { + u_int32_t iwr_flags; + u_int32_t iwr_type; /* pseudowire type */ + struct shim_hdr iwr_lshim; /* local label */ + struct shim_hdr iwr_rshim; /* remote label */ + struct sockaddr iwr_nexthop; +}; + #endif #ifdef _KERNEL @@ -181,6 +198,8 @@ struct shim_entry { struct shim_hdr se_shim; }; +void wire_input(struct ifnet *, struct mbuf *); + struct shim_entry *mpls_shim_lookup(const struct shim_hdr *); int mpls_shim_add(const struct shim_hdr *); void mpls_shim_del(const struct shim_hdr *); diff --git sys/netmpls/mpls_input.c sys/netmpls/mpls_input.c index 0bdce93..e66f031 100644 --- sys/netmpls/mpls_input.c +++ sys/netmpls/mpls_input.c @@ -17,6 +17,7 @@ */ #include "mpe.h" +#include "wire.h" #include <sys/param.h> #include <sys/mbuf.h> @@ -278,6 +279,12 @@ do_v6: goto done; } #endif +#if NWIRE > 0 + if (ifp->if_type == IFT_MPLSTUNNEL) { + wire_input(ifp, m); + goto done; + } +#endif if (!rt->rt_gateway) { m_freem(m); goto done; diff --git sys/sys/sockio.h sys/sys/sockio.h index 625ee67..e81c6cf 100644 --- sys/sys/sockio.h +++ sys/sys/sockio.h @@ -190,6 +190,9 @@ #define SIOCSLIFPHYTTL _IOW('i', 168, struct ifreq) /* set tunnel ttl */ #define SIOCGLIFPHYTTL _IOWR('i', 169, struct ifreq) /* get tunnel ttl */ +#define SIOCSETWIRECFG _IOW('i', 170, struct ifreq) /* set wire config */ +#define SIOCGETWIRECFG _IOWR('i', 171, struct ifreq) /* get wire config */ + 
#define SIOCSVH _IOWR('i', 245, struct ifreq) /* set carp param */ #define SIOCGVH _IOWR('i', 246, struct ifreq) /* get carp param */