Add basic support for VRFs to IPv6 stack. This is a good start point. ping to and from a VRF works. Basic tcp and udp clients and server all work fine with VRFs.
Signed-off-by: David Ahern <d...@cumulusnetworks.com> --- net/ipv6/addrconf.c | 4 +++- net/ipv6/datagram.c | 4 ++++ net/ipv6/icmp.c | 6 +++++- net/ipv6/ip6_fib.c | 1 + net/ipv6/ip6_output.c | 6 ++++-- net/ipv6/ndisc.c | 9 +++++++-- net/ipv6/route.c | 17 +++++++++++++++-- 7 files changed, 39 insertions(+), 8 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 75d3dde32c69..f4677a9c01ac 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -81,6 +81,7 @@ #include <net/ip.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/l3mdev.h> #include <linux/if_tunnel.h> #include <linux/rtnetlink.h> #include <linux/netconf.h> @@ -2179,8 +2180,9 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; + u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX; - table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX); + table = fib6_get_table(dev_net(dev), tb_id); if (!table) return NULL; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 9aadd57808a5..11980ee57507 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -142,6 +142,10 @@ static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int a err = -EINVAL; goto out; } + } else if (sk->sk_bound_dev_if && + netif_index_is_l3_master(sock_net(sk), + sk->sk_bound_dev_if)) { + fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; } sk->sk_v6_daddr = *daddr; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6c2b2132c8d3..efb1c00f2270 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -68,6 +68,7 @@ #include <net/xfrm.h> #include <net/inet_common.h> #include <net/dsfield.h> +#include <net/l3mdev.h> #include <asm/uaccess.h> @@ -496,6 +497,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev); + dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out; @@ -575,7 +579,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.daddr = ipv6_hdr(skb)->saddr; if (saddr) fl6.saddr = *saddr; - fl6.flowi6_oif = skb->dev->ifindex; + fl6.flowi6_oif = l3mdev_fib_oif(skb->dev); fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 418d9823692b..318cf5a34ca5 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -259,6 +259,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) return NULL; } +EXPORT_SYMBOL_GPL(fib6_get_table); static void __net_init fib6_tables_init(struct net *net) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 291a07be5dfb..bbd752cef5c2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -55,6 +55,7 @@ #include <net/xfrm.h> #include <net/checksum.h> #include <linux/mroute6.h> +#include <net/l3mdev.h> static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) { @@ -874,7 +875,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, #ifdef CONFIG_IPV6_SUBTREES ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif - (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { + (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC) && + (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { dst_release(dst); dst = NULL; } @@ -1026,7 +1028,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, if (final_dst) fl6->daddr = *final_dst; if (!fl6->flowi6_oif) - fl6->flowi6_oif = dst->dev->ifindex; + fl6->flowi6_oif = l3mdev_fib_oif(dst->dev); return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index dde5a1e5875a..278627b01283 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -67,6 +67,7 @@ #include <net/flow.h> #include <net/ip6_checksum.h> #include <net/inet_common.h> +#include <net/l3mdev.h> #include <linux/proc_fs.h> #include <linux/netfilter.h> @@ -147,6 +148,7 @@ struct neigh_table nd_tbl = { .gc_thresh2 = 512, .gc_thresh3 = 1024, }; +EXPORT_SYMBOL_GPL(nd_tbl); static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data) { @@ -441,8 +443,9 @@ static void ndisc_send_skb(struct sk_buff *skb, if (!dst) { struct flowi6 fl6; + int oif = l3mdev_fib_oif(skb->dev); - icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex); + icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { kfree_skb(skb); @@ -1487,6 +1490,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) int rd_len; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; bool ret; + int oif; if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n", @@ -1501,8 +1505,9 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) return; } + oif = l3mdev_fib_oif(dev); icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, - &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); + &saddr_buf, &ipv6_hdr(skb)->saddr, oif); dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 53617d715188..2996dd957536 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -61,6 +61,7 @@ #include <net/nexthop.h> #include <net/lwtunnel.h> #include <net/ip_tunnels.h> +#include <net/l3mdev.h> #include <asm/uaccess.h> @@ -1068,6 +1069,8 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); saved_fn = fn; + if (fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC) + oif = 0; redo_rt6_select: rt = rt6_select(fn, oif, strict); if (rt->rt6i_nsiblings) @@ -1165,7 +1168,7 @@ void ip6_route_input(struct sk_buff *skb) int flags = RT6_LOOKUP_F_HAS_SADDR; struct ip_tunnel_info *tun_info; struct flowi6 fl6 = { - .flowi6_iif = skb->dev->ifindex, + .flowi6_iif = l3mdev_fib_oif(skb->dev), .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), @@ -1189,8 +1192,13 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, struct flowi6 *fl6) { + struct dst_entry *dst; int flags = 0; + dst = l3mdev_rt6_dst_by_oif(net, fl6); + if (dst) + return dst; + fl6->flowi6_iif = LOOPBACK_IFINDEX; if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) @@ -1772,6 +1780,8 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret) idev = in6_dev_get(dev); if (!idev) goto out; + + cfg->fc_table = l3mdev_fib_table(dev) ? : cfg->fc_table; } if (cfg->fc_metric == 0) @@ -2492,6 +2502,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, bool anycast) { + u32 tb_id; struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, DST_NOCOUNT); @@ -2514,7 +2525,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_gateway = *addr; rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; - rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); + + tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; + rt->rt6i_table = fib6_get_table(net, tb_id); atomic_set(&rt->dst.__refcnt, 1); -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html