From: Roopa Prabhu <ro...@cumulusnetworks.com> Introduces two netlink attributes RTA_ENCAP_TYPE and RTA_ENCAP to support attaching encap information to ipv4 routes.
RTA_ENCAP is a nested attribute as suggested by Thomas (and also as Robert had it in his series). RTA_ENCAP netlink policy is declared by the light weight tunnel drivers that support this encap type. fib code calls the following for each nexthop: - new route handler: lwt build state (that parses RTA_ENCAP and returns lwt state that lives in every fib_nh) - del dump handler: lwt release handler to release lwt state data - route dump handler: lwt dump encap to fill RTA_ENCAP data - during input route lookup sets dst->output to lwtunnel_output which in turn calls the corresponding lwt tunnel output function which applies the required encap and xmits the packet Signed-off-by: Roopa Prabhu <ro...@cumulusnetworks.com> --- include/net/ip_fib.h | 7 ++- include/net/route.h | 3 ++ include/uapi/linux/rtnetlink.h | 3 +- net/ipv4/fib_frontend.c | 8 ++++ net/ipv4/fib_semantics.c | 93 +++++++++++++++++++++++++++++++++++++++- net/ipv4/route.c | 33 +++++++++++++- 6 files changed, 142 insertions(+), 5 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 54271ed..49f18d7 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -44,7 +44,9 @@ struct fib_config { u32 fc_flow; u32 fc_nlflags; struct nl_info fc_nlinfo; - }; + struct nlattr *fc_encap; + u16 fc_encap_type; +}; struct fib_info; struct rtable; @@ -89,6 +91,9 @@ struct fib_nh { struct rtable __rcu * __percpu *nh_pcpu_rth_output; struct rtable __rcu *nh_rth_input; struct fnhe_hash_bucket __rcu *nh_exceptions; +#ifdef CONFIG_LWTUNNEL + struct lwtunnel_state *nh_lwtstate; +#endif }; /* diff --git a/include/net/route.h b/include/net/route.h index fe22d03..39a6495 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -66,6 +66,9 @@ struct rtable { struct list_head rt_uncached; struct uncached_list *rt_uncached_list; +#ifdef CONFIG_LWTUNNEL + struct lwtunnel_state *rt_lwtstate; +#endif }; static inline bool rt_is_input_route(const struct rtable *rt) diff --git a/include/uapi/linux/rtnetlink.h 
b/include/uapi/linux/rtnetlink.h index 17fb02f..6c089ad 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -308,6 +308,8 @@ enum rtattr_type_t { RTA_VIA, RTA_NEWDST, RTA_PREF, + RTA_ENCAP_TYPE, + RTA_ENCAP, __RTA_MAX }; @@ -357,7 +359,6 @@ struct rtvia { }; /* RTM_CACHEINFO */ - struct rta_cacheinfo { __u32 rta_clntref; __u32 rta_lastuse; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 872494e..fbe0630 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -591,6 +591,8 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_FLOW] = { .type = NLA_U32 }, + [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, + [RTA_ENCAP] = { .type = NLA_NESTED }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, @@ -656,6 +658,12 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, case RTA_TABLE: cfg->fc_table = nla_get_u32(attr); break; + case RTA_ENCAP: + cfg->fc_encap = attr; + break; + case RTA_ENCAP_TYPE: + cfg->fc_encap_type = nla_get_u16(attr); + break; } } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 28ec3c1..54dd287 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -42,6 +42,7 @@ #include <net/ip_fib.h> #include <net/netlink.h> #include <net/nexthop.h> +#include <net/lwtunnel.h> #include "fib_lookup.h" @@ -208,6 +209,10 @@ static void free_fib_info_rcu(struct rcu_head *head) change_nexthops(fi) { if (nexthop_nh->nh_dev) dev_put(nexthop_nh->nh_dev); +#ifdef CONFIG_LWTUNNEL + if (nexthop_nh->nh_lwtstate) + lwtunnel_state_put(nexthop_nh->nh_lwtstate); +#endif free_nh_exceptions(nexthop_nh); rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); rt_fibinfo_free(&nexthop_nh->nh_rth_input); @@ -366,6 +371,7 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) payload += nla_total_size((RTAX_MAX * nla_total_size(4))); if 
(fi->fib_nhs) { + size_t nh_encapsize = 0; /* Also handles the special case fib_nhs == 1 */ /* each nexthop is packed in an attribute */ @@ -374,8 +380,23 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) /* may contain flow and gateway attribute */ nhsize += 2 * nla_total_size(4); +#ifdef CONFIG_LWTUNNEL + /* grab encap info */ + for_nexthops(fi) { + if (nh->nh_lwtstate) { + /* RTA_ENCAP_TYPE */ + nh_encapsize += lwtunnel_get_encap_size( + nh->nh_lwtstate); + /* RTA_ENCAP */ + nh_encapsize += nla_total_size(2); + } + } endfor_nexthops(fi); +#endif + /* all nexthops are packed in a nested attribute */ - payload += nla_total_size(fi->fib_nhs * nhsize); + payload += nla_total_size((fi->fib_nhs * nhsize) + + nh_encapsize); + } return payload; @@ -452,6 +473,9 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg) { + struct net *net = cfg->fc_nlinfo.nl_net; + int ret; + change_nexthops(fi) { int attrlen; @@ -475,12 +499,40 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, if (nexthop_nh->nh_tclassid) fi->fib_net->ipv4.fib_num_tclassid_users++; #endif +#ifdef CONFIG_LWTUNNEL + nla = nla_find(attrs, attrlen, RTA_ENCAP); + if (nla) { + struct lwtunnel_state *lwtstate; + struct net_device *dev = NULL; + struct nlattr *nla_entype; + + nla_entype = nla_find(attrs, attrlen, + RTA_ENCAP_TYPE); + if (!nla_entype) + goto err_inval; + if (cfg->fc_oif) + dev = __dev_get_by_index(net, cfg->fc_oif); + ret = lwtunnel_build_state(dev, nla_get_u16( + nla_entype), + nla, &lwtstate); + if (ret) + goto errout; + lwtunnel_state_get(lwtstate); + nexthop_nh->nh_lwtstate = lwtstate; + } +#endif } rtnh = rtnh_next(rtnh, &remaining); } endfor_nexthops(fi); return 0; + +err_inval: + ret = -EINVAL; + +errout: + return ret; } #endif @@ -875,6 +927,25 @@ struct fib_info *fib_create_info(struct fib_config *cfg) } else { struct fib_nh *nh = 
fi->fib_nh; +#ifdef CONFIG_LWTUNNEL + if (cfg->fc_encap) { + struct lwtunnel_state *lwtstate; + struct net_device *dev = NULL; + int ret; + + if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) + goto err_inval; + if (cfg->fc_oif) + dev = __dev_get_by_index(net, cfg->fc_oif); + ret = lwtunnel_build_state(dev, cfg->fc_encap_type, + cfg->fc_encap, &lwtstate); + if (ret) + goto err_inval; + + lwtunnel_state_get(lwtstate); + nh->nh_lwtstate = lwtstate; + } +#endif nh->nh_oif = cfg->fc_oif; nh->nh_gw = cfg->fc_gw; nh->nh_flags = cfg->fc_flags; @@ -1034,7 +1105,17 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) goto nla_put_failure; #endif +#ifdef CONFIG_LWTUNNEL + if (fi->fib_nh->nh_lwtstate) { + struct lwtunnel_state *lwtstate; + + lwtstate = fi->fib_nh->nh_lwtstate; + if (nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type)) + goto nla_put_failure; + lwtunnel_fill_encap(skb, lwtstate); + } } +#endif #ifdef CONFIG_IP_ROUTE_MULTIPATH if (fi->fib_nhs > 1) { struct rtnexthop *rtnh; @@ -1061,6 +1142,16 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) goto nla_put_failure; #endif +#ifdef CONFIG_LWTUNNEL + if (nh->nh_lwtstate) { + struct lwtunnel_state *lwtstate; + + lwtstate = nh->nh_lwtstate; + if (nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type)) + goto nla_put_failure; + lwtunnel_fill_encap(skb, lwtstate); + } +#endif /* length of rtnetlink header + attributes */ rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; } endfor_nexthops(fi); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f605598..c6549f9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -102,6 +102,7 @@ #include <net/tcp.h> #include <net/icmp.h> #include <net/xfrm.h> +#include <net/lwtunnel.h> #include <net/netevent.h> #include <net/rtnetlink.h> #ifdef CONFIG_SYSCTL @@ -1355,6 +1356,9 @@ static void ipv4_dst_destroy(struct dst_entry *dst) 
list_del(&rt->rt_uncached); spin_unlock_bh(&ul->lock); } +#ifdef CONFIG_LWTUNNEL + lwtunnel_state_put(rt->rt_lwtstate); +#endif } void rt_flush_dev(struct net_device *dev) @@ -1403,6 +1407,14 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif +#ifdef CONFIG_LWTUNNEL + if (nh->nh_lwtstate) { + lwtunnel_state_get(nh->nh_lwtstate); + rt->rt_lwtstate = nh->nh_lwtstate; + } else { + rt->rt_lwtstate = NULL; + } +#endif if (unlikely(fnhe)) cached = rt_bind_exception(rt, fnhe, daddr); else if (!(rt->dst.flags & DST_NOCACHE)) @@ -1488,6 +1500,9 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_gateway = 0; rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); +#ifdef CONFIG_LWTUNNEL + rth->rt_lwtstate = NULL; +#endif if (our) { rth->dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; @@ -1618,12 +1633,19 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_gateway = 0; rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); +#ifdef CONFIG_LWTUNNEL + rth->rt_lwtstate = NULL; +#endif RT_CACHE_STAT_INC(in_slow_tot); rth->dst.input = ip_forward; rth->dst.output = ip_output; rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); +#ifdef CONFIG_LWTUNNEL + if (lwtunnel_output_redirect(rth->rt_lwtstate)) + rth->dst.output = lwtunnel_output; +#endif skb_dst_set(skb, &rth->dst); out: err = 0; @@ -1792,6 +1814,9 @@ local_input: rth->rt_gateway = 0; rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); +#ifdef CONFIG_LWTUNNEL + rth->rt_lwtstate = NULL; +#endif RT_CACHE_STAT_INC(in_slow_tot); if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; @@ -1981,7 +2006,9 @@ add: rth->rt_gateway = 0; rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); - +#ifdef CONFIG_LWTUNNEL + rth->rt_lwtstate = NULL; +#endif RT_CACHE_STAT_INC(out_slow_tot); if (flags & RTCF_LOCAL) @@ -2261,7 +2288,9 @@ struct dst_entry 
*ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_uses_gateway = ort->rt_uses_gateway; INIT_LIST_HEAD(&rt->rt_uncached); - +#ifdef CONFIG_LWTUNNEL + rt->rt_lwtstate = NULL; +#endif dst_free(new); } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html