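From: David Ahern <dsah...@gmail.com> Add a new RTA attribute, RTA_NH_ID, that allows a user to specify the id of an existing nexthop object to use with a route instead of the current inline nexthop specification (output device, gateway, encap, or multipath nexthops). The nexthop id and the inline nexthop specification are mutually exclusive.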
Signed-off-by: David Ahern <dsah...@gmail.com> --- include/net/ip_fib.h | 1 + include/uapi/linux/rtnetlink.h | 1 + net/ipv4/fib_frontend.c | 7 +++ net/ipv4/fib_semantics.c | 139 ++++++++++++++++++++++++++++++----------- net/ipv4/fib_trie.c | 33 +++++++--- 5 files changed, 136 insertions(+), 45 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c59e0f1ba59b..d2f961de732d 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -40,6 +40,7 @@ struct fib_config { u32 fc_flags; u32 fc_priority; __be32 fc_prefsrc; + u32 fc_nh_id; struct nlattr *fc_mx; struct rtnexthop *fc_mp; int fc_mx_len; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 4a0615797e5e..a036368798a9 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -349,6 +349,7 @@ enum rtattr_type_t { RTA_IP_PROTO, RTA_SPORT, RTA_DPORT, + RTA_NH_ID, __RTA_MAX }; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c483453bf037..cf133d4e02f2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -322,6 +322,9 @@ static bool fib_info_nh_uses_dev(struct fib_info *fi, bool dev_match = false; int ret; + if (fi->nh) + return nexthop_uses_dev(fi->nh, dev); + #ifdef CONFIG_IP_ROUTE_MULTIPATH for (ret = 0; ret < fi->fib_nhs; ret++) { struct fib_nh *nh = &fi->fib_nh[ret]; @@ -663,6 +666,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_IP_PROTO] = { .type = NLA_U8 }, [RTA_SPORT] = { .type = NLA_U16 }, [RTA_DPORT] = { .type = NLA_U16 }, + [RTA_NH_ID] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, @@ -746,6 +750,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, if (err < 0) goto errout; break; + case RTA_NH_ID: + cfg->fc_nh_id = nla_get_u32(attr); + break; } } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 0cd536ad1761..c91cdafd40ec 100644 --- a/net/ipv4/fib_semantics.c +++ 
b/net/ipv4/fib_semantics.c @@ -226,9 +226,13 @@ static void free_fib_info_rcu(struct rcu_head *head) struct fib_info *fi = container_of(head, struct fib_info, rcu); struct dst_metrics *m; - change_nexthops(fi) { - fib_nh_release(fi->fib_net, nexthop_nh); - } endfor_nexthops(fi); + if (fi->nh) { + nexthop_put(fi->nh); + } else { + change_nexthops(fi) { + fib_nh_release(fi->fib_net, nexthop_nh); + } endfor_nexthops(fi); + } m = fi->fib_metrics; if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) @@ -260,11 +264,15 @@ void fib_release_info(struct fib_info *fi) hlist_del(&fi->fib_hash); if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); - change_nexthops(fi) { - if (!nexthop_nh->nh_dev) - continue; - hlist_del(&nexthop_nh->nh_hash); - } endfor_nexthops(fi) + if (fi->nh) { + list_del(&fi->nh_list); + } else { + change_nexthops(fi) { + if (!nexthop_nh->nh_dev) + continue; + hlist_del(&nexthop_nh->nh_hash); + } endfor_nexthops(fi) + } fi->fib_dead = 1; fib_info_put(fi); } @@ -275,6 +283,12 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) { const struct fib_nh *onh = ofi->fib_nh; + if (fi->nh || ofi->nh) + return nexthop_cmp(fi->nh, ofi->nh) ? 
0 : -1; + + if (ofi->fib_nhs == 0) + return 0; + for_nexthops(fi) { if (nh->nh_oif != onh->nh_oif || nh->nh_gw != onh->nh_gw || @@ -310,10 +324,13 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) val ^= (fi->fib_protocol << 8) | fi->fib_scope; val ^= (__force u32)fi->fib_prefsrc; val ^= fi->fib_priority; - for_nexthops(fi) { - val ^= fib_devindex_hashfn(nh->nh_oif); - } endfor_nexthops(fi) - + if (fi->nh) { + val ^= fib_devindex_hashfn(fi->nh->id); + } else { + for_nexthops(fi) { + val ^= fib_devindex_hashfn(nh->nh_oif); + } endfor_nexthops(fi) + } return (val ^ (val >> 7) ^ (val >> 12)) & mask; } @@ -339,7 +356,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX) == 0 && !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && - (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) + (nh_comp(fi, nfi) == 0)) return fi; } @@ -349,6 +366,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) /* Check, that the gateway is already configured. * Used only by redirect accept routine. 
*/ +//TO-DO: need a nexthop version int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; @@ -381,16 +399,19 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) + nla_total_size(4) /* RTA_PRIORITY */ + nla_total_size(4) /* RTA_PREFSRC */ + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ + size_t nhsize = 0; /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); - if (fi->fib_nhs) { + if (fi->nh) { + nhsize = nla_total_size(4); /* RTA_NH_ID */ + } else if (fi->fib_nhs) { size_t nh_encapsize = 0; /* Also handles the special case fib_nhs == 1 */ /* each nexthop is packed in an attribute */ - size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); + nhsize = nla_total_size(sizeof(struct rtnexthop)); /* may contain flow and gateway attribute */ nhsize += 2 * nla_total_size(4); @@ -539,6 +560,7 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, return nhs; } +/* only called when fib_nh is integrated into fib_info */ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg, struct netlink_ext_ack *extack) @@ -625,6 +647,8 @@ static void fib_rebalance(struct fib_info *fi) int w; struct in_device *in_dev; + WARN_ON(fi->nh); + if (fi->fib_nhs < 2) return; @@ -712,6 +736,9 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) return 1; + if (fi->nh) + return cfg->fc_nh_id == fi->nh->id ? 
0 : 1; + if (cfg->fc_oif || cfg->fc_gw) { if (cfg->fc_encap) { if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap, @@ -1099,9 +1126,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg, { int err; struct fib_info *fi = NULL; + struct nexthop *nh = NULL; struct fib_info *ofi; int nhs = 1; struct net *net = cfg->fc_nlinfo.nl_net; + unsigned char scope; if (cfg->fc_type > RTN_MAX) goto err_inval; @@ -1118,6 +1147,21 @@ struct fib_info *fib_create_info(struct fib_config *cfg, goto err_inval; } + if (cfg->fc_nh_id) { + if (cfg->fc_oif || cfg->fc_gw || cfg->fc_encap || cfg->fc_mp) { + NL_SET_ERR_MSG(extack, + "Nexthop specification and nexthop id are mutually exclusive"); + goto err_inval; + } + + nh = nexthop_find_by_id(net, cfg->fc_nh_id); + if (!nh) { + NL_SET_ERR_MSG(extack, + "Invalid nexthop id - nexthop does not exist"); + goto err_inval; + } + } + #ifdef CONFIG_IP_ROUTE_MULTIPATH if (cfg->fc_mp) { nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack); @@ -1180,7 +1224,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg, if (err) goto failure; - if (cfg->fc_mp) { + if (nh) { + nexthop_get(nh); + fi->nh = nh; + } else if (cfg->fc_mp) { err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack); if (err != 0) goto failure; @@ -1214,7 +1261,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg, goto err_inval; } - if (cfg->fc_scope == RT_SCOPE_HOST) { + if (fi->nh) { + err = fib_check_nexthop(fi, cfg, extack); + if (err) + goto failure; + } else if (cfg->fc_scope == RT_SCOPE_HOST) { struct fib_nh *nh = fi->fib_nh; /* Local address is added. 
*/ @@ -1254,12 +1305,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg, goto err_inval; } - change_nexthops(fi) { - fib_info_update_nh_saddr(net, nexthop_nh, fi->fib_scope); - } endfor_nexthops(fi) - - fib_rebalance(fi); + if (!fi->nh) { + scope = fi->fib_scope; + change_nexthops(fi) { + fib_info_update_nh_saddr(net, nexthop_nh, scope); + } endfor_nexthops(fi) + fib_rebalance(fi); + } link_it: ofi = fib_find_info(fi); if (ofi) { @@ -1280,16 +1333,20 @@ struct fib_info *fib_create_info(struct fib_config *cfg, head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; hlist_add_head(&fi->fib_lhash, head); } - change_nexthops(fi) { - struct hlist_head *head; - unsigned int hash; + if (fi->nh) { + list_add(&fi->nh_list, &nh->fi_list); + } else { + change_nexthops(fi) { + struct hlist_head *head; + unsigned int hash; - if (!nexthop_nh->nh_dev) - continue; - hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex); - head = &fib_info_devhash[hash]; - hlist_add_head(&nexthop_nh->nh_hash, head); - } endfor_nexthops(fi) + if (!nexthop_nh->nh_dev) + continue; + hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex); + head = &fib_info_devhash[hash]; + hlist_add_head(&nexthop_nh->nh_hash, head); + } endfor_nexthops(fi) + } spin_unlock_bh(&fib_info_lock); return fi; @@ -1298,6 +1355,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg, failure: if (fi) { + if (fi->nh) + nexthop_put(fi->nh); + fi->fib_dead = 1; free_fib_info(fi); } @@ -1344,7 +1404,11 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, if (fi->fib_prefsrc && nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; - if (fi->fib_nhs == 1) { + + if (fi->nh) { + if (nla_put_u32(skb, RTA_NH_ID, fi->nh->id)) + goto nla_put_failure; + } else if (fi->fib_nhs == 1) { if (fi->fib_nh->nh_gw && nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) goto nla_put_failure; @@ -1587,8 +1651,11 @@ static void fib_select_default(const struct flowi4 *flp, struct 
fib_result *res) continue; break; } - if (next_fi->fib_flags & RTNH_F_DEAD) + + fnh = fib_info_nh(next_fi, 0); + if (fnh->nh_flags & RTNH_F_DEAD) continue; + last_tos = fa->fa_tos; last_prio = next_fi->fib_priority; @@ -1596,7 +1663,6 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) fa->fa_type != RTN_UNICAST) continue; - fnh = fib_info_nh(next_fi, 0); if (!fnh->nh_gw || fnh->nh_scope != RT_SCOPE_LINK) continue; @@ -1749,13 +1815,14 @@ void fib_select_multipath(struct fib_result *res, int hash) void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb) { + int h; + if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) goto check_saddr; #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi->fib_nhs > 1) { - int h = fib_multipath_hash(net, fl4, skb, NULL); - + h = fib_multipath_hash(net, fl4, skb, NULL); fib_select_multipath(res, h); } else diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index c6aab049a4ac..575bb34d895f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1445,7 +1445,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, /* Step 3: Process the leaf, if that fails fall back to backtracing */ hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; - int nhsel, err; + int nhsel, err, nhmax; if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) { if (index >= (1ul << fa->fa_slen)) @@ -1460,6 +1460,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; if (unlikely(err < 0)) { +out_reject: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif @@ -1468,17 +1469,31 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, } if (fi->fib_flags & RTNH_F_DEAD) continue; - for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { - struct fib_nh *nh = &fi->fib_nh[nhsel]; - struct in_device 
*in_dev = __in_dev_get_rcu(nh->nh_dev); + + if (fi->nh) { + if (nexthop_is_blackhole(fi->nh)) { + err = fib_props[RTN_BLACKHOLE].error; + goto out_reject; + } + nhmax = nexthop_num_path(fi->nh); + } else { + nhmax = fi->fib_nhs; + } + for (nhsel = 0; nhsel < nhmax; nhsel++) { + struct fib_nh *nh = fib_info_nh(fi, nhsel); + struct in_device *in_dev; if (nh->nh_flags & RTNH_F_DEAD) continue; - if (in_dev && - IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - nh->nh_flags & RTNH_F_LINKDOWN && - !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) - continue; + + if (!fi->nh) { + in_dev = __in_dev_get_rcu(nh->nh_dev); + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + nh->nh_flags & RTNH_F_LINKDOWN && + !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) + continue; + } if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) -- 2.11.0