From: David Ahern <dsah...@gmail.com>

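Add a new route attribute, RTA_NH_ID, that allows a user to specify the
id of an existing nexthop object to use with a route instead of giving
the nexthop specification (device, gateway, encap or multipath list)
inline. The two forms are mutually exclusive.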

Signed-off-by: David Ahern <dsah...@gmail.com>
---
 include/net/ip_fib.h           |   1 +
 include/uapi/linux/rtnetlink.h |   1 +
 net/ipv4/fib_frontend.c        |   7 +++
 net/ipv4/fib_semantics.c       | 139 ++++++++++++++++++++++++++++++-----------
 net/ipv4/fib_trie.c            |  33 +++++++---
 5 files changed, 136 insertions(+), 45 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index c59e0f1ba59b..d2f961de732d 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -40,6 +40,7 @@ struct fib_config {
        u32                     fc_flags;
        u32                     fc_priority;
        __be32                  fc_prefsrc;
+       u32                     fc_nh_id;
        struct nlattr           *fc_mx;
        struct rtnexthop        *fc_mp;
        int                     fc_mx_len;
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 4a0615797e5e..a036368798a9 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -349,6 +349,7 @@ enum rtattr_type_t {
        RTA_IP_PROTO,
        RTA_SPORT,
        RTA_DPORT,
+       RTA_NH_ID,
        __RTA_MAX
 };
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c483453bf037..cf133d4e02f2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -322,6 +322,9 @@ static bool fib_info_nh_uses_dev(struct fib_info *fi,
        bool dev_match = false;
        int ret;
 
+       if (fi->nh)
+               return nexthop_uses_dev(fi->nh, dev);
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
        for (ret = 0; ret < fi->fib_nhs; ret++) {
                struct fib_nh *nh = &fi->fib_nh[ret];
@@ -663,6 +666,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
        [RTA_IP_PROTO]          = { .type = NLA_U8 },
        [RTA_SPORT]             = { .type = NLA_U16 },
        [RTA_DPORT]             = { .type = NLA_U16 },
+       [RTA_NH_ID]             = { .type = NLA_U32 },
 };
 
 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
@@ -746,6 +750,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
                        if (err < 0)
                                goto errout;
                        break;
+               case RTA_NH_ID:
+                       cfg->fc_nh_id = nla_get_u32(attr);
+                       break;
                }
        }
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0cd536ad1761..c91cdafd40ec 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -226,9 +226,13 @@ static void free_fib_info_rcu(struct rcu_head *head)
        struct fib_info *fi = container_of(head, struct fib_info, rcu);
        struct dst_metrics *m;
 
-       change_nexthops(fi) {
-               fib_nh_release(fi->fib_net, nexthop_nh);
-       } endfor_nexthops(fi);
+       if (fi->nh) {
+               nexthop_put(fi->nh);
+       } else {
+               change_nexthops(fi) {
+                       fib_nh_release(fi->fib_net, nexthop_nh);
+               } endfor_nexthops(fi);
+       }
 
        m = fi->fib_metrics;
        if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
@@ -260,11 +264,15 @@ void fib_release_info(struct fib_info *fi)
                hlist_del(&fi->fib_hash);
                if (fi->fib_prefsrc)
                        hlist_del(&fi->fib_lhash);
-               change_nexthops(fi) {
-                       if (!nexthop_nh->nh_dev)
-                               continue;
-                       hlist_del(&nexthop_nh->nh_hash);
-               } endfor_nexthops(fi)
+               if (fi->nh) {
+                       list_del(&fi->nh_list);
+               } else {
+                       change_nexthops(fi) {
+                               if (!nexthop_nh->nh_dev)
+                                       continue;
+                               hlist_del(&nexthop_nh->nh_hash);
+                       } endfor_nexthops(fi)
+               }
                fi->fib_dead = 1;
                fib_info_put(fi);
        }
@@ -275,6 +283,12 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
 {
        const struct fib_nh *onh = ofi->fib_nh;
 
+       if (fi->nh || ofi->nh)
+               return nexthop_cmp(fi->nh, ofi->nh) ? 0 : -1;
+
+       if (ofi->fib_nhs == 0)
+               return 0;
+
        for_nexthops(fi) {
                if (nh->nh_oif != onh->nh_oif ||
                    nh->nh_gw  != onh->nh_gw ||
@@ -310,10 +324,13 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
        val ^= (fi->fib_protocol << 8) | fi->fib_scope;
        val ^= (__force u32)fi->fib_prefsrc;
        val ^= fi->fib_priority;
-       for_nexthops(fi) {
-               val ^= fib_devindex_hashfn(nh->nh_oif);
-       } endfor_nexthops(fi)
-
+       if (fi->nh) {
+               val ^= fib_devindex_hashfn(fi->nh->id);
+       } else {
+               for_nexthops(fi) {
+                       val ^= fib_devindex_hashfn(nh->nh_oif);
+               } endfor_nexthops(fi)
+       }
        return (val ^ (val >> 7) ^ (val >> 12)) & mask;
 }
 
@@ -339,7 +356,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
                    memcmp(nfi->fib_metrics, fi->fib_metrics,
                           sizeof(u32) * RTAX_MAX) == 0 &&
                    !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
-                   (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
+                   (nh_comp(fi, nfi) == 0))
                        return fi;
        }
 
@@ -349,6 +366,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
 /* Check, that the gateway is already configured.
  * Used only by redirect accept routine.
  */
+/* TODO: need a nexthop version */
 int ip_fib_check_default(__be32 gw, struct net_device *dev)
 {
        struct hlist_head *head;
@@ -381,16 +399,19 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
                         + nla_total_size(4) /* RTA_PRIORITY */
                         + nla_total_size(4) /* RTA_PREFSRC */
                         + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
+       size_t nhsize = 0;
 
        /* space for nested metrics */
        payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
 
-       if (fi->fib_nhs) {
+       if (fi->nh) {
+               nhsize = nla_total_size(4); /* RTA_NH_ID */
+       } else if (fi->fib_nhs) {
                size_t nh_encapsize = 0;
                /* Also handles the special case fib_nhs == 1 */
 
                /* each nexthop is packed in an attribute */
-               size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
+               nhsize = nla_total_size(sizeof(struct rtnexthop));
 
                /* may contain flow and gateway attribute */
                nhsize += 2 * nla_total_size(4);
@@ -539,6 +560,7 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
        return nhs;
 }
 
+/* only called when fib_nh is integrated into fib_info */
 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
                       int remaining, struct fib_config *cfg,
                       struct netlink_ext_ack *extack)
@@ -625,6 +647,8 @@ static void fib_rebalance(struct fib_info *fi)
        int w;
        struct in_device *in_dev;
 
+       WARN_ON(fi->nh);
+
        if (fi->fib_nhs < 2)
                return;
 
@@ -712,6 +736,9 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
        if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
                return 1;
 
+       if (fi->nh)
+               return cfg->fc_nh_id == fi->nh->id ? 0 : 1;
+
        if (cfg->fc_oif || cfg->fc_gw) {
                if (cfg->fc_encap) {
                        if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
@@ -1099,9 +1126,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 {
        int err;
        struct fib_info *fi = NULL;
+       struct nexthop *nh = NULL;
        struct fib_info *ofi;
        int nhs = 1;
        struct net *net = cfg->fc_nlinfo.nl_net;
+       unsigned char scope;
 
        if (cfg->fc_type > RTN_MAX)
                goto err_inval;
@@ -1118,6 +1147,21 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
                goto err_inval;
        }
 
+       if (cfg->fc_nh_id) {
+               if (cfg->fc_oif || cfg->fc_gw || cfg->fc_encap || cfg->fc_mp) {
+                       NL_SET_ERR_MSG(extack,
+                                      "Nexthop specification and nexthop id are mutually exclusive");
+                       goto err_inval;
+               }
+
+               nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+               if (!nh) {
+                       NL_SET_ERR_MSG(extack,
+                                      "Invalid nexthop id - nexthop does not exist");
+                       goto err_inval;
+               }
+       }
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
        if (cfg->fc_mp) {
                nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
@@ -1180,7 +1224,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
        if (err)
                goto failure;
 
-       if (cfg->fc_mp) {
+       if (nh) {
+               nexthop_get(nh);
+               fi->nh = nh;
+       } else if (cfg->fc_mp) {
                err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
                if (err != 0)
                        goto failure;
@@ -1214,7 +1261,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
                goto err_inval;
        }
 
-       if (cfg->fc_scope == RT_SCOPE_HOST) {
+       if (fi->nh) {
+               err = fib_check_nexthop(fi, cfg, extack);
+               if (err)
+                       goto failure;
+       } else if (cfg->fc_scope == RT_SCOPE_HOST) {
                struct fib_nh *nh = fi->fib_nh;
 
                /* Local address is added. */
@@ -1254,12 +1305,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
                goto err_inval;
        }
 
-       change_nexthops(fi) {
-               fib_info_update_nh_saddr(net, nexthop_nh, fi->fib_scope);
-       } endfor_nexthops(fi)
-
-       fib_rebalance(fi);
+       if (!fi->nh) {
+               scope = fi->fib_scope;
+               change_nexthops(fi) {
+                       fib_info_update_nh_saddr(net, nexthop_nh, scope);
+               } endfor_nexthops(fi)
 
+               fib_rebalance(fi);
+       }
 link_it:
        ofi = fib_find_info(fi);
        if (ofi) {
@@ -1280,16 +1333,20 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
                head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
                hlist_add_head(&fi->fib_lhash, head);
        }
-       change_nexthops(fi) {
-               struct hlist_head *head;
-               unsigned int hash;
+       if (fi->nh) {
+               list_add(&fi->nh_list, &nh->fi_list);
+       } else {
+               change_nexthops(fi) {
+                       struct hlist_head *head;
+                       unsigned int hash;
 
-               if (!nexthop_nh->nh_dev)
-                       continue;
-               hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
-               head = &fib_info_devhash[hash];
-               hlist_add_head(&nexthop_nh->nh_hash, head);
-       } endfor_nexthops(fi)
+                       if (!nexthop_nh->nh_dev)
+                               continue;
+                       hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
+                       head = &fib_info_devhash[hash];
+                       hlist_add_head(&nexthop_nh->nh_hash, head);
+               } endfor_nexthops(fi)
+       }
        spin_unlock_bh(&fib_info_lock);
        return fi;
 
@@ -1298,6 +1355,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 
 failure:
        if (fi) {
+               if (fi->nh)
+                       nexthop_put(fi->nh);
+
                fi->fib_dead = 1;
                free_fib_info(fi);
        }
@@ -1344,7 +1404,11 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
        if (fi->fib_prefsrc &&
            nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
                goto nla_put_failure;
-       if (fi->fib_nhs == 1) {
+
+       if (fi->nh) {
+               if (nla_put_u32(skb, RTA_NH_ID, fi->nh->id))
+                       goto nla_put_failure;
+       } else if (fi->fib_nhs == 1) {
                if (fi->fib_nh->nh_gw &&
                    nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
                        goto nla_put_failure;
@@ -1587,8 +1651,11 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
                                continue;
                        break;
                }
-               if (next_fi->fib_flags & RTNH_F_DEAD)
+
+               fnh = fib_info_nh(next_fi, 0);
+               if (fnh->nh_flags & RTNH_F_DEAD)
                        continue;
+
                last_tos = fa->fa_tos;
                last_prio = next_fi->fib_priority;
 
@@ -1596,7 +1663,6 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
                    fa->fa_type != RTN_UNICAST)
                        continue;
 
-               fnh = fib_info_nh(next_fi, 0);
                if (!fnh->nh_gw || fnh->nh_scope != RT_SCOPE_LINK)
                        continue;
 
@@ -1749,13 +1815,14 @@ void fib_select_multipath(struct fib_result *res, int hash)
 void fib_select_path(struct net *net, struct fib_result *res,
                     struct flowi4 *fl4, const struct sk_buff *skb)
 {
+       int h;
+
        if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
                goto check_saddr;
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
        if (res->fi->fib_nhs > 1) {
-               int h = fib_multipath_hash(net, fl4, skb, NULL);
-
+               h = fib_multipath_hash(net, fl4, skb, NULL);
                fib_select_multipath(res, h);
        }
        else
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index c6aab049a4ac..575bb34d895f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1445,7 +1445,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
        /* Step 3: Process the leaf, if that fails fall back to backtracing */
        hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
                struct fib_info *fi = fa->fa_info;
-               int nhsel, err;
+               int nhsel, err, nhmax;
 
                if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
                        if (index >= (1ul << fa->fa_slen))
@@ -1460,6 +1460,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
                fib_alias_accessed(fa);
                err = fib_props[fa->fa_type].error;
                if (unlikely(err < 0)) {
+out_reject:
 #ifdef CONFIG_IP_FIB_TRIE_STATS
                        this_cpu_inc(stats->semantic_match_passed);
 #endif
@@ -1468,17 +1469,31 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
                }
                if (fi->fib_flags & RTNH_F_DEAD)
                        continue;
-               for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
-                       struct fib_nh *nh = &fi->fib_nh[nhsel];
-                       struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev);
+
+               if (fi->nh) {
+                       if (nexthop_is_blackhole(fi->nh)) {
+                               err = fib_props[RTN_BLACKHOLE].error;
+                               goto out_reject;
+                       }
+                       nhmax = nexthop_num_path(fi->nh);
+               } else {
+                       nhmax = fi->fib_nhs;
+               }
+               for (nhsel = 0; nhsel < nhmax; nhsel++) {
+                       struct fib_nh *nh = fib_info_nh(fi, nhsel);
+                       struct in_device *in_dev;
 
                        if (nh->nh_flags & RTNH_F_DEAD)
                                continue;
-                       if (in_dev &&
-                           IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
-                           nh->nh_flags & RTNH_F_LINKDOWN &&
-                           !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
-                               continue;
+
+                       if (!fi->nh) {
+                               in_dev = __in_dev_get_rcu(nh->nh_dev);
+                               if (in_dev &&
+                                   IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+                                   nh->nh_flags & RTNH_F_LINKDOWN &&
+                                   !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
+                                       continue;
+                       }
                        if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
                                if (flp->flowi4_oif &&
                                    flp->flowi4_oif != nh->nh_oif)
-- 
2.11.0

Reply via email to