From: David Ahern <dsah...@gmail.com> Exceptions are really per device, so move rt6i_exception_bucket to fib_nh_common as a generic nhc_exceptions pointer. Move the flushed flag to fib_nh_common as well, as nhc_exceptions_flushed. Using fib_nh_common for both is a strategic choice to reduce memory consumption: moving them to fib6_nh would push the struct over 256 bytes, which increases the actual allocation of a fib entry to 512 bytes.
Exception flushing when a fib entry is deleted is limited to the exceptions per nexthop that reference the to-be-deleted fib entry (ie., 'from' points to it). When a fib6_nh is released, all exceptions are flushed. Move the core logic of rt6_flush_exceptions, rt6_remove_exception_rt and rt6_update_exception_stamp_rt to helpers that can be invoked per fib6_nh. For fib6_nh_flush_exceptions, only remove the exception if from is NULL (ie., flushing all exceptions) or the rt6_info->from matches (fib delete). Signed-off-by: David Ahern <dsah...@gmail.com> --- include/net/ip6_fib.h | 6 +-- include/net/ip_fib.h | 4 +- net/ipv6/ip6_fib.c | 7 --- net/ipv6/route.c | 140 ++++++++++++++++++++++++++++++++------------------ 4 files changed, 96 insertions(+), 61 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 58dbb4e82908..c1d1e32e1a19 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -153,7 +153,6 @@ struct fib6_info { struct rt6key fib6_prefsrc; struct rt6_info * __percpu *rt6i_pcpu; - struct rt6_exception_bucket __rcu *rt6i_exception_bucket; #ifdef CONFIG_IPV6_ROUTER_PREF unsigned long last_probe; @@ -162,12 +161,11 @@ struct fib6_info { u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; - u8 exception_bucket_flushed:1, - should_flush:1, + u8 should_flush:1, dst_nocount:1, dst_nopolicy:1, dst_host:1, - unused:3; + unused:4; struct fib6_nh fib6_nh; struct rcu_head rcu; diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index cce437a1b2ff..063430ca0c6e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -84,7 +84,8 @@ struct fib_nh_common { unsigned char nhc_scope; u8 nhc_family; u8 nhc_has_gw:1, - unused:7; + nhc_exceptions_flushed:1, + unused:6; union { __be32 ipv4; struct in6_addr ipv6; @@ -96,6 +97,7 @@ struct fib_nh_common { /* v4 specific, but allows v6 gw with v4 routes */ struct rtable __rcu * __percpu *nhc_pcpu_rth_output; struct rtable __rcu *nhc_rth_input; + void __rcu *nhc_exceptions; }; struct fib_nh { 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 8c00609a1513..cce976a59a8c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -170,16 +170,9 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags) void fib6_info_destroy_rcu(struct rcu_head *head) { struct fib6_info *f6i = container_of(head, struct fib6_info, rcu); - struct rt6_exception_bucket *bucket; WARN_ON(f6i->fib6_node); - bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1); - if (bucket) { - f6i->rt6i_exception_bucket = NULL; - kfree(bucket); - } - if (f6i->rt6i_pcpu) { int cpu; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e0ee30cbd079..c66b9ac37036 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1413,6 +1413,7 @@ static unsigned int fib6_mtu(const struct fib6_info *rt) static int rt6_insert_exception(struct rt6_info *nrt, struct fib6_info *ort) { + struct fib_nh_common *nhc = &ort->fib6_nh.nh_common; struct net *net = dev_net(nrt->dst.dev); struct rt6_exception_bucket *bucket; struct in6_addr *src_key = NULL; @@ -1421,12 +1422,12 @@ static int rt6_insert_exception(struct rt6_info *nrt, spin_lock_bh(&rt6_exception_lock); - if (ort->exception_bucket_flushed) { + if (nhc->nhc_exceptions_flushed) { err = -EINVAL; goto out; } - bucket = rcu_dereference_protected(ort->rt6i_exception_bucket, + bucket = rcu_dereference_protected(nhc->nhc_exceptions, lockdep_is_held(&rt6_exception_lock)); if (!bucket) { bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket), @@ -1435,7 +1436,7 @@ static int rt6_insert_exception(struct rt6_info *nrt, err = -ENOMEM; goto out; } - rcu_assign_pointer(ort->rt6i_exception_bucket, bucket); + rcu_assign_pointer(nhc->nhc_exceptions, bucket); } #ifdef CONFIG_IPV6_SUBTREES @@ -1490,8 +1491,9 @@ static int rt6_insert_exception(struct rt6_info *nrt, return err; } -void rt6_flush_exceptions(struct fib6_info *rt) +static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from) { + struct fib_nh_common *nhc = &nh->nh_common; 
struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; struct hlist_node *tmp; @@ -1499,17 +1501,21 @@ void rt6_flush_exceptions(struct fib6_info *rt) spin_lock_bh(&rt6_exception_lock); /* Prevent rt6_insert_exception() to recreate the bucket list */ - rt->exception_bucket_flushed = 1; + if (!from) + nhc->nhc_exceptions_flushed = 1; - bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, + bucket = rcu_dereference_protected(nhc->nhc_exceptions, lockdep_is_held(&rt6_exception_lock)); if (!bucket) goto out; for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { - hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) - rt6_remove_exception(bucket, rt6_ex); - WARN_ON_ONCE(bucket->depth); + hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) { + if (!from || + rcu_access_pointer(rt6_ex->rt6i->from) == from) + rt6_remove_exception(bucket, rt6_ex); + } + WARN_ON_ONCE(!from && bucket->depth); bucket++; } @@ -1517,6 +1523,11 @@ void rt6_flush_exceptions(struct fib6_info *rt) spin_unlock_bh(&rt6_exception_lock); } +void rt6_flush_exceptions(struct fib6_info *f6i) +{ + fib6_nh_flush_exceptions(&f6i->fib6_nh, f6i); +} + /* Find cached rt in the hash table inside passed in rt * Caller has to hold rcu_read_lock() */ @@ -1524,12 +1535,13 @@ static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, struct in6_addr *daddr, struct in6_addr *saddr) { + struct fib_nh_common *nhc = &rt->fib6_nh.nh_common; struct rt6_exception_bucket *bucket; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; struct rt6_info *res = NULL; - bucket = rcu_dereference(rt->rt6i_exception_bucket); + bucket = rcu_dereference(nhc->nhc_exceptions); #ifdef CONFIG_IPV6_SUBTREES /* rt6i_src.plen != 0 indicates rt is in subtree @@ -1549,25 +1561,20 @@ static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, return res; } -/* Remove the passed in cached rt from the hash table that contains it */ -static int rt6_remove_exception_rt(struct rt6_info *rt) 
+static int fib6_nh_remove_exception(struct fib6_nh *nh, int plen, + const struct rt6_info *rt) { + struct fib_nh_common *nhc = &nh->nh_common; + const struct in6_addr *src_key = NULL; struct rt6_exception_bucket *bucket; - struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; - struct fib6_info *from; - int err; - - from = rcu_dereference(rt->from); - if (!from || - !(rt->rt6i_flags & RTF_CACHE)) - return -EINVAL; + int err = 0; - if (!rcu_access_pointer(from->rt6i_exception_bucket)) + if (!rcu_access_pointer(nhc->nhc_exceptions)) return -ENOENT; spin_lock_bh(&rt6_exception_lock); - bucket = rcu_dereference_protected(from->rt6i_exception_bucket, + bucket = rcu_dereference_protected(nhc->nhc_exceptions, lockdep_is_held(&rt6_exception_lock)); #ifdef CONFIG_IPV6_SUBTREES /* rt6i_src.plen != 0 indicates 'from' is in subtree @@ -1576,39 +1583,43 @@ static int rt6_remove_exception_rt(struct rt6_info *rt) * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. */ - if (from->fib6_src.plen) + if (plen) src_key = &rt->rt6i_src.addr; #endif rt6_ex = __rt6_find_exception_spinlock(&bucket, &rt->rt6i_dst.addr, src_key); - if (rt6_ex) { + if (rt6_ex) rt6_remove_exception(bucket, rt6_ex); - err = 0; - } else { + else err = -ENOENT; - } spin_unlock_bh(&rt6_exception_lock); return err; } -/* Find rt6_ex which contains the passed in rt cache and - * refresh its stamp - */ -static void rt6_update_exception_stamp_rt(struct rt6_info *rt) +/* Remove the passed in cached rt from the hash table that contains it */ +static int rt6_remove_exception_rt(struct rt6_info *rt) { - struct rt6_exception_bucket *bucket; - struct in6_addr *src_key = NULL; - struct rt6_exception *rt6_ex; struct fib6_info *from; - rcu_read_lock(); from = rcu_dereference(rt->from); if (!from || !(rt->rt6i_flags & RTF_CACHE)) - goto unlock; + return -EINVAL; - bucket = rcu_dereference(from->rt6i_exception_bucket); + return fib6_nh_remove_exception(&from->fib6_nh, + from->fib6_src.plen, rt); 
+} + +static void fib6_nh_update_exception(struct fib6_nh *nh, int plen, + const struct rt6_info *rt) +{ + struct fib_nh_common *nhc = &nh->nh_common; + const struct in6_addr *src_key = NULL; + struct rt6_exception_bucket *bucket; + struct rt6_exception *rt6_ex; + + bucket = rcu_dereference(nhc->nhc_exceptions); #ifdef CONFIG_IPV6_SUBTREES /* rt6i_src.plen != 0 indicates 'from' is in subtree @@ -1617,15 +1628,28 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. */ - if (from->fib6_src.plen) + if (plen) src_key = &rt->rt6i_src.addr; #endif - rt6_ex = __rt6_find_exception_rcu(&bucket, - &rt->rt6i_dst.addr, - src_key); + rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key); if (rt6_ex) rt6_ex->stamp = jiffies; +} + +/* Find rt6_ex which contains the passed in rt cache and + * refresh its stamp + */ +static void rt6_update_exception_stamp_rt(struct rt6_info *rt) +{ + struct fib6_info *from; + + rcu_read_lock(); + from = rcu_dereference(rt->from); + if (!from || !(rt->rt6i_flags & RTF_CACHE)) + goto unlock; + + fib6_nh_update_exception(&from->fib6_nh, from->fib6_src.plen, rt); unlock: rcu_read_unlock(); } @@ -1655,11 +1679,12 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev, static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, struct fib6_info *rt, int mtu) { + struct fib_nh_common *nhc = &rt->fib6_nh.nh_common; struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; int i; - bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, + bucket = rcu_dereference_protected(nhc->nhc_exceptions, lockdep_is_held(&rt6_exception_lock)); if (!bucket) @@ -1686,16 +1711,17 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, static void rt6_exceptions_clean_tohost(struct fib6_info *rt, struct in6_addr *gateway) { + struct fib_nh_common *nhc = &rt->fib6_nh.nh_common; struct rt6_exception_bucket *bucket; struct 
rt6_exception *rt6_ex; struct hlist_node *tmp; int i; - if (!rcu_access_pointer(rt->rt6i_exception_bucket)) + if (!rcu_access_pointer(nhc->nhc_exceptions)) return; spin_lock_bh(&rt6_exception_lock); - bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, + bucket = rcu_dereference_protected(nhc->nhc_exceptions, lockdep_is_held(&rt6_exception_lock)); if (bucket) { @@ -1768,15 +1794,18 @@ void rt6_age_exceptions(struct fib6_info *rt, { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; + struct fib_nh_common *nhc; struct hlist_node *tmp; int i; - if (!rcu_access_pointer(rt->rt6i_exception_bucket)) - return; - rcu_read_lock_bh(); + + nhc = &rt->fib6_nh.nh_common; + if (!rcu_access_pointer(nhc->nhc_exceptions)) + goto out; + spin_lock(&rt6_exception_lock); - bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, + bucket = rcu_dereference_protected(nhc->nhc_exceptions, lockdep_is_held(&rt6_exception_lock)); if (bucket) { @@ -1790,6 +1819,7 @@ void rt6_age_exceptions(struct fib6_info *rt, } } spin_unlock(&rt6_exception_lock); +out: rcu_read_unlock_bh(); } @@ -2596,6 +2626,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, struct in6_addr *saddr) { + struct fib_nh_common *nhc = &f6i->fib6_nh.nh_common; struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; struct in6_addr *src_key; @@ -2614,7 +2645,7 @@ u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, src_key = saddr; #endif - bucket = rcu_dereference(f6i->rt6i_exception_bucket); + bucket = rcu_dereference(nhc->nhc_exceptions); rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU); @@ -3011,6 +3042,17 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, void fib6_nh_release(struct fib6_nh *fib6_nh) { + struct fib_nh_common *nhc = &fib6_nh->nh_common; + struct 
rt6_exception_bucket *bucket; + + fib6_nh_flush_exceptions(fib6_nh, NULL); + + bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1); + if (bucket) { + rcu_assign_pointer(nhc->nhc_exceptions, NULL); + kfree(bucket); + } + fib_nh_common_release(&fib6_nh->nh_common); } -- 2.11.0