In setups like the following: Host A -- Host B tun0 -- ipsec -- eth0 -- eth0 -- ipsec -- tun0
where tun0 are tunnel devices using dst_cache (ipip, ipip6, etc...). Unregistration of an underlying eth0 device leads to the following log messages: unregister_netdevice: waiting for eth0 to become free. Usage count = 2 This is since xfrm dsts device references are not released upon device unregistration when the xfrm dst is cached in a dst_cache. Several approaches for fixing this were discussed in [1]; this commit keeps track of allocated xdsts and releases their device references on a netdev unregister event. Signed-off-by: Eyal Birger <eyal.bir...@gmail.com> [1] https://patchwork.ozlabs.org/patch/869025/ --- include/net/xfrm.h | 10 ++----- net/xfrm/xfrm_policy.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 7 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7d20776..c1c8493 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -994,6 +994,8 @@ struct xfrm_dst { u32 child_mtu_cached; u32 route_cookie; u32 path_cookie; + struct list_head xfrm_dst_gc; + struct xfrm_dst_list *xfrm_dst_gc_list; }; static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst) @@ -1025,13 +1027,7 @@ static inline void xfrm_dst_set_child(struct xfrm_dst *xdst, struct dst_entry *c xdst->child = child; } -static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) -{ - xfrm_pols_put(xdst->pols, xdst->num_pols); - dst_release(xdst->route); - if (likely(xdst->u.dst.xfrm)) - xfrm_state_put(xdst->u.dst.xfrm); -} +void xfrm_dst_destroy(struct xfrm_dst *xdst); #endif void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7a23078..1da8a65 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -45,6 +45,12 @@ struct xfrm_flo { u8 flags; }; +struct xfrm_dst_list { + spinlock_t lock; /* sync writers */ + struct list_head head; +}; + +static DEFINE_PER_CPU_ALIGNED(struct xfrm_dst_list, xfrm_dst_gc_list); static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst); static struct work_struct *xfrm_pcpu_work __read_mostly; static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); @@ -94,6 +100,51 @@ __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) (fl6->flowi6_oif == sel->ifindex || !sel->ifindex); } +static void xfrm_dst_add_to_gc_list(struct xfrm_dst *xdst) +{ + struct xfrm_dst_list *xl = raw_cpu_ptr(&xfrm_dst_gc_list); + + xdst->xfrm_dst_gc_list = xl; + + spin_lock_bh(&xl->lock); + list_add_tail(&xdst->xfrm_dst_gc, &xl->head); + spin_unlock_bh(&xl->lock); +} + +void xfrm_dst_destroy(struct xfrm_dst *xdst) +{ + xfrm_pols_put(xdst->pols, xdst->num_pols); + dst_release(xdst->route); + if (likely(xdst->u.dst.xfrm)) + xfrm_state_put(xdst->u.dst.xfrm); + if (!list_empty(&xdst->xfrm_dst_gc)) { + struct xfrm_dst_list *xl = xdst->xfrm_dst_gc_list; + + spin_lock_bh(&xl->lock); + list_del(&xdst->xfrm_dst_gc); + spin_unlock_bh(&xl->lock); + } +} +EXPORT_SYMBOL_GPL(xfrm_dst_destroy); + +static void xfrm_flush_dev(struct net_device *dev) +{ + struct xfrm_dst *xdst; + int cpu; + + for_each_possible_cpu(cpu) { + struct xfrm_dst_list *xl = &per_cpu(xfrm_dst_gc_list, cpu); + + spin_lock_bh(&xl->lock); + list_for_each_entry(xdst, &xl->head, xfrm_dst_gc) { + if (xdst->u.dst.dev != dev) + continue; + dst_dev_put(&xdst->u.dst); + } + spin_unlock_bh(&xl->lock); + } +} + bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, unsigned short family) { @@ -1581,6 +1632,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, } bundle[i] = xdst; + xfrm_dst_add_to_gc_list(xdst); + if (!xdst_prev) xdst0 = xdst; else @@ -2937,6 +2990,20 @@ static void xfrm_policy_fini(struct net *net) xfrm_hash_free(net->xfrm.policy_byidx, sz); } +static int xfrm_netdev_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (event == NETDEV_UNREGISTER) + xfrm_flush_dev(dev); + return NOTIFY_DONE; +} + +static struct notifier_block xfrm_netdev_notifier = { + .notifier_call = xfrm_netdev_event, +}; + static int __net_init xfrm_net_init(struct net *net) { int rv; @@ -2984,6 +3051,19 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { .exit = xfrm_net_exit, }; +static void __init xfrm_dst_gc_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct xfrm_dst_list *xl = &per_cpu(xfrm_dst_gc_list, cpu); + + INIT_LIST_HEAD(&xl->head); + spin_lock_init(&xl->lock); + } + register_netdevice_notifier(&xfrm_netdev_notifier); +} + void __init xfrm_init(void) { int i; @@ -2998,6 +3078,7 @@ void __init xfrm_init(void) register_pernet_subsys(&xfrm_net_ops); seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); + xfrm_dst_gc_init(); } #ifdef CONFIG_AUDITSYSCALL -- 2.7.4