On Tue, Feb 13, 2018 at 6:54 PM, Eyal Birger <[email protected]> wrote: > In setups like the following: > > Host A -- Host B > tun0 -- ipsec -- eth0 -- eth0 -- ipsec -- tun0 > > where tun0 are tunnel devices using dst_cache (ipip, ipip6, etc...). > > Unregistration of an underlying eth0 device leads to the following log > messages: > > unregister_netdevice: waiting for eth0 to become free. Usage count = 2 > > This is since xfrm dsts device references are not released upon device > unregistration when the xfrm dst is cached in a dst_cache. > > This issue was first introduced in commit 52df157f17e5 > ("xfrm: take refcnt of dst when creating struct xfrm_dst bundle") > as part of an effort to remove routing garbage collection. > > Several approaches for fixing this were discussed in [1]; this commit keeps > track of allocated xdsts and releases their device references on a netdev > unregister/down events. > > Signed-off-by: Eyal Birger <[email protected]> > Fixes: 52df157f17e5 ("xfrm: take refcnt of dst when creating struct xfrm_dst > bundle") I had another fix [1] for this issue in my local tree. That reuses uncached_list to track xdsts, and fewer job to do for this. As naturally uncached_list is already the list that is supposed to do this job. We don't have to introduce a new list and members.
[1] https://paste.fedoraproject.org/paste/X8FQzEXjGXQBTKAVLnDn4w Not sure if it makes more sense for xfrm? Thanks. > > [1] https://patchwork.ozlabs.org/patch/869025/ > > --- > > v3: > - add 'Fixes' tag and document appropriately > v2: > - call gc flush from existing netdev notifier per Shannon Nelson's > suggestion. > --- > include/net/xfrm.h | 11 +++------ > net/xfrm/xfrm_device.c | 2 ++ > net/xfrm/xfrm_policy.c | 66 > ++++++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 72 insertions(+), 7 deletions(-) > > diff --git a/include/net/xfrm.h b/include/net/xfrm.h > index 7d20776..4614047 100644 > --- a/include/net/xfrm.h > +++ b/include/net/xfrm.h > @@ -324,6 +324,7 @@ void xfrm_policy_unregister_afinfo(const struct > xfrm_policy_afinfo *afinfo); > void km_policy_notify(struct xfrm_policy *xp, int dir, > const struct km_event *c); > void xfrm_policy_cache_flush(void); > +void xfrm_policy_dst_gc_flush_dev(struct net_device *dev); > void km_state_notify(struct xfrm_state *x, const struct km_event *c); > > struct xfrm_tmpl; > @@ -994,6 +995,8 @@ struct xfrm_dst { > u32 child_mtu_cached; > u32 route_cookie; > u32 path_cookie; > + struct list_head xfrm_dst_gc; > + struct xfrm_dst_list *xfrm_dst_gc_list; > }; > > static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst) > @@ -1025,13 +1028,7 @@ static inline void xfrm_dst_set_child(struct xfrm_dst > *xdst, struct dst_entry *c > xdst->child = child; > } > > -static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) > -{ > - xfrm_pols_put(xdst->pols, xdst->num_pols); > - dst_release(xdst->route); > - if (likely(xdst->u.dst.xfrm)) > - xfrm_state_put(xdst->u.dst.xfrm); > -} > +void xfrm_dst_destroy(struct xfrm_dst *xdst); > #endif > > void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); > diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c > index 8e70291..5bd9892 100644 > --- a/net/xfrm/xfrm_device.c > +++ b/net/xfrm/xfrm_device.c > @@ -309,6 +309,7 @@ static int xfrm_dev_register(struct net_device *dev) > static int xfrm_dev_unregister(struct net_device *dev) > { > xfrm_policy_cache_flush(); > + xfrm_policy_dst_gc_flush_dev(dev); > return NOTIFY_DONE; > } > > @@ -323,6 +324,7 @@ static int xfrm_dev_down(struct net_device *dev) > xfrm_dev_state_flush(dev_net(dev), dev, true); > > xfrm_policy_cache_flush(); > + xfrm_policy_dst_gc_flush_dev(dev); > return NOTIFY_DONE; > } > > diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c > index 7a23078..c7b9a9a 100644 > --- a/net/xfrm/xfrm_policy.c > +++ b/net/xfrm/xfrm_policy.c > @@ -45,6 +45,12 @@ struct xfrm_flo { > u8 flags; > }; > > +struct xfrm_dst_list { > + spinlock_t lock; /* sync writers */ > + struct list_head head; > +}; > + > +static DEFINE_PER_CPU_ALIGNED(struct xfrm_dst_list, xfrm_dst_gc_list); > static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst); > static struct work_struct *xfrm_pcpu_work __read_mostly; > static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); > @@ -94,6 +100,51 @@ __xfrm6_selector_match(const struct xfrm_selector *sel, > const struct flowi *fl) > (fl6->flowi6_oif == sel->ifindex || !sel->ifindex); > } > > +static void xfrm_dst_add_to_gc_list(struct xfrm_dst *xdst) > +{ > + struct xfrm_dst_list *xl = raw_cpu_ptr(&xfrm_dst_gc_list); > + > + xdst->xfrm_dst_gc_list = xl; > + > + spin_lock_bh(&xl->lock); > + list_add_tail(&xdst->xfrm_dst_gc, &xl->head); > + spin_unlock_bh(&xl->lock); > +} > + > +void xfrm_dst_destroy(struct xfrm_dst *xdst) > +{ > + xfrm_pols_put(xdst->pols, xdst->num_pols); > + dst_release(xdst->route); > + if (likely(xdst->u.dst.xfrm)) > + xfrm_state_put(xdst->u.dst.xfrm); > + if (!list_empty(&xdst->xfrm_dst_gc)) { > + struct xfrm_dst_list *xl = xdst->xfrm_dst_gc_list; > + > + spin_lock_bh(&xl->lock); > + list_del(&xdst->xfrm_dst_gc); > + spin_unlock_bh(&xl->lock); > + } > +} > +EXPORT_SYMBOL_GPL(xfrm_dst_destroy); > + > +void xfrm_policy_dst_gc_flush_dev(struct net_device *dev) > +{ > + struct xfrm_dst *xdst; > + int cpu; > + > + for_each_possible_cpu(cpu) { > + struct xfrm_dst_list *xl = &per_cpu(xfrm_dst_gc_list, cpu); > + > + spin_lock_bh(&xl->lock); > + list_for_each_entry(xdst, &xl->head, xfrm_dst_gc) { > + if (xdst->u.dst.dev != dev) > + continue; > + dst_dev_put(&xdst->u.dst); > + } > + spin_unlock_bh(&xl->lock); > + } > +} > + > bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi > *fl, > unsigned short family) > { > @@ -1581,6 +1632,8 @@ static struct dst_entry *xfrm_bundle_create(struct > xfrm_policy *policy, > } > > bundle[i] = xdst; > + xfrm_dst_add_to_gc_list(xdst); > + > if (!xdst_prev) > xdst0 = xdst; > else > @@ -2984,6 +3037,18 @@ static struct pernet_operations __net_initdata > xfrm_net_ops = { > .exit = xfrm_net_exit, > }; > > +static void __init xfrm_dst_gc_init(void) > +{ > + int cpu; > + > + for_each_possible_cpu(cpu) { > + struct xfrm_dst_list *xl = &per_cpu(xfrm_dst_gc_list, cpu); > + > + INIT_LIST_HEAD(&xl->head); > + spin_lock_init(&xl->lock); > + } > +} > + > void __init xfrm_init(void) > { > int i; > @@ -2998,6 +3063,7 @@ void __init xfrm_init(void) > register_pernet_subsys(&xfrm_net_ops); > seqcount_init(&xfrm_policy_hash_generation); > xfrm_input_init(); > + xfrm_dst_gc_init(); > } > > #ifdef CONFIG_AUDITSYSCALL > -- > 2.7.4 >
