RCU grace period is needed for lockless qdiscs added in the commit c5ad119fb6c09 ("net: sched: pfifo_fast use skb_array").
It is needed now that qdiscs may be lockless otherwise we risk free'ing a qdisc that is still in use from datapath. Additionally, push list cleanup into RCU callback. Otherwise we risk the datapath adding skbs during removal. Fixes: c5ad119fb6c09 ("net: sched: pfifo_fast use skb_array") Signed-off-by: John Fastabend <john.fastab...@gmail.com> --- include/net/sch_generic.h | 1 + net/sched/sch_generic.c | 50 ++++++++++++++++++++++++--------------------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index bc6b25f..a65306b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -97,6 +97,7 @@ struct Qdisc { unsigned long state; struct Qdisc *next_sched; struct sk_buff_head skb_bad_txq; + struct rcu_head rcu_head; int padded; refcount_t refcnt; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 876fab2..ab497ef 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -873,31 +873,15 @@ void qdisc_reset(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_reset); -static void qdisc_free(struct Qdisc *qdisc) +static void qdisc_rcu_free(struct rcu_head *head) { - if (qdisc_is_percpu_stats(qdisc)) { - free_percpu(qdisc->cpu_bstats); - free_percpu(qdisc->cpu_qstats); - } - - kfree((char *) qdisc - qdisc->padded); -} - -void qdisc_destroy(struct Qdisc *qdisc) -{ - const struct Qdisc_ops *ops = qdisc->ops; + struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head); + const struct Qdisc_ops *ops = qdisc->ops; struct sk_buff *skb, *tmp; - if (qdisc->flags & TCQ_F_BUILTIN || - !refcount_dec_and_test(&qdisc->refcnt)) - return; - -#ifdef CONFIG_NET_SCHED - qdisc_hash_del(qdisc); - - qdisc_put_stab(rtnl_dereference(qdisc->stab)); -#endif - gen_kill_estimator(&qdisc->rate_est); + /* At this point no outstanding references to this Qdisc should + * exist in the datapath so its safe to clean up skb lists, etc. + */ if (ops->reset) ops->reset(qdisc); if (ops->destroy) @@ -916,7 +900,27 @@ void qdisc_destroy(struct Qdisc *qdisc) kfree_skb_list(skb); } - qdisc_free(qdisc); + if (qdisc_is_percpu_stats(qdisc)) { + free_percpu(qdisc->cpu_bstats); + free_percpu(qdisc->cpu_qstats); + } + + kfree((char *) qdisc - qdisc->padded); +} + +void qdisc_destroy(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN || + !refcount_dec_and_test(&qdisc->refcnt)) + return; + +#ifdef CONFIG_NET_SCHED + qdisc_hash_del(qdisc); + + qdisc_put_stab(rtnl_dereference(qdisc->stab)); +#endif + gen_kill_estimator(&qdisc->rate_est); + call_rcu(&qdisc->rcu_head, qdisc_rcu_free); } EXPORT_SYMBOL(qdisc_destroy);