On Wed 21 Aug 2019 at 01:32, Matthew Wilcox <wi...@infradead.org> wrote:
> From: "Matthew Wilcox (Oracle)" <wi...@infradead.org>
>
> Remove the hw_filter list in favour of using one of the XArray mark
> bits which lets us iterate more efficiently than walking a linked list.
>
> Signed-off-by: Matthew Wilcox (Oracle) <wi...@infradead.org>
> ---
>  net/sched/cls_flower.c | 47 ++++++++++--------------------------------
>  1 file changed, 11 insertions(+), 36 deletions(-)
>
> diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
> index 2a1999d2b507..4625de5e29a7 100644
> --- a/net/sched/cls_flower.c
> +++ b/net/sched/cls_flower.c
> @@ -85,11 +85,12 @@ struct fl_flow_tmplt {
>  	struct tcf_chain *chain;
>  };
>
> +#define HW_FILTER	XA_MARK_1
> +
>  struct cls_fl_head {
>  	struct rhashtable ht;
>  	spinlock_t masks_lock; /* Protect masks list */
>  	struct list_head masks;
> -	struct list_head hw_filters;
>  	struct rcu_work rwork;
>  	struct xarray filters;
>  };
> @@ -102,7 +103,6 @@ struct cls_fl_filter {
>  	struct tcf_result res;
>  	struct fl_flow_key key;
>  	struct list_head list;
> -	struct list_head hw_list;
>  	u32 handle;
>  	u32 flags;
>  	u32 in_hw_count;
> @@ -332,7 +332,6 @@ static int fl_init(struct tcf_proto *tp)
>
>  	spin_lock_init(&head->masks_lock);
>  	INIT_LIST_HEAD_RCU(&head->masks);
> -	INIT_LIST_HEAD(&head->hw_filters);
>  	rcu_assign_pointer(tp->root, head);
>  	xa_init_flags(&head->filters, XA_FLAGS_ALLOC1);
>
> @@ -421,7 +420,6 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
>
>  	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
>  	spin_lock(&tp->lock);
> -	list_del_init(&f->hw_list);
>  	tcf_block_offload_dec(block, &f->flags);
>  	spin_unlock(&tp->lock);
>
> @@ -433,7 +431,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
>  				struct cls_fl_filter *f, bool rtnl_held,
>  				struct netlink_ext_ack *extack)
>  {
> -	struct cls_fl_head *head = fl_head_dereference(tp);
>  	struct tcf_block *block = tp->chain->block;
>  	struct flow_cls_offload cls_flower = {};
>  	bool skip_sw = tc_skip_sw(f->flags);
> @@ -485,9 +482,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
>  		goto errout;
>  	}
>
> -	spin_lock(&tp->lock);
> -	list_add(&f->hw_list, &head->hw_filters);
> -	spin_unlock(&tp->lock);
>  errout:
>  	if (!rtnl_held)
>  		rtnl_unlock();
> @@ -1581,7 +1575,6 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>  		err = -ENOBUFS;
>  		goto errout_tb;
>  	}
> -	INIT_LIST_HEAD(&fnew->hw_list);
>  	refcount_set(&fnew->refcnt, 1);
>
>  	err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
> @@ -1698,6 +1691,11 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>
>  	*arg = fnew;
>
> +	if (!tc_skip_hw(fnew->flags))
> +		xa_set_mark(&head->filters, fnew->handle, HW_FILTER);
> +	else if (fold)
> +		xa_clear_mark(&head->filters, fnew->handle, HW_FILTER);
> +
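If I read the patch correctly, the mark effectively turns head->filters
into an implicit hw_filters list: a mark is a tag bit kept alongside an
entry, so no per-filter list linkage is needed, and xa_for_each_marked()
visits only the entries that have the bit set. A minimal standalone
sketch of the pattern (not cls_flower code; my_entry, my_filters and
MY_HW_MARK are made-up names):

#include <linux/types.h>
#include <linux/printk.h>
#include <linux/xarray.h>

struct my_entry {
	u32 handle;
};

#define MY_HW_MARK	XA_MARK_1

/* IDs allocated starting from 1, like XA_FLAGS_ALLOC1 in the patch */
static DEFINE_XARRAY_ALLOC1(my_filters);

static int my_insert(struct my_entry *e, bool offloaded)
{
	int err;

	/* xa_alloc() finds a free index and stores the entry there */
	err = xa_alloc(&my_filters, &e->handle, e, xa_limit_32b,
		       GFP_KERNEL);
	if (err)
		return err;

	/* Setting the mark does not move or copy the entry */
	if (offloaded)
		xa_set_mark(&my_filters, e->handle, MY_HW_MARK);
	return 0;
}

static void my_walk_offloaded(void)
{
	struct my_entry *e;
	unsigned long index;

	/* Visits marked entries only, skipping unmarked subtrees */
	xa_for_each_marked(&my_filters, index, e, MY_HW_MARK)
		pr_info("offloaded filter %u\n", e->handle);
}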
I like how the xa mark simplifies reoffload handling, but this wouldn't
work anymore because, without rtnl protection, fl_change()/fl_delete()
can be called concurrently with fl_reoffload(). My original
implementation of the unlocked flower classifier relied on the idr in
fl_reoffload(), and we had to introduce hw_list due to the following
race conditions:

- fl_reoffload() can miss fnew if it runs after fnew has been
  provisioned to hardware with fl_hw_replace_filter() but before it is
  marked with HW_FILTER.

- Another race is in __fl_delete(): the filter is removed from the
  xarray, then a shared block is detached concurrently, which causes
  fl_reoffload() to miss the filter; the block callback is then no
  longer present when fl_hw_destroy_filter() calls tc_setup_cb_call(),
  and we are left with a dangling filter that can't be removed from
  hardware anymore.

That is why the filter must be added to hw_list where it is done now -
in the fl_hw*() functions, while holding the rtnl lock to prevent
concurrent reoffload (block bind/unbind always takes rtnl). I guess
marking/unmarking filters with HW_FILTER in exactly the same places
where the filter is inserted into/removed from hw_list would also work.

>  	kfree(tb);
>  	tcf_queue_work(&mask->rwork, fl_uninit_mask_free_work);
>  	return 0;
> @@ -1770,37 +1768,14 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
>  	arg->cookie = id;
>  }
>
> -static struct cls_fl_filter *
> -fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add)
> -{
> -	struct cls_fl_head *head = fl_head_dereference(tp);
> -
> -	spin_lock(&tp->lock);
> -	if (list_empty(&head->hw_filters)) {
> -		spin_unlock(&tp->lock);
> -		return NULL;
> -	}
> -
> -	if (!f)
> -		f = list_entry(&head->hw_filters, struct cls_fl_filter,
> -			       hw_list);
> -	list_for_each_entry_continue(f, &head->hw_filters, hw_list) {
> -		if (!(add && f->deleted) && refcount_inc_not_zero(&f->refcnt)) {
> -			spin_unlock(&tp->lock);
> -			return f;
> -		}
> -	}
> -
> -	spin_unlock(&tp->lock);
> -	return NULL;
> -}
> -
>  static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
>  			void *cb_priv, struct netlink_ext_ack *extack)
>  {
> +	struct cls_fl_head *head = fl_head_dereference(tp);
>  	struct tcf_block *block = tp->chain->block;
>  	struct flow_cls_offload cls_flower = {};
> -	struct cls_fl_filter *f = NULL;
> +	struct cls_fl_filter *f;
> +	unsigned long handle;
>  	int err;
>
>  	/* hw_filters list can only be changed by hw offload functions after
> @@ -1809,7 +1784,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
>  	 */
>  	ASSERT_RTNL();
>
> -	while ((f = fl_get_next_hw_filter(tp, f, add))) {
> +	xa_for_each_marked(&head->filters, handle, f, HW_FILTER) {
>  		cls_flower.rule =
>  			flow_rule_alloc(tcf_exts_num_actions(&f->exts));
>  		if (!cls_flower.rule) {
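Concretely, something like this sketch is what I have in mind (untested;
the helper names are my own invention - the bodies would really be
inlined into fl_hw_replace_filter() and fl_hw_destroy_filter(), and the
xa_set_mark()/xa_clear_mark() calls in fl_change() would be dropped):

static void fl_hw_filter_mark(struct tcf_proto *tp, struct cls_fl_filter *f)
{
	struct cls_fl_head *head = fl_head_dereference(tp);

	/* Replaces list_add(&f->hw_list, &head->hw_filters) in
	 * fl_hw_replace_filter(), after the hardware accepted the
	 * filter. Runs with rtnl held, so it cannot race with
	 * fl_reoffload().
	 */
	spin_lock(&tp->lock);
	xa_set_mark(&head->filters, f->handle, HW_FILTER);
	spin_unlock(&tp->lock);
}

static void fl_hw_filter_unmark(struct tcf_proto *tp, struct cls_fl_filter *f)
{
	struct cls_fl_head *head = fl_head_dereference(tp);

	/* Replaces list_del_init(&f->hw_list) in fl_hw_destroy_filter(),
	 * next to tcf_block_offload_dec(), again with rtnl held.
	 */
	spin_lock(&tp->lock);
	xa_clear_mark(&head->filters, f->handle, HW_FILTER);
	spin_unlock(&tp->lock);
}

That would keep the invariant that the set of marked filters only
changes with rtnl held, so fl_reoffload() (which asserts rtnl) cannot
observe a half-offloaded or half-destroyed filter.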