Mon, Feb 04, 2019 at 01:32:46PM CET, vla...@mellanox.com wrote: >Currently, tcf_block doesn't use any synchronization mechanisms to protect >critical sections that manage lifetime of its chains. block->chain_list and >multiple variables in tcf_chain that control its lifetime assume external >synchronization provided by global rtnl lock. Converting chain reference >counting to atomic reference counters is not possible because cls API uses >multiple counters and flags to control chain lifetime, so all of them must >be synchronized in chain get/put code. > >Use single per-block lock to protect block data and manage lifetime of all >chains on the block. Always take block->lock when accessing chain_list. >Chain get and put modify chain lifetime-management data and parent block's >chain_list, so take the lock in these functions. Verify block->lock state >with assertions in functions that expect to be called with the lock taken >and are called from multiple places. Take block->lock when accessing >filter_chain_list. > >In order to allow parallel update of rules on single block, move all calls >to classifiers outside of critical sections protected by new block->lock. >Rearrange chain get and put functions code to only access protected chain >data while holding block lock: >- Check if chain was explicitly created inside put function while holding > block lock. Add additional argument to __tcf_chain_put() to only put > explicitly created chain. >- Rearrange code to only access chain reference counter and chain action > reference counter while holding block lock. >- Extract code that requires block->lock from tcf_chain_destroy() into > standalone tcf_chain_detach() function that is called by > __tcf_chain_put() in same critical section that changes chain reference > counters. > >Signed-off-by: Vlad Buslov <vla...@mellanox.com> >--- > >Changes from V2 to V3: > - Change block->lock type to mutex. 
> - Implement tcf_block_destroy() helper function that destroys > block->lock mutex before deallocating the block. > - Revert GFP_KERNEL->GFP_ATOMIC memory allocation flags of tcf_chain > which is no longer needed after block->lock type change. > > include/net/sch_generic.h | 5 +++ > net/sched/cls_api.c | 102 ++++++++++++++++++++++++++++++++++++++-------- > 2 files changed, 89 insertions(+), 18 deletions(-) > >diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h >index 7a4957599874..31b8ea66a47d 100644 >--- a/include/net/sch_generic.h >+++ b/include/net/sch_generic.h >@@ -12,6 +12,7 @@ > #include <linux/list.h> > #include <linux/refcount.h> > #include <linux/workqueue.h> >+#include <linux/mutex.h> > #include <net/gen_stats.h> > #include <net/rtnetlink.h> > >@@ -352,6 +353,10 @@ struct tcf_chain { > }; > > struct tcf_block { >+ /* Lock protects tcf_block and lifetime-management data of chains >+ * attached to the block (refcnt, action_refcnt, explicitly_created). >+ */ >+ struct mutex lock; > struct list_head chain_list; > u32 index; /* block index for shared blocks */ > refcount_t refcnt; >diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c >index e2b5cb2eb34e..cc416b6a3aa2 100644 >--- a/net/sched/cls_api.c >+++ b/net/sched/cls_api.c >@@ -193,6 +193,9 @@ static void tcf_proto_destroy(struct tcf_proto *tp, > kfree_rcu(tp, rcu); > } > >+#define ASSERT_BLOCK_LOCKED(block) \ >+ lockdep_assert_held(&(block)->lock) >+ > struct tcf_filter_chain_list_item { > struct list_head list; > tcf_chain_head_change_t *chain_head_change; >@@ -204,6 +207,8 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block >*block, > { > struct tcf_chain *chain; > >+ ASSERT_BLOCK_LOCKED(block); >+ > chain = kzalloc(sizeof(*chain), GFP_KERNEL); > if (!chain) > return NULL; >@@ -235,25 +240,51 @@ static void tcf_chain0_head_change(struct tcf_chain >*chain, > tcf_chain_head_change_item(item, tp_head); > } > >-static void tcf_chain_destroy(struct tcf_chain *chain) >+/* Returns 
true if block can be safely freed. */ >+ >+static bool tcf_chain_detach(struct tcf_chain *chain) > { > struct tcf_block *block = chain->block; > >+ ASSERT_BLOCK_LOCKED(block); >+ > list_del(&chain->list); > if (!chain->index) > block->chain0.chain = NULL; >+ >+ if (list_empty(&block->chain_list) && >+ refcount_read(&block->refcnt) == 0) >+ return true; >+ >+ return false; >+} >+ >+static void tcf_block_destroy(struct tcf_block *block) >+{ >+ mutex_destroy(&block->lock); >+ kfree_rcu(block, rcu); >+} >+ >+static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block) >+{ >+ struct tcf_block *block = chain->block; >+ > kfree(chain); >- if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt)) >- kfree_rcu(block, rcu); >+ if (free_block) >+ tcf_block_destroy(block); > } > > static void tcf_chain_hold(struct tcf_chain *chain) > { >+ ASSERT_BLOCK_LOCKED(chain->block); >+ > ++chain->refcnt; > } > > static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain) > { >+ ASSERT_BLOCK_LOCKED(chain->block); >+ > /* In case all the references are action references, this > * chain should not be shown to the user. 
> */ >@@ -265,6 +296,8 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block >*block, > { > struct tcf_chain *chain; > >+ ASSERT_BLOCK_LOCKED(block); >+ > list_for_each_entry(chain, &block->chain_list, list) { > if (chain->index == chain_index) > return chain; >@@ -279,31 +312,40 @@ static struct tcf_chain *__tcf_chain_get(struct >tcf_block *block, > u32 chain_index, bool create, > bool by_act) > { >- struct tcf_chain *chain = tcf_chain_lookup(block, chain_index); >+ struct tcf_chain *chain = NULL; >+ bool is_first_reference; > >+ mutex_lock(&block->lock); >+ chain = tcf_chain_lookup(block, chain_index); > if (chain) { > tcf_chain_hold(chain); > } else { > if (!create) >- return NULL; >+ goto errout; > chain = tcf_chain_create(block, chain_index); > if (!chain) >- return NULL; >+ goto errout; > } > > if (by_act) > ++chain->action_refcnt; >+ is_first_reference = chain->refcnt - chain->action_refcnt == 1; >+ mutex_unlock(&block->lock); > > /* Send notification only in case we got the first > * non-action reference. Until then, the chain acts only as > * a placeholder for actions pointing to it and user ought > * not know about them. 
> */ >- if (chain->refcnt - chain->action_refcnt == 1 && !by_act) >+ if (is_first_reference && !by_act) > tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, > RTM_NEWCHAIN, false); > > return chain; >+ >+errout: >+ mutex_unlock(&block->lock); >+ return chain; > } > > static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 > chain_index, >@@ -320,37 +362,59 @@ EXPORT_SYMBOL(tcf_chain_get_by_act); > > static void tc_chain_tmplt_del(struct tcf_chain *chain); > >-static void __tcf_chain_put(struct tcf_chain *chain, bool by_act) >+static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, >+ bool explicitly_created) > { >+ struct tcf_block *block = chain->block; >+ bool is_last, free_block = false; >+ unsigned int refcnt; >+ >+ mutex_lock(&block->lock); >+ if (explicitly_created) { >+ if (!chain->explicitly_created) { >+ mutex_unlock(&block->lock); >+ return; >+ } >+ chain->explicitly_created = false;
Hmm, I think that you left "chain->explicitly_created = false" at the original location (tc_ctl_chain()). I think it would be better to do the chain->explicitly_created management move in a separate patch. >+ } >+ > if (by_act) > chain->action_refcnt--; >- chain->refcnt--; >+ >+ /* tc_chain_notify_delete can't be called while holding block lock. >+ * However, when block is unlocked chain can be changed concurrently, so >+ * save these to temporary variables. >+ */ >+ refcnt = --chain->refcnt; >+ is_last = refcnt - chain->action_refcnt == 0; >+ if (refcnt == 0) >+ free_block = tcf_chain_detach(chain); >+ mutex_unlock(&block->lock); > > /* The last dropped non-action reference will trigger notification. */ >- if (chain->refcnt - chain->action_refcnt == 0 && !by_act) >+ if (is_last && !by_act) > tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false); > >- if (chain->refcnt == 0) { >+ if (refcnt == 0) { > tc_chain_tmplt_del(chain); >- tcf_chain_destroy(chain); >+ tcf_chain_destroy(chain, free_block); > } > } > > static void tcf_chain_put(struct tcf_chain *chain) > { >- __tcf_chain_put(chain, false); >+ __tcf_chain_put(chain, false, false); > } > > void tcf_chain_put_by_act(struct tcf_chain *chain) > { >- __tcf_chain_put(chain, true); >+ __tcf_chain_put(chain, true, false); > } > EXPORT_SYMBOL(tcf_chain_put_by_act); > > static void tcf_chain_put_explicitly_created(struct tcf_chain *chain) > { >- if (chain->explicitly_created) >- tcf_chain_put(chain); >+ __tcf_chain_put(chain, false, true); > } > > static void tcf_chain_flush(struct tcf_chain *chain) >@@ -764,6 +828,7 @@ static struct tcf_block *tcf_block_create(struct net *net, >struct Qdisc *q, > NL_SET_ERR_MSG(extack, "Memory allocation for block failed"); > return ERR_PTR(-ENOMEM); > } >+ mutex_init(&block->lock); > INIT_LIST_HEAD(&block->chain_list); > INIT_LIST_HEAD(&block->cb_list); > INIT_LIST_HEAD(&block->owner_list); >@@ -827,7 +892,7 @@ static void tcf_block_put_all_chains(struct tcf_block >*block) > static 
void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, > struct tcf_block_ext_info *ei) > { >- if (refcount_dec_and_test(&block->refcnt)) { >+ if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) { > /* Flushing/putting all chains will cause the block to be > * deallocated when last chain is freed. However, if chain_list > * is empty, block has to be manually deallocated. After block >@@ -836,6 +901,7 @@ static void __tcf_block_put(struct tcf_block *block, >struct Qdisc *q, > */ > bool free_block = list_empty(&block->chain_list); > >+ mutex_unlock(&block->lock); > if (tcf_block_shared(block)) > tcf_block_remove(block, block->net); > if (!free_block) >@@ -845,7 +911,7 @@ static void __tcf_block_put(struct tcf_block *block, >struct Qdisc *q, > tcf_block_offload_unbind(block, q, ei); > > if (free_block) >- kfree_rcu(block, rcu); >+ tcf_block_destroy(block); > else > tcf_block_put_all_chains(block); > } else if (q) { >-- >2.13.6 >