On Fri, 2016-11-25 at 20:11 -0800, Eric Dumazet wrote:
> On Fri, 2016-11-25 at 19:15 -0700, subas...@codeaurora.org wrote:
> > We are seeing a crash due to the genl_lock mutex being acquired in RCU
> > callback context.
> > Crash is seen on a 4.4 based kernel ARM64 device. This occurred in a
> > regression rack, so unfortunately I don't have steps for a reproducer.
> > 
> > It looks like freeing socket in RCU was brought in through commit
> > 21e4902aea80ef35afc00ee8d2abdea4f519b7f7 ("netlink: Lockless lookup with
> > RCU grace period in socket release").
> > I am not very familiar with generic netlink sockets so I am not sure
> > if there is any other way to fix this apart from reverting this patch.
> > 
> > Any pointers to debug this would be appreciated.
> > 
> > Here is the call stack -
> > 
> > BUG: sleeping function called from invalid context 
> > kernel/locking/mutex.c:98
> > in_atomic(): 1, irqs_disabled(): 0, pid: 16400, name: busybox
> > [<ffffff80080cad20>] ___might_sleep+0x134/0x144
> > [<ffffff80080cadac>] __might_sleep+0x7c/0x8c
> > [<ffffff8008ef09a8>] mutex_lock+0x2c/0x4c
> > [<ffffff8008d307f0>] genl_lock+0x1c/0x24
> > [<ffffff8008d30848>] genl_lock_done+0x2c/0x50
> > [<ffffff8008d2ccac>] netlink_sock_destruct+0x30/0x94
> > [<ffffff8008cdef44>] sk_destruct+0x2c/0x150
> > [<ffffff8008cdf104>] __sk_free+0x9c/0xc4
> > [<ffffff8008cdf16c>] sk_free+0x40/0x4c
> > [<ffffff8008d2c7fc>] deferred_put_nlk_sk+0x40/0x4c
> > [<ffffff800810b104>] rcu_process_callbacks+0x4d4/0x644
> > [<ffffff80080a6598>] __do_softirq+0x1b8/0x3c4
> > [<ffffff80080a6a60>] irq_exit+0x80/0xd4
> > [<ffffff800808e554>] handle_IPI+0x1c0/0x364
> > [<ffffff80080817f8>] gic_handle_irq+0x154/0x1a4
> 
> Right, Thomas' commit looks buggy.
> 
> Unfortunately the proper infra was added later in commit 
> a4298e4522d687a79af8 ("net: add SOCK_RCU_FREE socket flag")
> 
> I guess we should backport it, then apply the following (untested) fix:
> 
> Could you test this solution? Thanks!
> 
> diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
> index 62bea4591054..fe0d43314198 100644
> --- a/net/netlink/af_netlink.c
> +++ b/net/netlink/af_netlink.c
> @@ -456,8 +456,9 @@ static struct sock *netlink_lookup(struct net *net, int 
> protocol, u32 portid)
>  
>       rcu_read_lock();
>       sk = __netlink_lookup(table, portid, net);
> -     if (sk)
> -             sock_hold(sk);
> +     if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
> +             sk = NULL;
> +
>       rcu_read_unlock();
>  
>       return sk;
> @@ -581,6 +582,7 @@ static int __netlink_create(struct net *net, struct 
> socket *sock,
>       }
>       init_waitqueue_head(&nlk->wait);
>  
> +     sock_set_flag(sk, SOCK_RCU_FREE);
>       sk->sk_destruct = netlink_sock_destruct;
>       sk->sk_protocol = protocol;

Oh well, this won't work, since sk->sk_destruct will still be called from an
RCU callback, where sleeping (and therefore taking a mutex) is not allowed.

Grabbing the mutex should not be done from netlink_sock_destruct() but
from netlink_release(), which runs in process context.

Maybe this patch would be better:

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 62bea4591054..cce10e3c9b68 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -324,16 +324,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, 
struct sock *sk)
 
 static void netlink_sock_destruct(struct sock *sk)
 {
-       struct netlink_sock *nlk = nlk_sk(sk);
-
-       if (nlk->cb_running) {
-               if (nlk->cb.done)
-                       nlk->cb.done(&nlk->cb);
-
-               module_put(nlk->cb.module);
-               kfree_skb(nlk->cb.skb);
-       }
-
        skb_queue_purge(&sk->sk_receive_queue);
 
        if (!sock_flag(sk, SOCK_DEAD)) {
@@ -456,8 +446,9 @@ static struct sock *netlink_lookup(struct net *net, int 
protocol, u32 portid)
 
        rcu_read_lock();
        sk = __netlink_lookup(table, portid, net);
-       if (sk)
-               sock_hold(sk);
+       if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+               sk = NULL;
+
        rcu_read_unlock();
 
        return sk;
@@ -581,6 +572,7 @@ static int __netlink_create(struct net *net, struct socket 
*sock,
        }
        init_waitqueue_head(&nlk->wait);
 
+       sock_set_flag(sk, SOCK_RCU_FREE);
        sk->sk_destruct = netlink_sock_destruct;
        sk->sk_protocol = protocol;
        return 0;
@@ -645,13 +637,6 @@ static int netlink_create(struct net *net, struct socket 
*sock, int protocol,
        goto out;
 }
 
-static void deferred_put_nlk_sk(struct rcu_head *head)
-{
-       struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
-
-       sock_put(&nlk->sk);
-}
-
 static int netlink_release(struct socket *sock)
 {
        struct sock *sk = sock->sk;
@@ -724,7 +709,19 @@ static int netlink_release(struct socket *sock)
        local_bh_disable();
        sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
        local_bh_enable();
-       call_rcu(&nlk->rcu, deferred_put_nlk_sk);
+       if (nlk->cb_running) {
+               mutex_lock(nlk->cb_mutex);
+               if (nlk->cb_running) {
+                       if (nlk->cb.done)
+                               nlk->cb.done(&nlk->cb);
+       
+                       module_put(nlk->cb.module);
+                       kfree_skb(nlk->cb.skb);
+                       nlk->cb_running = false;
+               }
+               mutex_unlock(nlk->cb_mutex);
+       }
+       sock_put(sk);
        return 0;
 }
 
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 3cfd6cc60504..5dc08a7b0a2b 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -32,7 +32,6 @@ struct netlink_sock {
        struct module           *module;
 
        struct rhash_head       node;
-       struct rcu_head         rcu;
 };
 
 static inline struct netlink_sock *nlk_sk(struct sock *sk)


Reply via email to