Hi All,

We found a crash while performing some automated stress tests on a 5.4-kernel-based
device.

We found that a freed neighbour entry was still linked on the gc_list, which was
leading to the crash.
After adding some debug prints and checking the neigh_put/neigh_hold/neigh_destroy
call stacks, it looks like there is a possible race condition in this code.
The issue can be reproduced with some effort.

[35156.263929]  Call trace:
[35156.263948]  dump_backtrace+0x0/0x1d0
[35156.263963]  show_stack+0x18/0x24 
[35156.263982]  dump_stack+0xcc/0x11c 
[35156.264003]  print_address_description+0x88/0x578
[35156.264021]  __kasan_report+0x1b4/0x1d0
[35156.264037]   kasan_report+0x14/0x20
[35156.264054]   __asan_load8+0x98/0x9c
[35156.264070]  __list_add_valid+0x3c/0xbc 
[35156.264091]  ___neigh_create+0xbbc/0xcd4
[35156.264108]   __neigh_create+0x18/0x24
[35156.264130]  ip_finish_output2+0x3c4/0x8d0 
[35156.264147]  __ip_finish_output+0x290/0x2bc
[35156.264164]   ip_finish_output+0x48/0x10c
[35156.264180]   ip_output+0x22c/0x27c
[35156.264196]  ip_send_skb+0x70/0x138
[35156.264215]  udp_send_skb+0x3a8/0x6a8 
[35156.264231]  udp_sendmsg+0xd70/0xe4c
[35156.264246]   inet_sendmsg+0x60/0x7c
[35156.264264]  __sys_sendto+0x240/0x2d4 
[35156.264280]  __arm64_sys_sendto+0x7c/0x98 
[35156.264300]  invoke_syscall+0x184/0x328 
[35156.264317]  el0_svc_common+0xc8/0x184
[35156.264336]  el0_svc_handler+0x94/0xa4
[35156.264352]  el0_svc+0x8/0xc

[35156.264379]  Allocated by task 1269:
[35156.264404]   __kasan_kmalloc+0x100/0x1c0
[35156.264421]   kasan_slab_alloc+0x18/0x24
[35156.264438]   __kmalloc_track_caller+0x2cc/0x374
[35156.264455]   __alloc_skb+0xa4/0x24c
[35156.264473]   alloc_skb_with_frags+0x80/0x260
[35156.264491]   sock_alloc_send_pskb+0x324/0x498
[35156.264512]   unix_dgram_sendmsg+0x308/0xd34
[35156.264530]   unix_seqpacket_sendmsg+0x88/0xd8
[35156.264548]  __sys_sendto+0x240/0x2d4
[35156.264564]   __arm64_sys_sendto+0x7c/0x98
[35156.264582]   invoke_syscall+0x184/0x328
[35156.264600]   el0_svc_common+0xc8/0x184
[35156.264617]  el0_svc_handler+0x94/0xa4
[35156.264632]   el0_svc+0x8/0xc

[35156.264655]  Freed by task 27859:
[35156.264678]  __kasan_slab_free+0x164/0x234 
[35156.264696]  kasan_slab_free+0x14/0x24 
[35156.264712]  slab_free_freelist_hook+0xe0/0x164
[35156.264726]  kfree+0x134/0x720
[35156.264743]  skb_release_data+0x298/0x2c8 
[35156.264759]  __kfree_skb+0x30/0xb0 
[35156.264774]  consume_skb+0x148/0x17c 
[35156.264792]  skb_free_datagram+0x1c/0x68 
[35156.264809]  unix_dgram_recvmsg+0x46c/0x4d4 
[35156.264827]  unix_seqpacket_recvmsg+0x5c/0x7c 
[35156.264843]  __sys_recvfrom+0x1d0/0x268 
[35156.264864]  __arm64_compat_sys_recvfrom+0x7c/0x98
[35156.264882]   invoke_syscall+0x184/0x328
[35156.264899]   el0_svc_common+0xb4/0x184
[35156.264918]   el0_svc_compat_handler+0x30/0x40
[35156.264933]  el0_svc_compat+0x8/0x24
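
For context, the __list_add_valid() hit in the first trace corresponds to the
gc_list insertion done in ___neigh_create() (quoting the 5.4 code from memory,
so treat the exact context as approximate):

        n->dead = 0;
        if (!exempt_from_gc)
                list_add_tail(&n->gc_list, &n->tbl->gc_list);

        if (want_ref)
                neigh_hold(n);

If a freed neighbour is still linked on tbl->gc_list at this point, the list
debug check trips on the stale prev/next pointers while linking the new entry.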

Possible Race Condition:

CPU 1: 
(thread executing flow: arp_ifdown -> neigh_ifdown -> __neigh_ifdown -> 
neigh_flush_dev)

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
                                                        bool skip_perm)
{
        int i;
        struct neigh_hash_table *nht;
        .....
        
        neigh_del_timer(n);
        neigh_mark_dead(n); --------> //ts1: neighbour entry gets marked as dead
                
        if (refcount_read(&n->refcnt) != 1) {
                /* The most unpleasant situation.
                   We must destroy neighbour entry,
                   but someone still uses it.

                   The destroy will be delayed until
                   the last user releases us, but
                   we must kill timers etc. and move
                   it to safe state.
                */
                __skb_queue_purge(&n->arp_queue);
                n->arp_queue_len_bytes = 0;
                n->output = neigh_blackhole;
                if (n->nud_state & NUD_VALID)
                        n->nud_state = NUD_NOARP;
                else
                        n->nud_state = NUD_NONE;
                neigh_dbg(2, "neigh %p is stray\n", n);
        }
        write_unlock(&n->lock);
        neigh_cleanup_and_release(n);  ---->  //ts3: reference decremented 
                                              //but destroy couldn't be done
                                              // as reference increased in ts2
        ....
                
                

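For reference, neigh_mark_dead() (5.4, approximately) both sets n->dead and
unlinks the entry from the gc_list, which is why the entry is already off the
list when CPU 2 later runs neigh_update_gc_list():

static void neigh_mark_dead(struct neighbour *n)
{
        n->dead = 1;
        if (!list_empty(&n->gc_list)) {
                list_del_init(&n->gc_list);
                atomic_dec(&n->tbl->gc_entries);
        }
}
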
CPU 2:
(thread executing flow: __netif_receive_skb -> __netif_receive_skb_core -> 
arp_rcv -> arp_process)

static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
{
.....
                
        /* Update our ARP tables */

        n = __neigh_lookup(&arp_tbl, &sip, dev, 0);  ----> //ts2: reference
                                                           //taken on neighbour
        ...
        if (n) {
                int state = NUD_REACHABLE;
                int override;
                /* If several different ARP replies follows back-to-back,
                   use the FIRST one. It is possible, if several proxy
                   agents are active. Taking the first reply prevents
                   arp trashing and chooses the fastest router.
                */
                override = time_after(jiffies,
                                      n->updated +
                                      NEIGH_VAR(n->parms, LOCKTIME)) ||
                           is_garp;

                /* Broadcast replies and request packets
                   do not assert neighbour reachability.
                */
                if (arp->ar_op != htons(ARPOP_REPLY) ||
                    skb->pkt_type != PACKET_HOST)
                        state = NUD_STALE;
                neigh_update(n, sha, state,
                             override ? NEIGH_UPDATE_F_OVERRIDE : 0, 0);
                ---->  //ts4: neigh_update -> __neigh_update ->
                       //     neigh_update_gc_list
                neigh_release(n);  ----> //ts5: release the neighbour entry and
                                         //call destroy as refcount is 1
        }
}
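
Note that the lookup at ts2 only requires the refcount to be non-zero; it does
not look at n->dead, so it can hand back a reference to an entry that
neigh_flush_dev() has already marked dead (5.4, approximately):

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
                               struct net_device *dev)
{
        struct neighbour *n;
        ...
        rcu_read_lock_bh();
        n = __neigh_lookup_noref(tbl, pkey, dev);
        if (n) {
                if (!refcount_inc_not_zero(&n->refcnt))   //ts2
                        n = NULL;
                NEIGH_CACHE_STAT_INC(tbl, hits);
        }
        rcu_read_unlock_bh();

        return n;
}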

static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
                u8 new, u32 flags, u32 nlmsg_pid,
                struct netlink_ext_ack *extack)
{
...
                
        if (neigh->dead) {
                NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
                goto out;
        }
        .....

out:
        if (update_isrouter)
                neigh_update_is_router(neigh, flags, &notify);
        write_unlock_bh(&neigh->lock);

        if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
                neigh_update_gc_list(neigh);
        //The test could have had an old state of NUD_PERMANENT and a new state
        //without it, which hits this path even for a dead entry?
}
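
For completeness, neigh_update_gc_list() itself (5.4, approximately) never
checks n->dead, so at ts4 it re-links the dead entry that neigh_mark_dead()
removed at ts1:

static void neigh_update_gc_list(struct neighbour *n)
{
        bool on_gc_list, exempt_from_gc;

        write_lock_bh(&n->tbl->lock);
        write_lock(&n->lock);

        exempt_from_gc = n->nud_state & NUD_PERMANENT ||
                         n->flags & NTF_EXT_LEARNED;
        on_gc_list = !list_empty(&n->gc_list);

        if (exempt_from_gc && on_gc_list) {
                list_del_init(&n->gc_list);
                atomic_dec(&n->tbl->gc_entries);
        } else if (!exempt_from_gc && !on_gc_list) {
                //ts4: a dead entry gets put back on the gc_list here
                list_add_tail(&n->gc_list, &n->tbl->gc_list);
                atomic_inc(&n->tbl->gc_entries);
        }

        write_unlock(&n->lock);
        write_unlock_bh(&n->tbl->lock);
}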

                
                
The crash may have been due to out-of-order ARP replies.
Since the neighbour is already marked dead, should we go ahead with updating our
ARP tables at all?
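
One possible mitigation (just a sketch, not a tested patch) would be to have
neigh_update_gc_list() bail out once the entry has been marked dead, since
neigh_mark_dead() has already unlinked it and neigh_flush_dev() will drop the
table's reference:

        write_lock_bh(&n->tbl->lock);
        write_lock(&n->lock);

        /* flush already unlinked this entry and is about to release it;
         * don't put it back on the gc_list
         */
        if (n->dead)
                goto out;
        .....
out:
        write_unlock(&n->lock);
        write_unlock_bh(&n->tbl->lock);

Alternatively, should __neigh_update() simply skip the gc_list update when the
entry is dead?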

