On Wed, 20 Dec 2017 16:37:10 -0800, Jakub Kicinski wrote:
> On Wed, 20 Dec 2017 16:03:49 -0800, Cong Wang wrote:
> > On Wed, Dec 20, 2017 at 10:31 AM, Cong Wang <xiyou.wangc...@gmail.com>
> > wrote:
> > > On Wed, Dec 20, 2017 at 10:17 AM, Cong Wang <xiyou.wangc...@gmail.com>
> > > wrote:
> > >>
> > >> I guess it is q->miniqp which is freed in qdisc_graft() without properly
> > >> waiting for rcu readers?
> > >
> > > It is probably so, the call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func)
> > > in the end of mini_qdisc_pair_swap() is invoked on miniq_old->rcu,
> > > but miniq is being freed, no rcu barrier waits for it...
> > >
> > > You can try to add a rcu_barrier_bh() at the end to see if this crash
> > > is gone, but I don't think people like adding yet another rcu barrier...
> > >
> >
> > Hi, Jakub
> >
> > Can you test the following fix? I am not a fan of rcu barrier but we
> > already have one so...
> >
> > diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> > index 876fab2604b8..1b68fedea124 100644
> > --- a/net/sched/sch_generic.c
> > +++ b/net/sched/sch_generic.c
> > @@ -1240,6 +1240,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair
> > *miniqp,
> >
> >  	if (!tp_head) {
> >  		RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
> > +		/* Wait for existing flying RCU callback before being
> > freed. */
> > +		rcu_barrier_bh();
> >  		return;
> >  	}
>
> Looks good after 30 minutes, feel free to add if you post officially:
>
> Tested-by: Jakub Kicinski <jakub.kicin...@netronome.com>
Just as I hit send... :) but this looks unrelated, "Comm: sshd" - so probably from the management interface. [ 154.604041] ================================================================== [ 154.612245] BUG: KASAN: slab-out-of-bounds in pfifo_fast_dequeue+0x140/0x2d0 [ 154.620219] Read of size 8 at addr ffff88086bb64040 by task sshd/983 [ 154.627403] [ 154.629161] CPU: 10 PID: 983 Comm: sshd Not tainted 4.15.0-rc3-perf-00984-g82d3fc87a4aa-dirty #13 [ 154.639190] Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.3.4 11/08/2016 [ 154.647665] Call Trace: [ 154.650494] dump_stack+0xa6/0x118 [ 154.654387] ? _atomic_dec_and_lock+0xe8/0xe8 [ 154.659355] ? trace_event_raw_event_rcu_torture_read+0x190/0x190 [ 154.666263] ? rcu_segcblist_enqueue+0xe9/0x120 [ 154.671422] ? _raw_spin_unlock_bh+0x91/0xc0 [ 154.676286] ? pfifo_fast_dequeue+0x140/0x2d0 [ 154.681251] print_address_description+0x6a/0x270 [ 154.686601] ? pfifo_fast_dequeue+0x140/0x2d0 [ 154.691565] kasan_report+0x23f/0x350 [ 154.695752] pfifo_fast_dequeue+0x140/0x2d0 [ 154.700523] __qdisc_run+0x264/0xa20 [ 154.704613] ? sch_direct_xmit+0x3d0/0x3d0 [ 154.709287] ? _raw_spin_unlock+0x73/0xc0 [ 154.713860] ? is_bpf_text_address+0x1e/0x30 [ 154.718724] ? kernel_text_address+0xec/0x100 [ 154.723687] ? __kernel_text_address+0xe/0x30 [ 154.728650] ? unwind_get_return_address+0x2f/0x50 [ 154.734099] ? pfifo_fast_enqueue+0x154/0x180 [ 154.739065] __dev_queue_xmit+0x5ae/0x1110 [ 154.743738] ? dst_alloc+0x8c/0xd0 [ 154.747633] ? netdev_pick_tx+0x150/0x150 [ 154.752206] ? ip_route_output_key_hash+0xee/0x130 [ 154.757654] ? ip_queue_xmit+0x7d0/0x830 [ 154.762131] ? tcp_transmit_skb+0xc52/0x15b0 [ 154.766994] ? tcp_write_xmit+0x425/0x2060 [ 154.771665] ? __tcp_push_pending_frames+0x56/0x110 [ 154.777209] ? tcp_push+0x2cf/0x360 [ 154.781200] ? tcp_sendmsg_locked+0xdb3/0x1cb0 [ 154.786259] ? tcp_sendmsg+0x27/0x40 [ 154.790347] ? inet_sendmsg+0xb3/0x1f0 [ 154.794629] ? sock_sendmsg+0x64/0x80 [ 154.798814] ? 
sock_write_iter+0x148/0x1f0 [ 154.803486] ? __vfs_write+0x26e/0x370 [ 154.807767] ? vfs_write+0xe9/0x240 [ 154.811747] ? SyS_write+0xa7/0x130 [ 154.815739] ? entry_SYSCALL_64_fastpath+0x1e/0x81 [ 154.821190] ? __alias_free_mem+0x20/0x20 [ 154.825766] ? rt_cache_route+0x143/0x170 [ 154.830342] ? find_busiest_group+0x12eb/0x1630 [ 154.835500] ? inet_lookup_ifaddr_rcu+0x126/0x170 [ 154.840852] ? percpu_counter_add_batch+0x24/0xa0 [ 154.846207] ? rt_cpu_seq_stop+0x10/0x10 [ 154.850684] ? dst_alloc+0xac/0xd0 [ 154.854579] ? rt_dst_alloc+0x1f0/0x250 [ 154.858958] ? ipv4_neigh_lookup+0x3a0/0x3a0 [ 154.863824] ? __rcu_read_unlock+0x6e/0x120 [ 154.868594] ? trace_event_raw_event_rcu_torture_read+0x190/0x190 [ 154.875502] ? ip_finish_output2+0x68d/0x7c0 [ 154.880366] ip_finish_output2+0x68d/0x7c0 [ 154.885040] ? ip_send_check+0x60/0x60 [ 154.889322] ? ip_route_input_noref+0xd0/0xd0 [ 154.894287] ? xfrm_lookup+0x888/0x10f0 [ 154.898668] ? ipv4_mtu+0x163/0x200 [ 154.902662] ? load_balance+0x108d/0x14a0 [ 154.907238] ? ip_finish_output+0x39a/0x4c0 [ 154.912004] ip_finish_output+0x39a/0x4c0 [ 154.916578] ? ip_fragment.constprop.5+0xf0/0xf0 [ 154.921832] ? find_busiest_group+0x1630/0x1630 [ 154.926991] ? check_cfs_rq_runtime+0x70/0x70 [ 154.931954] ? __rcu_read_unlock+0x6e/0x120 [ 154.936723] ? trace_event_raw_event_rcu_torture_read+0x190/0x190 [ 154.943630] ? unwind_get_return_address+0x2f/0x50 [ 154.949077] ? ip_send_check+0x20/0x60 [ 154.953360] ip_output+0x106/0x280 [ 154.957253] ? ip_mc_output+0x750/0x750 [ 154.961631] ? ip_route_output_key_hash_rcu+0x1240/0x1240 [ 154.967757] ? sk_setup_caps+0x180/0x180 [ 154.972236] ? __skb_clone+0x2f8/0x370 [ 154.976520] ip_queue_xmit+0x381/0x830 [ 154.980805] ? ip_build_and_send_pkt+0x420/0x420 [ 154.986060] ? trace_event_raw_event_bpf_obj_map+0x200/0x200 [ 154.992481] ? tcp_options_write+0xc3/0x360 [ 154.997248] ? tcp_established_options+0x122/0x190 [ 155.002697] tcp_transmit_skb+0xc52/0x15b0 [ 155.007374] ? 
__tcp_select_window+0x3c0/0x3c0 [ 155.012433] ? is_bpf_text_address+0x1e/0x30 [ 155.017296] ? kernel_text_address+0xec/0x100 [ 155.022259] ? __kernel_text_address+0xe/0x30 [ 155.027221] ? unwind_get_return_address+0x2f/0x50 [ 155.032670] ? __save_stack_trace+0x83/0xd0 [ 155.037437] ? memcmp+0x45/0x70 [ 155.041041] ? depot_save_stack+0x12d/0x470 [ 155.045811] ? tcp_small_queue_check.isra.4+0x10a/0x1f0 [ 155.051745] ? tcp_tso_segs+0xe0/0xe0 [ 155.055932] ? native_sched_clock+0xcc/0x130 [ 155.060799] ? cyc2ns_read_end+0x20/0x20 [ 155.065275] ? sock_sendmsg+0x64/0x80 [ 155.069460] ? vfs_write+0xe9/0x240 [ 155.073483] ? entry_SYSCALL_64_fastpath+0x1e/0x81 [ 155.078931] ? sock_sendmsg+0x64/0x80 [ 155.083116] ? sock_write_iter+0x148/0x1f0 [ 155.087790] ? sched_clock+0x5/0x10 [ 155.091780] ? deref_stack_reg+0x98/0xd0 [ 155.096257] ? sched_clock+0x5/0x10 [ 155.100248] ? sched_clock_cpu+0x14/0xf0 [ 155.104726] tcp_write_xmit+0x425/0x2060 [ 155.109209] ? memcg_kmem_get_cache+0x4e0/0x4e0 [ 155.114356] ? tcp_transmit_skb+0x15b0/0x15b0 [ 155.119318] ? memcg_kmem_put_cache+0x63/0x120 [ 155.124376] ? memcg_kmem_get_cache+0x4e0/0x4e0 [ 155.129536] ? __kmalloc_node_track_caller+0x1fe/0x2a0 [ 155.135371] ? __alloc_skb+0xed/0x390 [ 155.139558] ? __kmalloc_reserve.isra.7+0x43/0x80 [ 155.144908] ? memset+0x1f/0x40 [ 155.148510] ? __alloc_skb+0x302/0x390 [ 155.152792] ? __kmalloc_reserve.isra.7+0x80/0x80 [ 155.158142] ? ipv4_mtu+0x90/0x200 [ 155.162036] ? tcp_mtu_to_mss+0x155/0x1a0 [ 155.166610] ? ipv4_negative_advice+0x60/0x60 [ 155.171572] ? tcp_trim_head+0x260/0x260 [ 155.176048] ? SyS_read+0xa7/0x130 [ 155.179941] ? iov_iter_advance+0x16a/0x780 [ 155.184709] ? copyout+0x4f/0x60 [ 155.188410] ? tcp_established_options+0x122/0x190 [ 155.193858] ? import_single_range+0x110/0x110 [ 155.198918] __tcp_push_pending_frames+0x56/0x110 [ 155.204269] tcp_push+0x2cf/0x360 [ 155.208068] ? tcp_splice_data_recv+0xb0/0xb0 [ 155.213032] ? skb_entail+0x2e5/0x300 [ 155.217217] ? 
_copy_from_iter+0x680/0x680 [ 155.221890] ? _raw_spin_unlock_bh+0x91/0xc0 [ 155.226757] tcp_sendmsg_locked+0xdb3/0x1cb0 [ 155.231628] ? tcp_recvmsg+0x790/0x1420 [ 155.236001] ? tcp_sendpage+0x60/0x60 [ 155.240190] ? tcp_recv_timestamp+0x240/0x240 [ 155.245158] ? compat_poll_select_copy_remaining+0x310/0x310 [ 155.251582] ? compat_poll_select_copy_remaining+0x310/0x310 [ 155.258004] ? compat_poll_select_copy_remaining+0x310/0x310 [ 155.264427] ? __rcu_read_unlock+0xf8/0x120 [ 155.269197] ? trace_event_raw_event_rcu_torture_read+0x190/0x190 [ 155.276104] ? trace_event_raw_event_rcu_torture_read+0x190/0x190 [ 155.283000] ? _raw_spin_unlock+0x73/0xc0 [ 155.287573] ? _raw_spin_trylock+0xe0/0xe0 [ 155.292246] ? __release_sock+0xc0/0x140 [ 155.296727] tcp_sendmsg+0x27/0x40 [ 155.300621] inet_sendmsg+0xb3/0x1f0 [ 155.304709] ? aa_path_link+0x260/0x260 [ 155.309088] ? inet_recvmsg+0x210/0x210 [ 155.313469] ? fsnotify+0xae8/0xb30 [ 155.317462] ? inet_recvmsg+0x210/0x210 [ 155.332037] sock_sendmsg+0x64/0x80 [ 155.336029] sock_write_iter+0x148/0x1f0 [ 155.340506] ? sock_sendmsg+0x80/0x80 [ 155.344691] ? sock_recvmsg+0x90/0x90 [ 155.348881] ? tty_ldisc_deref+0x12/0x20 [ 155.353356] ? iov_iter_init+0x77/0xb0 [ 155.357639] __vfs_write+0x26e/0x370 [ 155.361727] ? kernel_read+0xa0/0xa0 [ 155.365816] ? _raw_spin_unlock_irq+0x73/0xc0 [ 155.370783] ? __fsnotify_update_child_dentry_flags.part.0+0x150/0x150 [ 155.378174] ? __fsnotify_parent+0x84/0x220 [ 155.382944] ? __fsnotify_update_child_dentry_flags.part.0+0x150/0x150 [ 155.390340] vfs_write+0xe9/0x240 [ 155.394137] SyS_write+0xa7/0x130 [ 155.397934] ? SyS_read+0x130/0x130 [ 155.401925] ? SyS_clock_settime+0x110/0x110 [ 155.406791] ? 
SyS_fcntl+0x82/0xb0 [ 155.410685] entry_SYSCALL_64_fastpath+0x1e/0x81 [ 155.415939] RIP: 0033:0x7fed3bbf4290 [ 155.420024] RSP: 002b:00007ffcffb69468 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 155.428600] RAX: ffffffffffffffda RBX: 00007fed3bec1b20 RCX: 00007fed3bbf4290 [ 155.436668] RDX: 0000000000000024 RSI: 0000559b52ddd308 RDI: 0000000000000003 [ 155.444735] RBP: 0000000000000021 R08: 0000559b52ddaf60 R09: 0000000000000014 [ 155.452803] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fed3bec1b78 [ 155.460870] R13: 0000559b52ddb030 R14: 0000559b52ddaf50 R15: 0000559b52ddaf50 [ 155.468942] [ 155.470697] Allocated by task 780: [ 155.474589] __kmalloc+0xfa/0x230 [ 155.478377] pfifo_fast_init+0x69/0x160 [ 155.482757] qdisc_create_dflt+0x69/0xb0 [ 155.487232] mq_init+0x195/0x1e0 [ 155.490931] qdisc_create_dflt+0x69/0xb0 [ 155.495407] dev_activate+0x48a/0x4e0 [ 155.499593] __dev_open+0x19e/0x210 [ 155.503583] __dev_change_flags+0x3b5/0x3f0 [ 155.508351] dev_change_flags+0x50/0xa0 [ 155.512729] do_setlink+0x5eb/0x1cf0 [ 155.516817] rtnl_newlink+0x9d5/0xe40 [ 155.521002] rtnetlink_rcv_msg+0x37c/0x7e0 [ 155.525673] netlink_rcv_skb+0x122/0x230 [ 155.530149] netlink_unicast+0x2ae/0x360 [ 155.534624] netlink_sendmsg+0x5d5/0x620 [ 155.539100] sock_sendmsg+0x64/0x80 [ 155.543090] ___sys_sendmsg+0x4a8/0x500 [ 155.547467] __sys_sendmsg+0xa9/0x140 [ 155.551643] entry_SYSCALL_64_fastpath+0x1e/0x81 [ 155.556893] [ 155.558646] Freed by task 0: [ 155.561953] (stack is not available) [ 155.566035] [ 155.567791] The buggy address belongs to the object at ffff88086bb62100 [ 155.567791] which belongs to the cache kmalloc-8192 of size 8192 [ 155.582099] The buggy address is located 8000 bytes inside of [ 155.582099] 8192-byte region [ffff88086bb62100, ffff88086bb64100) [ 155.595529] The buggy address belongs to the page: [ 155.600977] page:00000000a9a82c52 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 [ 155.612081] flags: 0x6ffff0000008100(slab|head) [ 
155.617240] raw: 06ffff0000008100 0000000000000000 0000000000000000 0000000100030003 [ 155.626010] raw: dead000000000100 dead000000000200 ffff8803afc0e680 0000000000000000 [ 155.634776] page dumped because: kasan: bad access detected [ 155.641094] [ 155.642935] Memory state around the buggy address: [ 155.648382] ffff88086bb63f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 155.656568] ffff88086bb63f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 155.664756] >ffff88086bb64000: 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc [ 155.672943] ^ [ 155.678972] ffff88086bb64080: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 155.687160] ffff88086bb64100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 155.695346] ==================================================================