Hi,
Here is the backtrace for a reproducable panic seen with arptimer():
#0 doadump (textdump=0) at pcpu.h:221
#1 0xffffffff80385afb in db_dump (dummy=<value optimized out>, dummy2=false,
dummy3=0, dummy4=0x0) at /usr/img/freebsd/sys/ddb/db_command.c:533
#2 0xffffffff803858ee in db_command (cmd_table=0x0) at
/usr/img/freebsd/sys/ddb/db_command.c:440
#3 0xffffffff80385684 in db_command_loop () at
/usr/img/freebsd/sys/ddb/db_command.c:493
#4 0xffffffff8038818b in db_trap (type=<value optimized out>, code=0) at
/usr/img/freebsd/sys/ddb/db_main.c:251
#5 0xffffffff80ae0973 in kdb_trap (type=12, code=0, tf=<value optimized out>)
at /usr/img/freebsd/sys/kern/subr_kdb.c:654
#6 0xffffffff80f276f1 in trap_fatal (frame=0xfffffe00f59f4950, eva=<value
optimized out>) at /usr/img/freebsd/sys/amd64/amd64/trap.c:829
#7 0xffffffff80f27924 in trap_pfault (frame=0xfffffe00f59f4950, usermode=<value
optimized out>) at /usr/img/freebsd/sys/amd64/amd64/trap.c:684
#8 0xffffffff80f270de in trap (frame=0xfffffe00f59f4950) at
/usr/img/freebsd/sys/amd64/amd64/trap.c:435
#9 0xffffffff80f0a347 in calltrap () at
/usr/img/freebsd/sys/amd64/amd64/exception.S:234
#10 0xffffffff80be9e3d in arptimer (arg=0xfffff8011d0fda00) at atomic.h:184
#11 0xffffffff80ab54f1 in softclock_call_cc (c=0xfffff8011d0fdaa8,
cc=0xffffffff81ccd600, direct=<value optimized out>)
at /usr/img/freebsd/sys/kern/kern_timeout.c:832
#12 0xffffffff80ab5814 in softclock (arg=0xffffffff81ccd600) at
/usr/img/freebsd/sys/kern/kern_timeout.c:921
#13 0xffffffff80a5d7f6 in intr_event_execute_handlers (p=<value optimized out>,
ie=0xfffff80003998b00) at /usr/img/freebsd/sys/kern/kern_intr.c:1262
#14 0xffffffff80a5de06 in ithread_loop (arg=0xfffff8000396cde0) at
/usr/img/freebsd/sys/kern/kern_intr.c:1275
#15 0xffffffff80a5a87c in fork_exit (callout=0xffffffff80a5dd60 <ithread_loop>,
arg=0xfffff8000396cde0, frame=0xfffffe00f59f4c00)
at /usr/img/freebsd/sys/kern/kern_fork.c:1011
#16 0xffffffff80f0a87e in fork_trampoline () at
/usr/img/freebsd/sys/amd64/amd64/exception.S:609
#17 0x0000000000000000 in ?? ()
(kgdb) print ((struct llentry *)arg)[0]
$5 = {
lle_next = {
le_next = 0x0,
le_prev = 0xfffff80069b5fa98
},
r_l3addr = {
addr4 = {
s_addr = 1563742475
},
addr6 = {
__u6_addr = {
__u6_addr8 = 0xfffff8011d0fda10 "\v�4]",
__u6_addr16 = 0xfffff8011d0fda10,
__u6_addr32 = 0xfffff8011d0fda10
}
}
},
ll_addr = {
mac_aligned = 121984137371108,
mac16 = 0xfffff8011d0fda20,
mac8 = 0xfffff8011d0fda20 "�\035-��n"
},
r_flags = 1,
r_skip_req = 1,
spare1 = 0,
lle_tbl = 0xfffff80005653300,
lle_head = 0xfffff80069b5fa98,
lle_free = 0xffffffff80bf2270 <in_lltable_destroy_lle>,
la_hold = 0x0,
la_numheld = 0,
la_expire = 12422,
la_flags = 1,
la_asked = 0,
la_preempt = 5,
ln_state = 2,
ln_router = 0,
ln_ntick = 0,
lle_refcnt = 1,
lle_chain = {
le_next = 0x0,
le_prev = 0x0
},
lle_timer = {
c_links = {
le = {
le_next = 0x0,
le_prev = 0xffffffff81ccd718
},
sle = {
sle_next = 0x0
},
tqe = {
tqe_next = 0x0,
tqe_prev = 0xffffffff81ccd718
}
},
c_time = 53354272998546,
c_precision = 268435437,
c_arg = 0xfffff8011d0fda00,
c_func = 0xffffffff80be9950 <arptimer>,
c_lock = 0x0,
c_flags = 16,
c_cpu = 0
},
lle_lock = {
lock_object = {
lo_name = 0xffffffff8144abf0 "lle",
lo_flags = 90374144,
lo_data = 0,
lo_witness = 0x0
},
rw_lock = 1
},
req_mtx = {
lock_object = {
lo_name = 0xffffffff8144abf4 "lle req",
lo_flags = 16973824,
lo_data = 0,
lo_witness = 0x0
},
mtx_lock = 4
}
}
(kgdb) print /x ((struct llentry *)arg)->lle_tbl[0]
$6 = {
llt_link = {
sle_next = 0xfffff8004be51900
},
llt_af = 0x1d243aa0,
llt_hsize = 0xfffff801,
lle_head = 0xfffff800053e5330,
llt_ifp = 0xd04,
llt_lookup = 0x0,
llt_alloc_entry = 0xfffffe0001a472f0,
llt_delete_entry = 0xfffff800504a5100,
llt_prefix_free = 0xfffff800503d8c88,
llt_dump_entry = 0x0,
llt_hash = 0x0,
llt_match_prefix = 0x0,
llt_free_entry = 0x0,
llt_foreach_entry = 0x0,
llt_link_entry = 0x0,
llt_unlink_entry = 0x38e425e,
llt_fill_sa_entry = 0x0,
llt_free_tbl = 0xfffff8011d243ab0
}
It appears arptimer() was called after lltable_unlink_entry() was
called, because la_flags does not have the LLE_LINKED bit set, which can
happen!! If arptimer() is firing exactly when we call
lltable_unlink_entry(), then arptimer() will refer to freed memory. Does
the following patch make sense?
Index: netinet/if_ether.c
===================================================================
--- netinet/if_ether.c (revision 291256)
+++ netinet/if_ether.c (working copy)
@@ -185,7 +185,13 @@
LLE_WUNLOCK(lle);
return;
}
- ifp = lle->lle_tbl->llt_ifp;
+ if (lle->la_flags & LLE_LINKED) {
+ ifp = lle->lle_tbl->llt_ifp;
+ } else {
+ /* XXX RACE entry has been freed */
+ llentry_free(lle);
+ return;
+ }
CURVNET_SET(ifp->if_vnet);
if ((lle->la_flags & LLE_DELETED) == 0) {
If you need more information from the dump, let me know.
--HPS
_______________________________________________
freebsd-current@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"