Hi,

Here is the backtrace for a reproducible panic seen with arptimer():

#0  doadump (textdump=0) at pcpu.h:221
#1  0xffffffff80385afb in db_dump (dummy=<value optimized out>, dummy2=false, 
dummy3=0, dummy4=0x0) at /usr/img/freebsd/sys/ddb/db_command.c:533
#2  0xffffffff803858ee in db_command (cmd_table=0x0) at 
/usr/img/freebsd/sys/ddb/db_command.c:440
#3  0xffffffff80385684 in db_command_loop () at 
/usr/img/freebsd/sys/ddb/db_command.c:493
#4  0xffffffff8038818b in db_trap (type=<value optimized out>, code=0) at 
/usr/img/freebsd/sys/ddb/db_main.c:251
#5  0xffffffff80ae0973 in kdb_trap (type=12, code=0, tf=<value optimized out>) 
at /usr/img/freebsd/sys/kern/subr_kdb.c:654
#6  0xffffffff80f276f1 in trap_fatal (frame=0xfffffe00f59f4950, eva=<value 
optimized out>) at /usr/img/freebsd/sys/amd64/amd64/trap.c:829
#7  0xffffffff80f27924 in trap_pfault (frame=0xfffffe00f59f4950, usermode=<value 
optimized out>) at /usr/img/freebsd/sys/amd64/amd64/trap.c:684
#8  0xffffffff80f270de in trap (frame=0xfffffe00f59f4950) at 
/usr/img/freebsd/sys/amd64/amd64/trap.c:435
#9  0xffffffff80f0a347 in calltrap () at 
/usr/img/freebsd/sys/amd64/amd64/exception.S:234
#10 0xffffffff80be9e3d in arptimer (arg=0xfffff8011d0fda00) at atomic.h:184
#11 0xffffffff80ab54f1 in softclock_call_cc (c=0xfffff8011d0fdaa8, 
cc=0xffffffff81ccd600, direct=<value optimized out>)
    at /usr/img/freebsd/sys/kern/kern_timeout.c:832
#12 0xffffffff80ab5814 in softclock (arg=0xffffffff81ccd600) at 
/usr/img/freebsd/sys/kern/kern_timeout.c:921
#13 0xffffffff80a5d7f6 in intr_event_execute_handlers (p=<value optimized out>, 
ie=0xfffff80003998b00) at /usr/img/freebsd/sys/kern/kern_intr.c:1262
#14 0xffffffff80a5de06 in ithread_loop (arg=0xfffff8000396cde0) at 
/usr/img/freebsd/sys/kern/kern_intr.c:1275
#15 0xffffffff80a5a87c in fork_exit (callout=0xffffffff80a5dd60 <ithread_loop>, 
arg=0xfffff8000396cde0, frame=0xfffffe00f59f4c00)
    at /usr/img/freebsd/sys/kern/kern_fork.c:1011
#16 0xffffffff80f0a87e in fork_trampoline () at 
/usr/img/freebsd/sys/amd64/amd64/exception.S:609
#17 0x0000000000000000 in ?? ()

(kgdb) print  ((struct llentry *)arg)[0]
$5 = {
  lle_next = {
    le_next = 0x0,
    le_prev = 0xfffff80069b5fa98
  },
  r_l3addr = {
    addr4 = {
      s_addr = 1563742475
    },
    addr6 = {
      __u6_addr = {
        __u6_addr8 = 0xfffff8011d0fda10 "\v\3214]",
        __u6_addr16 = 0xfffff8011d0fda10,
        __u6_addr32 = 0xfffff8011d0fda10
      }
    }
  },
  ll_addr = {
    mac_aligned = 121984137371108,
    mac16 = 0xfffff8011d0fda20,
    mac8 = 0xfffff8011d0fda20 "\344\035-\245\361n"
  },
  r_flags = 1,
  r_skip_req = 1,
  spare1 = 0,
  lle_tbl = 0xfffff80005653300,
  lle_head = 0xfffff80069b5fa98,
  lle_free = 0xffffffff80bf2270 <in_lltable_destroy_lle>,
  la_hold = 0x0,
  la_numheld = 0,
  la_expire = 12422,
  la_flags = 1,
  la_asked = 0,
  la_preempt = 5,
  ln_state = 2,
  ln_router = 0,
  ln_ntick = 0,
  lle_refcnt = 1,
  lle_chain = {
    le_next = 0x0,
    le_prev = 0x0
  },
  lle_timer = {
    c_links = {
      le = {
        le_next = 0x0,
        le_prev = 0xffffffff81ccd718
      },
      sle = {
        sle_next = 0x0
      },
      tqe = {
        tqe_next = 0x0,
        tqe_prev = 0xffffffff81ccd718
      }
    },
    c_time = 53354272998546,
    c_precision = 268435437,
    c_arg = 0xfffff8011d0fda00,
    c_func = 0xffffffff80be9950 <arptimer>,
    c_lock = 0x0,
    c_flags = 16,
    c_cpu = 0
  },
  lle_lock = {
    lock_object = {
      lo_name = 0xffffffff8144abf0 "lle",
      lo_flags = 90374144,
      lo_data = 0,
      lo_witness = 0x0
    },
    rw_lock = 1
  },
  req_mtx = {
    lock_object = {
      lo_name = 0xffffffff8144abf4 "lle req",
      lo_flags = 16973824,
      lo_data = 0,
      lo_witness = 0x0
    },
    mtx_lock = 4
  }
}

(kgdb) print /x ((struct llentry *)arg)->lle_tbl[0]
$6 = {
  llt_link = {
    sle_next = 0xfffff8004be51900
  },
  llt_af = 0x1d243aa0,
  llt_hsize = 0xfffff801,
  lle_head = 0xfffff800053e5330,
  llt_ifp = 0xd04,
  llt_lookup = 0x0,
  llt_alloc_entry = 0xfffffe0001a472f0,
  llt_delete_entry = 0xfffff800504a5100,
  llt_prefix_free = 0xfffff800503d8c88,
  llt_dump_entry = 0x0,
  llt_hash = 0x0,
  llt_match_prefix = 0x0,
  llt_free_entry = 0x0,
  llt_foreach_entry = 0x0,
  llt_link_entry = 0x0,
  llt_unlink_entry = 0x38e425e,
  llt_fill_sa_entry = 0x0,
  llt_free_tbl = 0xfffff8011d243ab0
}

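It appears that arptimer() ran after lltable_unlink_entry() had already been called for this entry: la_flags is 1 in the dump above, so the LLE_LINKED bit is not set, and the contents of lle_tbl do not look like a valid table (for example, llt_ifp = 0xd04). This can happen! If arptimer() fires exactly when we call lltable_unlink_entry(), then arptimer() will go on to dereference lle->lle_tbl, which refers to freed memory. Does the following patch make sense?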

Index: netinet/if_ether.c
===================================================================
--- netinet/if_ether.c  (revision 291256)
+++ netinet/if_ether.c  (working copy)
@@ -185,7 +185,13 @@
                LLE_WUNLOCK(lle);
                return;
        }
-       ifp = lle->lle_tbl->llt_ifp;
+       if (lle->la_flags & LLE_LINKED) {
+               ifp = lle->lle_tbl->llt_ifp;
+       } else {
+               /* XXX RACE entry has been freed */
+               llentry_free(lle);
+               return;
+       }
        CURVNET_SET(ifp->if_vnet);

        if ((lle->la_flags & LLE_DELETED) == 0) {

If you need more information from the dump, let me know.

--HPS

_______________________________________________
freebsd-current@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"

Reply via email to