On Wed, Jul 07, 2021 at 10:01:59PM +0200, Hrvoje Popovski wrote: > On 7.7.2021. 19:38, Vitaliy Makkoveev wrote: > > Hi, > > > > It seems the first the first panic occured because ipsp_spd_lookup() > > modifies tdbp->tdb_policy_head and simultaneous execution breaks it. > > I guess at least mutex(9) should be used to protect `tdb_policy_head'. > > > > The second panic occured because ipsp_acquire_sa() does > > `ipsec_acquire_pool' initialization in runtime so parallel execution > > breaks it. It's easy to fix. > > > > Could you try the diff below? It moves `ipsec_acquire_pool' > > initialization to pfkey_init() just after `ipsec_policy_pool' > > initialization. This should fix the second panic. > > > Hi, > > with this diff i manage to get this panics .. > > r620-1# uvm_fault(0xfffffd842f83e170, 0x147, 0, 2) -> e > kernel: page fault trap, code=0 > Stopped at tdb_free+0x9c: movq %rsi,0(%rdi) > TID PID UID PRFLAGS PFLAGS CPU COMMAND > * 38448 89106 68 0x10 0 1K isakmpd > tdb_free(ffff80000131e688) at tdb_free+0x9c > pfkeyv2_send(fffffd83b2512d30,ffff8000012f2680,50) at pfkeyv2_send+0x1191 > pfkeyv2_output(fffffd80a4b37400,fffffd83b2512d30,0,0) at pfkeyv2_output+0x8a > pfkeyv2_usrreq(fffffd83b2512d30,9,fffffd80a4b37400,0,0,ffff800023908d20) > at pfkeyv2_usrreq+0x1b0 > sosend(fffffd83b2512d30,0,ffff800023959130,0,0,0) at sosend+0x3a9 > dofilewritev(ffff800023908d20,7,ffff800023959130,0,ffff800023959230) at > dofilewritev+0x14d > sys_writev(ffff800023908d20,ffff8000239591d0,ffff800023959230) at > sys_writev+0xe2 > syscall(ffff8000239592a0) at syscall+0x3b9 > Xsyscall() at Xsyscall+0x128 > end of kernel > end trace frame: 0x7f7ffffb27c0, count: 6 > https://www.openbsd.org/ddb.html describes the minimum info required in > bug reports. Insufficient info makes it difficult to find and fix bugs. > ddb{1}> > > > > r620-1# uvm_fault(0xffffffff82379d80, 0x147, 0, 2) -> e > kernel: page fault trap, code=0 > Stopped at ipsp_spd_lookup+0x9a4: movq %rax,0(%rcx) > TID PID UID PRFLAGS PFLAGS CPU COMMAND > 434446 37326 0 0x14000 0x200 4 softnet > 405134 41722 0 0x14000 0x200 1 softnet > 106389 88948 0 0x14000 0x200 3 softnet > *262295 93659 0 0x14000 0x200 2 softnet > ipsp_spd_lookup(fffffd80a44f6f00,2,14,ffff8000238651bc,2,0) at > ipsp_spd_lookup+0x9a4 > ip_output_ipsec_lookup(fffffd80a44f6f00,14,ffff8000238651bc,0,0) at > ip_output_ipsec_lookup+0x4d > ip_output(fffffd80a44f6f00,0,ffff800023865348,1,0,0) at ip_output+0x42a > ip_forward(fffffd80a44f6f00,ffff800000087048,fffffd83b3ea6d98,0) at > ip_forward+0x26a > ip_input_if(ffff800023865488,ffff800023865494,4,0,ffff800000087048) at > ip_input_if+0x365 > ipv4_input(ffff800000087048,fffffd80a44f6f00) at ipv4_input+0x39 > if_input_process(ffff800000087048,ffff800023865508) at if_input_process+0x6f > ifiq_process(ffff800000086c00) at ifiq_process+0x69 > taskq_thread(ffff800000030000) at taskq_thread+0x9f > end trace frame: 0x0, count: 6 > https://www.openbsd.org/ddb.html describes the minimum info required in > bug reports. Insufficient info makes it difficult to find and fix bugs. >
Thanks. ipsp_spd_lookup() stopped panic in pool_get(9). I guess the panics continue because simultaneous modifications of 'tdbp->tdb_policy_head' break it. Could you try the diff below? It introduces `tdb_polhd_mtx' mutex(9) and uses it to protect 'tdbp->tdb_policy_head' modifications. I don't propose this diff for commit but to check my suggestion. Index: sys/net/pfkeyv2.c =================================================================== RCS file: /cvs/src/sys/net/pfkeyv2.c,v retrieving revision 1.216 diff -u -p -r1.216 pfkeyv2.c --- sys/net/pfkeyv2.c 5 Jul 2021 12:01:20 -0000 1.216 +++ sys/net/pfkeyv2.c 7 Jul 2021 20:21:44 -0000 @@ -2000,9 +2000,12 @@ pfkeyv2_send(struct socket *so, void *me (caddr_t)&ipo->ipo_mask, rnh, ipo->ipo_nodes, 0)) == NULL) { /* Remove from linked list of policies on TDB */ - if (ipo->ipo_tdb) + if (ipo->ipo_tdb) { + mtx_enter(&tdb_polhd_mtx); TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo, ipo_tdb_next); + mtx_leave(&tdb_polhd_mtx); + } if (ipo->ipo_ids) ipsp_ids_free(ipo->ipo_ids); Index: sys/netinet/ip_ipsp.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_ipsp.c,v retrieving revision 1.238 diff -u -p -r1.238 ip_ipsp.c --- sys/netinet/ip_ipsp.c 10 Mar 2021 10:21:49 -0000 1.238 +++ sys/netinet/ip_ipsp.c 7 Jul 2021 20:21:44 -0000 @@ -92,6 +92,7 @@ u_int64_t ipsec_last_added = 0; int ipsec_ids_idle = 100; /* keep free ids for 100s */ struct pool tdb_pool; +struct mutex tdb_polhd_mtx=MUTEX_INITIALIZER(IPL_SOFTNET); /* Protected by the NET_LOCK(). */ u_int32_t ipsec_ids_next_flow = 1; /* may not be zero */ @@ -846,12 +847,14 @@ tdb_free(struct tdb *tdbp) #endif /* Cleanup SPD references. */ + mtx_enter(&tdb_polhd_mtx); for (ipo = TAILQ_FIRST(&tdbp->tdb_policy_head); ipo; ipo = TAILQ_FIRST(&tdbp->tdb_policy_head)) { TAILQ_REMOVE(&tdbp->tdb_policy_head, ipo, ipo_tdb_next); ipo->ipo_tdb = NULL; ipo->ipo_last_searched = 0; /* Force a re-search. */ } + mtx_leave(&tdb_polhd_mtx); if (tdbp->tdb_ids) { ipsp_ids_free(tdbp->tdb_ids); Index: sys/netinet/ip_ipsp.h =================================================================== RCS file: /cvs/src/sys/netinet/ip_ipsp.h,v retrieving revision 1.197 diff -u -p -r1.197 ip_ipsp.h --- sys/netinet/ip_ipsp.h 4 May 2021 09:28:04 -0000 1.197 +++ sys/netinet/ip_ipsp.h 7 Jul 2021 20:21:44 -0000 @@ -480,6 +480,8 @@ struct xformsw { int, int); /* output */ }; +extern struct mutex tdb_polhd_mtx; + extern int ipsec_in_use; extern u_int64_t ipsec_last_added; extern int encdebug; /* enable message reporting */ Index: sys/netinet/ip_spd.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_spd.c,v retrieving revision 1.103 diff -u -p -r1.103 ip_spd.c --- sys/netinet/ip_spd.c 4 May 2021 09:28:04 -0000 1.103 +++ sys/netinet/ip_spd.c 7 Jul 2021 20:21:44 -0000 @@ -370,8 +370,10 @@ ipsp_spd_lookup(struct mbuf *m, int af, /* Do we have a cached entry ? If so, check if it's still valid. */ if ((ipo->ipo_tdb) && (ipo->ipo_tdb->tdb_flags & TDBF_INVALID)) { + mtx_enter(&tdb_polhd_mtx); TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo, ipo_tdb_next); + mtx_leave(&tdb_polhd_mtx); ipo->ipo_tdb = NULL; } @@ -419,8 +421,10 @@ ipsp_spd_lookup(struct mbuf *m, int af, nomatchout: /* Cached TDB was not good. */ + mtx_enter(&tdb_polhd_mtx); TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo, ipo_tdb_next); + mtx_leave(&tdb_polhd_mtx); ipo->ipo_tdb = NULL; ipo->ipo_last_searched = 0; } @@ -447,8 +451,10 @@ ipsp_spd_lookup(struct mbuf *m, int af, ids ? ids: ipo->ipo_ids, &ipo->ipo_addr, &ipo->ipo_mask); if (ipo->ipo_tdb) { + mtx_enter(&tdb_polhd_mtx); TAILQ_INSERT_TAIL(&ipo->ipo_tdb->tdb_policy_head, ipo, ipo_tdb_next); + mtx_leave(&tdb_polhd_mtx); *error = 0; return ipsp_spd_inp(m, af, hlen, error, direction, tdbp, inp, ipo); @@ -521,12 +527,14 @@ ipsp_spd_lookup(struct mbuf *m, int af, goto nomatchin; /* Add it to the cache. */ + mtx_enter(&tdb_polhd_mtx); if (ipo->ipo_tdb) TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo, ipo_tdb_next); ipo->ipo_tdb = tdbp; TAILQ_INSERT_TAIL(&tdbp->tdb_policy_head, ipo, ipo_tdb_next); + mtx_leave(&tdb_polhd_mtx); *error = 0; return ipsp_spd_inp(m, af, hlen, error, direction, tdbp, inp, ipo); @@ -550,8 +558,10 @@ ipsp_spd_lookup(struct mbuf *m, int af, goto skipinputsearch; /* Not applicable, unlink. */ + mtx_enter(&tdb_polhd_mtx); TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo, ipo_tdb_next); + mtx_leave(&tdb_polhd_mtx); ipo->ipo_last_searched = 0; ipo->ipo_tdb = NULL; } @@ -566,9 +576,12 @@ ipsp_spd_lookup(struct mbuf *m, int af, dignore ? &ssrc : &ipo->ipo_dst, ipo->ipo_sproto, ipo->ipo_ids, &ipo->ipo_addr, &ipo->ipo_mask); - if (ipo->ipo_tdb) + if (ipo->ipo_tdb) { + mtx_enter(&tdb_polhd_mtx); TAILQ_INSERT_TAIL(&ipo->ipo_tdb->tdb_policy_head, ipo, ipo_tdb_next); + mtx_leave(&tdb_polhd_mtx); + } } skipinputsearch: @@ -638,9 +651,12 @@ ipsec_delete_policy(struct ipsec_policy rn_delete(&ipo->ipo_addr, &ipo->ipo_mask, rnh, rn) == NULL) return (ESRCH); - if (ipo->ipo_tdb != NULL) + if (ipo->ipo_tdb != NULL) { + mtx_enter(&tdb_polhd_mtx); TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo, ipo_tdb_next); + mtx_leave(&tdb_polhd_mtx); + } while ((ipa = TAILQ_FIRST(&ipo->ipo_acquires)) != NULL) ipsp_delete_acquire(ipa);