Hi, There are occasions where the walker in tdb_walk() might sleep. Case SADB_DUMP is such a case. And mvs@ has a diff that sleeps to read the counters. So holding the tdb_sadb_mtx() when calling walker() is not allowed.
Move the TDB from the TDB-Hash to a list that is protected by netlock. Then unlock tdb_sadb_mtx and traverse the list to call walker(). We need less tdb_sadb_mtx dances with that solution. If needed, netlock protection can be replaced with another rwlock later. ok? bluhm Index: net/pfkeyv2.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfkeyv2.c,v retrieving revision 1.228 diff -u -p -r1.228 pfkeyv2.c --- net/pfkeyv2.c 14 Dec 2021 17:50:37 -0000 1.228 +++ net/pfkeyv2.c 17 Dec 2021 21:59:00 -0000 @@ -1045,7 +1045,7 @@ pfkeyv2_sa_flush(struct tdb *tdb, void * { if (!(*((u_int8_t *) satype_vp)) || tdb->tdb_satype == *((u_int8_t *) satype_vp)) - tdb_delete_locked(tdb); + tdb_delete(tdb); return (0); } Index: netinet/ip_ipsp.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.c,v retrieving revision 1.265 diff -u -p -r1.265 ip_ipsp.c --- netinet/ip_ipsp.c 14 Dec 2021 17:50:37 -0000 1.265 +++ netinet/ip_ipsp.c 17 Dec 2021 22:02:27 -0000 @@ -90,7 +90,6 @@ void tdb_firstuse(void *); void tdb_soft_timeout(void *); void tdb_soft_firstuse(void *); int tdb_hash(u_int32_t, union sockaddr_union *, u_int8_t); -void tdb_dodelete(struct tdb *, int locked); int ipsec_in_use = 0; u_int64_t ipsec_last_added = 0; @@ -627,30 +626,36 @@ tdb_printit(void *addr, int full, int (* int tdb_walk(u_int rdomain, int (*walker)(struct tdb *, void *, int), void *arg) { - int i, rval = 0; - struct tdb *tdbp, *next; + SIMPLEQ_HEAD(, tdb) tdblist; + struct tdb *tdbp; + int i, rval; /* - * The walker may aquire the kernel lock. Grab it here to keep - * the lock order. + * The walker may sleep. So we cannot hold the tdb_sadb_mtx while + * traversing the tdb_hnext list. Create a new tdb_walk list with + * exclusive netlock protection. */ - KERNEL_LOCK(); + NET_ASSERT_WLOCKED(); + SIMPLEQ_INIT(&tdblist); + mtx_enter(&tdb_sadb_mtx); for (i = 0; i <= tdb_hashmask; i++) { - for (tdbp = tdbh[i]; rval == 0 && tdbp != NULL; tdbp = next) { - next = tdbp->tdb_hnext; - + for (tdbp = tdbh[i]; tdbp != NULL; tdbp = tdbp->tdb_hnext) { if (rdomain != tdbp->tdb_rdomain) continue; - - if (i == tdb_hashmask && next == NULL) - rval = walker(tdbp, (void *)arg, 1); - else - rval = walker(tdbp, (void *)arg, 0); + tdb_ref(tdbp); + SIMPLEQ_INSERT_TAIL(&tdblist, tdbp, tdb_walk); } } mtx_leave(&tdb_sadb_mtx); - KERNEL_UNLOCK(); + + rval = 0; + while ((tdbp = SIMPLEQ_FIRST(&tdblist)) != NULL) { + SIMPLEQ_REMOVE_HEAD(&tdblist, tdb_walk); + if (rval == 0) + rval = walker(tdbp, arg, SIMPLEQ_EMPTY(&tdblist)); + tdb_unref(tdbp); + } return rval; } @@ -764,7 +769,6 @@ tdb_rehash(void) return (ENOMEM); } - for (i = 0; i <= old_hashmask; i++) { for (tdbp = tdbh[i]; tdbp != NULL; tdbp = tdbnp) { tdbnp = tdbp->tdb_hnext; @@ -1004,19 +1008,6 @@ tdb_unref(struct tdb *tdb) void tdb_delete(struct tdb *tdbp) { - tdb_dodelete(tdbp, 0); -} - -void -tdb_delete_locked(struct tdb *tdbp) -{ - MUTEX_ASSERT_LOCKED(&tdb_sadb_mtx); - tdb_dodelete(tdbp, 1); -} - -void -tdb_dodelete(struct tdb *tdbp, int locked) -{ NET_ASSERT_LOCKED(); mtx_enter(&tdbp->tdb_mtx); @@ -1026,10 +1017,7 @@ tdb_dodelete(struct tdb *tdbp, int locke } tdbp->tdb_flags |= TDBF_DELETED; mtx_leave(&tdbp->tdb_mtx); - if (locked) - tdb_unlink_locked(tdbp); - else - tdb_unlink(tdbp); + tdb_unlink(tdbp); /* cleanup SPD references */ tdb_cleanspd(tdbp); Index: netinet/ip_ipsp.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.h,v retrieving revision 1.231 diff -u -p -r1.231 ip_ipsp.h --- netinet/ip_ipsp.h 14 Dec 2021 17:50:37 -0000 1.231 +++ netinet/ip_ipsp.h 17 Dec 2021 21:59:00 -0000 @@ -335,6 +335,7 @@ struct tdb { /* tunnel descriptor blo struct tdb *tdb_snext; /* [s] src/sproto table */ struct tdb *tdb_inext; struct tdb *tdb_onext; + SIMPLEQ_ENTRY(tdb) tdb_walk; /* [N] temp list for tdb walker */ struct refcnt tdb_refcnt; struct mutex tdb_mtx; @@ -583,7 +584,6 @@ struct tdb *gettdbbysrcdst_dir(u_int, u_ void puttdb(struct tdb *); void puttdb_locked(struct tdb *); void tdb_delete(struct tdb *); -void tdb_delete_locked(struct tdb *); struct tdb *tdb_alloc(u_int); struct tdb *tdb_ref(struct tdb *); void tdb_unref(struct tdb *);