Hi,

There are occasions where the walker in tdb_walk() might sleep.
The SADB_DUMP case is one example, and mvs@ has a diff that sleeps
to read the counters.  So holding tdb_sadb_mtx while calling
walker() is not allowed.

Collect the matching TDBs from the TDB hash, taking a reference on
each, into a temporary list that is protected by netlock.  Then
unlock tdb_sadb_mtx and traverse the list to call walker().

We need fewer tdb_sadb_mtx dances with this solution.  If needed,
netlock protection can be replaced with another rwlock later.

ok?

bluhm

Index: net/pfkeyv2.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.228
diff -u -p -r1.228 pfkeyv2.c
--- net/pfkeyv2.c       14 Dec 2021 17:50:37 -0000      1.228
+++ net/pfkeyv2.c       17 Dec 2021 21:59:00 -0000
@@ -1045,7 +1045,7 @@ pfkeyv2_sa_flush(struct tdb *tdb, void *
 {
        if (!(*((u_int8_t *) satype_vp)) ||
            tdb->tdb_satype == *((u_int8_t *) satype_vp))
-               tdb_delete_locked(tdb);
+               tdb_delete(tdb);
        return (0);
 }
 
Index: netinet/ip_ipsp.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.c,v
retrieving revision 1.265
diff -u -p -r1.265 ip_ipsp.c
--- netinet/ip_ipsp.c   14 Dec 2021 17:50:37 -0000      1.265
+++ netinet/ip_ipsp.c   17 Dec 2021 22:02:27 -0000
@@ -90,7 +90,6 @@ void          tdb_firstuse(void *);
 void           tdb_soft_timeout(void *);
 void           tdb_soft_firstuse(void *);
 int            tdb_hash(u_int32_t, union sockaddr_union *, u_int8_t);
-void           tdb_dodelete(struct tdb *, int locked);
 
 int ipsec_in_use = 0;
 u_int64_t ipsec_last_added = 0;
@@ -627,30 +626,36 @@ tdb_printit(void *addr, int full, int (*
 int
 tdb_walk(u_int rdomain, int (*walker)(struct tdb *, void *, int), void *arg)
 {
-       int i, rval = 0;
-       struct tdb *tdbp, *next;
+       SIMPLEQ_HEAD(, tdb) tdblist;
+       struct tdb *tdbp;
+       int i, rval;
 
        /*
-        * The walker may aquire the kernel lock.  Grab it here to keep
-        * the lock order.
+        * The walker may sleep.  So we cannot hold the tdb_sadb_mtx while
+        * traversing the tdb_hnext list.  Create a new tdb_walk list with
+        * exclusive netlock protection.
         */
-       KERNEL_LOCK();
+       NET_ASSERT_WLOCKED();
+       SIMPLEQ_INIT(&tdblist);
+
        mtx_enter(&tdb_sadb_mtx);
        for (i = 0; i <= tdb_hashmask; i++) {
-               for (tdbp = tdbh[i]; rval == 0 && tdbp != NULL; tdbp = next) {
-                       next = tdbp->tdb_hnext;
-
+               for (tdbp = tdbh[i]; tdbp != NULL; tdbp = tdbp->tdb_hnext) {
                        if (rdomain != tdbp->tdb_rdomain)
                                continue;
-
-                       if (i == tdb_hashmask && next == NULL)
-                               rval = walker(tdbp, (void *)arg, 1);
-                       else
-                               rval = walker(tdbp, (void *)arg, 0);
+                       tdb_ref(tdbp);
+                       SIMPLEQ_INSERT_TAIL(&tdblist, tdbp, tdb_walk);
                }
        }
        mtx_leave(&tdb_sadb_mtx);
-       KERNEL_UNLOCK();
+
+       rval = 0;
+       while ((tdbp = SIMPLEQ_FIRST(&tdblist)) != NULL) {
+               SIMPLEQ_REMOVE_HEAD(&tdblist, tdb_walk);
+               if (rval == 0)
+                       rval = walker(tdbp, arg, SIMPLEQ_EMPTY(&tdblist));
+               tdb_unref(tdbp);
+       }
 
        return rval;
 }
@@ -764,7 +769,6 @@ tdb_rehash(void)
                return (ENOMEM);
        }
 
-
        for (i = 0; i <= old_hashmask; i++) {
                for (tdbp = tdbh[i]; tdbp != NULL; tdbp = tdbnp) {
                        tdbnp = tdbp->tdb_hnext;
@@ -1004,19 +1008,6 @@ tdb_unref(struct tdb *tdb)
 void
 tdb_delete(struct tdb *tdbp)
 {
-       tdb_dodelete(tdbp, 0);
-}
-
-void
-tdb_delete_locked(struct tdb *tdbp)
-{
-       MUTEX_ASSERT_LOCKED(&tdb_sadb_mtx);
-       tdb_dodelete(tdbp, 1);
-}
-
-void
-tdb_dodelete(struct tdb *tdbp, int locked)
-{
        NET_ASSERT_LOCKED();
 
        mtx_enter(&tdbp->tdb_mtx);
@@ -1026,10 +1017,7 @@ tdb_dodelete(struct tdb *tdbp, int locke
        }
        tdbp->tdb_flags |= TDBF_DELETED;
        mtx_leave(&tdbp->tdb_mtx);
-       if (locked)
-               tdb_unlink_locked(tdbp);
-       else
-               tdb_unlink(tdbp);
+       tdb_unlink(tdbp);
 
        /* cleanup SPD references */
        tdb_cleanspd(tdbp);
Index: netinet/ip_ipsp.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.h,v
retrieving revision 1.231
diff -u -p -r1.231 ip_ipsp.h
--- netinet/ip_ipsp.h   14 Dec 2021 17:50:37 -0000      1.231
+++ netinet/ip_ipsp.h   17 Dec 2021 21:59:00 -0000
@@ -335,6 +335,7 @@ struct tdb {                                /* tunnel descriptor blo
        struct tdb      *tdb_snext;     /* [s] src/sproto table */
        struct tdb      *tdb_inext;
        struct tdb      *tdb_onext;
+       SIMPLEQ_ENTRY(tdb) tdb_walk;    /* [N] temp list for tdb walker */
 
        struct refcnt   tdb_refcnt;
        struct mutex    tdb_mtx;
@@ -583,7 +584,6 @@ struct      tdb *gettdbbysrcdst_dir(u_int, u_
 void   puttdb(struct tdb *);
 void   puttdb_locked(struct tdb *);
 void   tdb_delete(struct tdb *);
-void   tdb_delete_locked(struct tdb *);
 struct tdb *tdb_alloc(u_int);
 struct tdb *tdb_ref(struct tdb *);
 void   tdb_unref(struct tdb *);

Reply via email to