OpenBSD 6.2 includes nice performance and latency improvements due to
the work done in the Network Stack in the previous years.  However as
soon as IPsec is enabled, all network related processing are affected.
In other words you cannot profit from the last MP work in the Network
stack if you use IPsec.

During the last 6 months I hoped that somebody else would look at the
IPsec stack and tell us what needs to be done.  This didn't happen.

Now that 6.2 is released, we cannot afford to continue to parallelize
the Network Stack if some of our users and testers still run it under
KERNEL_LOCK(). 

So I did an audit of the IPsec stack and came with the small diff below.
This diff doesn't remove the KERNEL_LOCK() (yet), but add some asserts
to make sure that the global data structure are all accessed while
holding the NET_LOCK().

I introduced a mutex for protecting some globals in the PF_KEY sockets.
If you want to improve it and try to unlock the PF_KEY socket layer,
that'd make me really happy.

In the whole IPsec stack, a single timeout wasn't running in process
context, I changed that and made it grab the NET_LOCK().

I tried to document which data structures are protected by the
NET_LOCK(), to make it short it's "all of them".

Tests and comments welcome.

Index: net/if_enc.c
===================================================================
RCS file: /cvs/src/sys/net/if_enc.c,v
retrieving revision 1.69
diff -u -p -r1.69 if_enc.c
--- net/if_enc.c        11 Aug 2017 21:24:19 -0000      1.69
+++ net/if_enc.c        11 Oct 2017 14:44:48 -0000
@@ -214,6 +214,8 @@ enc_getif(u_int id, u_int unit)
 {
        struct ifnet    *ifp;
 
+       NET_ASSERT_LOCKED();
+
        /* Check if the caller wants to get a non-default enc interface */
        if (unit > 0) {
                if (unit > enc_max_unit)
Index: net/pfkeyv2.c
===================================================================
RCS file: /cvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.167
diff -u -p -r1.167 pfkeyv2.c
--- net/pfkeyv2.c       9 Oct 2017 08:35:38 -0000       1.167
+++ net/pfkeyv2.c       11 Oct 2017 14:44:49 -0000
@@ -80,6 +80,8 @@
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/pool.h>
+#include <sys/mutex.h>
+
 #include <net/route.h>
 #include <netinet/ip_ipsp.h>
 #include <net/pfkeyv2.h>
@@ -148,6 +150,8 @@ struct dump_state {
 
 /* Static globals */
 static LIST_HEAD(, keycb) pfkeyv2_sockets = LIST_HEAD_INITIALIZER(keycb);
+
+struct mutex pfkeyv2_mtx = MUTEX_INITIALIZER(IPL_NONE);
 static uint32_t pfkeyv2_seq = 1;
 static int nregistered = 0;
 static int npromisc = 0;
@@ -260,11 +264,16 @@ pfkeyv2_detach(struct socket *so, struct
 
        LIST_REMOVE(kp, kcb_list);
 
-       if (kp->flags & PFKEYV2_SOCKETFLAGS_REGISTERED)
-               nregistered--;
-
-       if (kp->flags & PFKEYV2_SOCKETFLAGS_PROMISC)
-               npromisc--;
+       if (kp->flags &
+           (PFKEYV2_SOCKETFLAGS_REGISTERED|PFKEYV2_SOCKETFLAGS_PROMISC)) {
+               mtx_enter(&pfkeyv2_mtx);
+               if (kp->flags & PFKEYV2_SOCKETFLAGS_REGISTERED)
+                       nregistered--;
+
+               if (kp->flags & PFKEYV2_SOCKETFLAGS_PROMISC)
+                       npromisc--;
+               mtx_leave(&pfkeyv2_mtx);
+       }
 
        raw_detach(&kp->rcb);
        return (0);
@@ -940,23 +949,22 @@ pfkeyv2_send(struct socket *so, void *me
        struct ipsec_acquire *ipa;
        struct radix_node_head *rnh;
        struct radix_node *rn = NULL;
-
        struct keycb *kp, *bkp;
-
        void *freeme = NULL, *bckptr = NULL;
        void *headers[SADB_EXT_MAX + 1];
-
        union sockaddr_union *sunionp;
-
        struct tdb *sa1 = NULL, *sa2 = NULL;
-
        struct sadb_msg *smsg;
        struct sadb_spirange *sprng;
        struct sadb_sa *ssa;
        struct sadb_supported *ssup;
        struct sadb_ident *sid, *did;
-
        u_int rdomain;
+       int promisc;
+
+       mtx_enter(&pfkeyv2_mtx);
+       promisc = npromisc;
+       mtx_leave(&pfkeyv2_mtx);
 
        NET_LOCK();
 
@@ -972,7 +980,7 @@ pfkeyv2_send(struct socket *so, void *me
        rdomain = kp->rdomain;
 
        /* If we have any promiscuous listeners, send them a copy of the 
message */
-       if (npromisc) {
+       if (promisc) {
                struct mbuf *packet;
 
                if (!(freeme = malloc(sizeof(struct sadb_msg) + len, M_PFKEY,
@@ -1392,7 +1400,9 @@ pfkeyv2_send(struct socket *so, void *me
        case SADB_REGISTER:
                if (!(kp->flags & PFKEYV2_SOCKETFLAGS_REGISTERED)) {
                        kp->flags |= PFKEYV2_SOCKETFLAGS_REGISTERED;
+                       mtx_enter(&pfkeyv2_mtx);
                        nregistered++;
+                       mtx_leave(&pfkeyv2_mtx);
                }
 
                i = sizeof(struct sadb_supported) + sizeof(ealgs);
@@ -1788,11 +1798,15 @@ pfkeyv2_send(struct socket *so, void *me
                                if (j) {
                                        kp->flags |=
                                            PFKEYV2_SOCKETFLAGS_PROMISC;
+                                       mtx_enter(&pfkeyv2_mtx);
                                        npromisc++;
+                                       mtx_leave(&pfkeyv2_mtx);
                                } else {
                                        kp->flags &=
                                            ~PFKEYV2_SOCKETFLAGS_PROMISC;
+                                       mtx_enter(&pfkeyv2_mtx);
                                        npromisc--;
+                                       mtx_leave(&pfkeyv2_mtx);
                                }
                        }
                }
@@ -1859,11 +1873,15 @@ pfkeyv2_acquire(struct ipsec_policy *ipo
        struct sadb_prop *sa_prop;
        struct sadb_msg *smsg;
        int rval = 0;
-       int i, j;
+       int i, j, registered;
 
+       mtx_enter(&pfkeyv2_mtx);
        *seq = pfkeyv2_seq++;
 
-       if (!nregistered) {
+       registered = nregistered;
+       mtx_leave(&pfkeyv2_mtx);
+
+       if (!registered) {
                rval = ESRCH;
                goto ret;
        }
@@ -2100,7 +2118,10 @@ pfkeyv2_expire(struct tdb *sa, u_int16_t
        smsg->sadb_msg_type = SADB_EXPIRE;
        smsg->sadb_msg_satype = sa->tdb_satype;
        smsg->sadb_msg_len = i / sizeof(uint64_t);
+
+       mtx_enter(&pfkeyv2_mtx);
        smsg->sadb_msg_seq = pfkeyv2_seq++;
+       mtx_leave(&pfkeyv2_mtx);
 
        headers[SADB_EXT_SA] = p;
        export_sa(&p, sa);
Index: netinet/ip_ipsp.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_ipsp.c,v
retrieving revision 1.227
diff -u -p -r1.227 ip_ipsp.c
--- netinet/ip_ipsp.c   11 Oct 2017 13:44:49 -0000      1.227
+++ netinet/ip_ipsp.c   11 Oct 2017 14:44:50 -0000
@@ -87,16 +87,14 @@ int         tdb_hash(u_int, u_int32_t, union so
 
 int ipsec_in_use = 0;
 u_int64_t ipsec_last_added = 0;
+int ipsec_ids_idle = 100;              /* keep free ids for 100s */
 
-struct ipsec_policy_head ipsec_policy_head =
-    TAILQ_HEAD_INITIALIZER(ipsec_policy_head);
-struct ipsec_acquire_head ipsec_acquire_head =
-    TAILQ_HEAD_INITIALIZER(ipsec_acquire_head);
-
+/* Protected by the NET_LOCK(). */
 u_int32_t ipsec_ids_next_flow = 1;     /* may not be zero */
-int ipsec_ids_idle = 100;              /* keep free ids for 100s */
 struct ipsec_ids_tree ipsec_ids_tree;
 struct ipsec_ids_flows ipsec_ids_flows;
+struct ipsec_policy_head ipsec_policy_head =
+    TAILQ_HEAD_INITIALIZER(ipsec_policy_head);
 
 void ipsp_ids_timeout(void *);
 static inline int ipsp_ids_cmp(const struct ipsec_ids *,
@@ -173,6 +171,7 @@ struct xformsw *xformswNXFORMSW = &xform
 
 #define        TDB_HASHSIZE_INIT       32
 
+/* Protected by the NET_LOCK(). */
 static SIPHASH_KEY tdbkey;
 static struct tdb **tdbh = NULL;
 static struct tdb **tdbdst = NULL;
@@ -190,6 +189,8 @@ tdb_hash(u_int rdomain, u_int32_t spi, u
 {
        SIPHASH_CTX ctx;
 
+       NET_ASSERT_LOCKED();
+
        SipHash24_Init(&ctx, &tdbkey);
        SipHash24_Update(&ctx, &rdomain, sizeof(rdomain));
        SipHash24_Update(&ctx, &spi, sizeof(spi));
@@ -336,6 +337,8 @@ gettdbbysrcdst(u_int rdomain, u_int32_t 
        struct tdb *tdbp;
        union sockaddr_union su_null;
 
+       NET_ASSERT_LOCKED();
+
        if (tdbsrc == NULL)
                return (struct tdb *) NULL;
 
@@ -420,6 +423,8 @@ gettdbbydst(u_int rdomain, union sockadd
        u_int32_t hashval;
        struct tdb *tdbp;
 
+       NET_ASSERT_LOCKED();
+
        if (tdbdst == NULL)
                return (struct tdb *) NULL;
 
@@ -451,6 +456,8 @@ gettdbbysrc(u_int rdomain, union sockadd
        u_int32_t hashval;
        struct tdb *tdbp;
 
+       NET_ASSERT_LOCKED();
+
        if (tdbsrc == NULL)
                return (struct tdb *) NULL;
 
@@ -788,6 +795,8 @@ tdb_alloc(u_int rdomain)
 {
        struct tdb *tdbp;
 
+       NET_ASSERT_LOCKED();
+
        tdbp = malloc(sizeof(*tdbp), M_TDB, M_WAITOK | M_ZERO);
 
        TAILQ_INIT(&tdbp->tdb_policy_head);
@@ -812,6 +821,8 @@ tdb_free(struct tdb *tdbp)
 {
        struct ipsec_policy *ipo;
 
+       NET_ASSERT_LOCKED();
+
        if (tdbp->tdb_xform) {
                (*(tdbp->tdb_xform->xf_zeroize))(tdbp);
                tdbp->tdb_xform = NULL;
@@ -944,6 +955,8 @@ ipsp_ids_insert(struct ipsec_ids *ids)
        struct ipsec_ids *found;
        u_int32_t start_flow;
 
+       NET_ASSERT_LOCKED();
+
        found = RBT_INSERT(ipsec_ids_tree, &ipsec_ids_tree, ids);
        if (found) {
                /* if refcount was zero, then timeout is running */
@@ -976,6 +989,8 @@ struct ipsec_ids *
 ipsp_ids_lookup(u_int32_t ipsecflowinfo)
 {
        struct ipsec_ids        key;
+
+       NET_ASSERT_LOCKED();
 
        key.id_flow = ipsecflowinfo;
        return RBT_FIND(ipsec_ids_flows, &ipsec_ids_flows, &key);
Index: netinet/ip_ipsp.h
===================================================================
RCS file: /cvs/src/sys/netinet/ip_ipsp.h,v
retrieving revision 1.183
diff -u -p -r1.183 ip_ipsp.h
--- netinet/ip_ipsp.h   26 Jun 2017 09:08:00 -0000      1.183
+++ netinet/ip_ipsp.h   11 Oct 2017 14:44:51 -0000
@@ -440,7 +440,6 @@ extern struct auth_hash auth_hash_hmac_r
 extern struct comp_algo comp_algo_deflate;
 
 extern TAILQ_HEAD(ipsec_policy_head, ipsec_policy) ipsec_policy_head;
-extern TAILQ_HEAD(ipsec_acquire_head, ipsec_acquire) ipsec_acquire_head;
 
 /* Misc. */
 #ifdef ENCDEBUG
Index: netinet/ip_spd.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_spd.c,v
retrieving revision 1.92
diff -u -p -r1.92 ip_spd.c
--- netinet/ip_spd.c    6 Apr 2017 14:25:18 -0000       1.92
+++ netinet/ip_spd.c    11 Oct 2017 14:44:51 -0000
@@ -45,7 +45,8 @@ int   ipsp_acquire_sa(struct ipsec_policy 
            union sockaddr_union *, struct sockaddr_encap *, struct mbuf *);
 struct ipsec_acquire *ipsp_pending_acquire(struct ipsec_policy *,
            union sockaddr_union *);
-void   ipsp_delete_acquire(void *);
+void   ipsp_delete_acquire_timo(void *);
+void   ipsp_delete_acquire(struct ipsec_acquire *);
 
 #ifdef ENCDEBUG
 #define        DPRINTF(x)      if (encdebug) printf x
@@ -56,16 +57,21 @@ void        ipsp_delete_acquire(void *);
 struct pool ipsec_policy_pool;
 struct pool ipsec_acquire_pool;
 int ipsec_policy_pool_initialized = 0;
-int ipsec_acquire_pool_initialized = 0;
 
+/* Protected by the NET_LOCK(). */
+int ipsec_acquire_pool_initialized = 0;
 struct radix_node_head **spd_tables;
 unsigned int spd_table_max;
+TAILQ_HEAD(ipsec_acquire_head, ipsec_acquire) ipsec_acquire_head =
+    TAILQ_HEAD_INITIALIZER(ipsec_acquire_head);
 
 struct radix_node_head *
 spd_table_get(unsigned int rtableid)
 {
        unsigned int rdomain;
 
+       NET_ASSERT_LOCKED();
+
        if (spd_tables == NULL)
                return (NULL);
 
@@ -83,6 +89,8 @@ spd_table_add(unsigned int rtableid)
        unsigned int rdomain;
        void *p;
 
+       NET_ASSERT_LOCKED();
+
        rdomain = rtable_l2(rtableid);
        if (spd_tables == NULL || rdomain > spd_table_max) {
                if ((p = mallocarray(rdomain + 1, sizeof(*rnh),
@@ -135,6 +143,8 @@ ipsp_spd_lookup(struct mbuf *m, int af, 
        int signore = 0, dignore = 0;
        u_int rdomain = rtable_l2(m->m_pkthdr.ph_rtableid);
 
+       NET_ASSERT_LOCKED();
+
        /*
         * If there are no flows in place, there's no point
         * continuing with the SPD lookup.
@@ -587,6 +597,8 @@ ipsec_delete_policy(struct ipsec_policy 
        struct radix_node *rn = (struct radix_node *)ipo;
        int err = 0;
 
+       NET_ASSERT_LOCKED();
+
        if (--ipo->ipo_ref_count > 0)
                return 0;
 
@@ -614,13 +626,23 @@ ipsec_delete_policy(struct ipsec_policy 
        return err;
 }
 
+void
+ipsp_delete_acquire_timo(void *v)
+{
+       struct ipsec_acquire *ipa = v;
+
+       NET_LOCK();
+       ipsp_delete_acquire(ipa);
+       NET_UNLOCK();
+}
+
 /*
  * Delete a pending IPsec acquire record.
  */
 void
-ipsp_delete_acquire(void *v)
+ipsp_delete_acquire(struct ipsec_acquire *ipa)
 {
-       struct ipsec_acquire *ipa = v;
+       NET_ASSERT_LOCKED();
 
        timeout_del(&ipa->ipa_timeout);
        TAILQ_REMOVE(&ipsec_acquire_head, ipa, ipa_next);
@@ -639,6 +661,8 @@ ipsp_pending_acquire(struct ipsec_policy
 {
        struct ipsec_acquire *ipa;
 
+       NET_ASSERT_LOCKED();
+
        TAILQ_FOREACH (ipa, &ipo->ipo_acquires, ipa_ipo_next) {
                if (!memcmp(gw, &ipa->ipa_addr, gw->sa.sa_len))
                        return ipa;
@@ -657,6 +681,8 @@ ipsp_acquire_sa(struct ipsec_policy *ipo
 {
        struct ipsec_acquire *ipa;
 
+       NET_ASSERT_LOCKED();
+
        /* Check whether request has been made already. */
        if ((ipa = ipsp_pending_acquire(ipo, gw)) != NULL)
                return 0;
@@ -674,7 +700,7 @@ ipsp_acquire_sa(struct ipsec_policy *ipo
 
        ipa->ipa_addr = *gw;
 
-       timeout_set(&ipa->ipa_timeout, ipsp_delete_acquire, ipa);
+       timeout_set_proc(&ipa->ipa_timeout, ipsp_delete_acquire_timo, ipa);
 
        ipa->ipa_info.sen_len = ipa->ipa_mask.sen_len = SENT_LEN;
        ipa->ipa_info.sen_family = ipa->ipa_mask.sen_family = PF_KEY;

Reply via email to