The diff below makes aesni crypto "mpsafe". It adds a CRYPTOCAP_F_MPSAFE flag that makes the crypto framework dispatch to an mpsafe taskq if it is set. In order to make the aesni_process() function that does the actual crypto work mpsafe, it moves the code over to a per-session working buffer. This obviously increases the cost of setting up a session a bit. Directly invoked crypto operations remain locked. My main laptop seems to do IPsec fine with this, but I'm not sure whether there is an actual performance gain, so it would probably be good if somebody could benchmark this.
Comments? Index: arch/amd64/amd64/aesni.c =================================================================== RCS file: /home/cvs/src/sys/arch/amd64/amd64/aesni.c,v retrieving revision 1.36 diff -u -p -r1.36 aesni.c --- arch/amd64/amd64/aesni.c 7 Nov 2015 01:37:26 -0000 1.36 +++ arch/amd64/amd64/aesni.c 24 Mar 2016 19:16:01 -0000 @@ -66,13 +66,14 @@ struct aesni_session { struct swcr_data *ses_swd; LIST_ENTRY(aesni_session) ses_entries; + uint8_t *ses_buf; + size_t ses_buflen; }; struct aesni_softc { - uint8_t *sc_buf; - size_t sc_buflen; int32_t sc_cid; uint32_t sc_sid; + struct mutex sc_mtx; LIST_HEAD(, aesni_session) sc_sessions; } *aesni_sc; @@ -132,10 +133,6 @@ aesni_setup(void) if (aesni_sc == NULL) return; - aesni_sc->sc_buf = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); - if (aesni_sc->sc_buf != NULL) - aesni_sc->sc_buflen = PAGE_SIZE; - bzero(algs, sizeof(algs)); /* Encryption algorithms. */ @@ -161,9 +158,8 @@ aesni_setup(void) /* IPsec Extended Sequence Numbers. */ algs[CRYPTO_ESN] = CRYPTO_ALG_FLAG_SUPPORTED; - aesni_sc->sc_cid = crypto_get_driverid(0); + aesni_sc->sc_cid = crypto_get_driverid(CRYPTOCAP_F_MPSAFE); if (aesni_sc->sc_cid < 0) { - free(aesni_sc->sc_buf, M_DEVBUF, aesni_sc->sc_buflen); free(aesni_sc, M_DEVBUF, sizeof(*aesni_sc)); return; } @@ -172,6 +168,8 @@ aesni_setup(void) "aesni", NULL); pool_setlowat(&aesnipl, 2); + mtx_init(&aesni_sc->sc_mtx, IPL_HIGH); + crypto_register(aesni_sc->sc_cid, algs, aesni_newsession, aesni_freesession, aesni_process); } @@ -192,7 +190,14 @@ aesni_newsession(u_int32_t *sidp, struct ses = pool_get(&aesnipl, PR_NOWAIT | PR_ZERO); if (!ses) return (ENOMEM); + + ses->ses_buf = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); + if (ses->ses_buf != NULL) + ses->ses_buflen = PAGE_SIZE; + + mtx_enter(&aesni_sc->sc_mtx); LIST_INSERT_HEAD(&aesni_sc->sc_sessions, ses, ses_entries); + mtx_leave(&aesni_sc->sc_mtx); ses->ses_sid = ++aesni_sc->sc_sid; for (c = cri; c != NULL; c = c->cri_next) { @@ -342,15 +347,19 @@ 
aesni_freesession(u_int64_t tid) struct auth_hash *axf; u_int32_t sid = (u_int32_t)tid; + mtx_enter(&aesni_sc->sc_mtx); LIST_FOREACH(ses, &aesni_sc->sc_sessions, ses_entries) { if (ses->ses_sid == sid) break; } + mtx_leave(&aesni_sc->sc_mtx); if (ses == NULL) return (EINVAL); + mtx_enter(&aesni_sc->sc_mtx); LIST_REMOVE(ses, ses_entries); + mtx_leave(&aesni_sc->sc_mtx); if (ses->ses_ghash) { explicit_bzero(ses->ses_ghash, sizeof(GHASH_CTX)); @@ -377,6 +386,11 @@ aesni_freesession(u_int64_t tid) free(swd, M_CRYPTO_DATA, sizeof(*swd)); } + if (ses->ses_buf) { + explicit_bzero(ses->ses_buf, ses->ses_buflen); + free(ses->ses_buf, M_DEVBUF, ses->ses_buflen); + } + explicit_bzero(ses, sizeof (*ses)); pool_put(&aesnipl, ses); @@ -405,24 +419,26 @@ aesni_encdec(struct cryptop *crp, struct uint8_t iv[EALG_MAX_BLOCK_LEN]; uint8_t icb[AESCTR_BLOCKSIZE]; uint8_t tag[GMAC_DIGEST_LEN]; - uint8_t *buf = aesni_sc->sc_buf; + uint8_t *buf = ses->ses_buf; uint32_t *dw; aadlen = rlen = err = iskip = oskip = 0; - if (crd->crd_len > aesni_sc->sc_buflen) { + if (crd->crd_len > ses->ses_buflen) { + KERNEL_LOCK(); if (buf != NULL) { - explicit_bzero(buf, aesni_sc->sc_buflen); - free(buf, M_DEVBUF, aesni_sc->sc_buflen); + explicit_bzero(buf, ses->ses_buflen); + free(buf, M_DEVBUF, ses->ses_buflen); } - aesni_sc->sc_buflen = 0; + ses->ses_buflen = 0; rlen = roundup(crd->crd_len, EALG_MAX_BLOCK_LEN); - aesni_sc->sc_buf = buf = malloc(rlen, M_DEVBUF, M_NOWAIT | + ses->ses_buf = buf = malloc(rlen, M_DEVBUF, M_NOWAIT | M_ZERO); + KERNEL_UNLOCK(); if (buf == NULL) return (ENOMEM); - aesni_sc->sc_buflen = rlen; + ses->ses_buflen = rlen; } /* CBC uses 16, CTR/XTS only 8. 
*/ @@ -604,10 +620,12 @@ aesni_process(struct cryptop *crp) if (crp == NULL || crp->crp_callback == NULL) return (EINVAL); + mtx_enter(&aesni_sc->sc_mtx); LIST_FOREACH(ses, &aesni_sc->sc_sessions, ses_entries) { if (ses->ses_sid == (crp->crp_sid & 0xffffffff)) break; } + mtx_leave(&aesni_sc->sc_mtx); if (!ses) { err = EINVAL; Index: crypto/crypto.c =================================================================== RCS file: /home/cvs/src/sys/crypto/crypto.c,v retrieving revision 1.75 diff -u -p -r1.75 crypto.c --- crypto/crypto.c 28 Aug 2015 00:03:53 -0000 1.75 +++ crypto/crypto.c 29 Aug 2015 20:01:25 -0000 @@ -36,6 +36,7 @@ struct pool cryptop_pool; struct pool cryptodesc_pool; struct taskq *crypto_taskq; +struct taskq *crypto_taskq_mpsafe; /* * Create a new session. @@ -371,9 +372,21 @@ crypto_unregister(u_int32_t driverid, in int crypto_dispatch(struct cryptop *crp) { - if (crypto_taskq && !(crp->crp_flags & CRYPTO_F_NOQUEUE)) { + struct taskq *tq = crypto_taskq; + int s; + u_int32_t hid; + + s = splvm(); + hid = (crp->crp_sid >> 32) & 0xffffffff; + if (hid < crypto_drivers_num) { + if (crypto_drivers[hid].cc_flags & CRYPTOCAP_F_MPSAFE) + tq = crypto_taskq_mpsafe; + } + splx(s); + + if (tq && !(crp->crp_flags & CRYPTO_F_NOQUEUE)) { task_set(&crp->crp_task, (void (*))crypto_invoke, crp); - task_add(crypto_taskq, &crp->crp_task); + task_add(tq, &crp->crp_task); } else { crypto_invoke(crp); } @@ -498,6 +511,7 @@ void crypto_init(void) { crypto_taskq = taskq_create("crypto", 1, IPL_VM, 0); + crypto_taskq_mpsafe = taskq_create("crynlk", 1, IPL_VM|IPL_MPSAFE, 0); pool_init(&cryptop_pool, sizeof(struct cryptop), 0, 0, 0, "cryptop", NULL); Index: crypto/cryptodev.h =================================================================== RCS file: /home/cvs/src/sys/crypto/cryptodev.h,v retrieving revision 1.67 diff -u -p -r1.67 cryptodev.h --- crypto/cryptodev.h 10 Dec 2015 21:00:51 -0000 1.67 +++ crypto/cryptodev.h 12 Dec 2015 12:52:57 -0000 @@ -208,6 +208,7 @@ struct 
cryptocap { u_int8_t cc_flags; #define CRYPTOCAP_F_CLEANUP 0x01 #define CRYPTOCAP_F_SOFTWARE 0x02 +#define CRYPTOCAP_F_MPSAFE 0x04 int (*cc_newsession) (u_int32_t *, struct cryptoini *); int (*cc_process) (struct cryptop *);