There are two parts to accelerating AES-GCM with special instructions
on amd64: AESNI for AES and PCLMUL for GMAC.  Currently we tie the
two together: if a machine has AESNI, we use both AESNI and PCLMUL;
otherwise we do everything in software.

For the last few years, all low-end Intel CPUs (Celeron, Pentium G,
i3) have included PCLMUL even if they don't have AESNI.  Here's a
patch that uses the optimized PCLMUL code on these CPUs.  A quick
check with tcpbench(1) suggests that this roughly doubles the
throughput for an aes-128-gcm security association.
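
For reference, here's a minimal userland sketch -- illustration only,
not part of the diff -- that checks what a given CPU reports.  The
bit positions are the same ones the kernel tests via CPUIDECX_PCLMUL
and CPUIDECX_AES:

/* cpufeat.c -- illustration only, not part of the patch.
 * CPUID leaf 1: ECX bit 1 = PCLMULQDQ, ECX bit 25 = AESNI.
 * Build: cc -o cpufeat cpufeat.c
 */
#include <cpuid.h>
#include <stdio.h>

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* __get_cpuid() returns 0 if leaf 1 is not supported */
	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
		fprintf(stderr, "no CPUID leaf 1\n");
		return 1;
	}
	printf("PCLMUL: %s\n", (ecx & (1U << 1)) ? "yes" : "no");
	printf("AESNI:  %s\n", (ecx & (1U << 25)) ? "yes" : "no");
	return 0;
}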

To allow testing on well-endowed CPUs, I have unhooked the AESNI
path in identifycpu().  This will obviously not be part of the final
commit.

This version includes early feedback from mikeb@ that we need
fpu_kernel_enter() before calling aesni_gmac_update(): PCLMUL operates
on the SSE registers, which kernel code may only touch between
fpu_kernel_enter() and fpu_kernel_exit().


Index: crypto/gmac.c
===================================================================
RCS file: /cvs/src/sys/crypto/gmac.c,v
retrieving revision 1.4
diff -u -p -r1.4 gmac.c
--- crypto/gmac.c       12 Nov 2014 17:52:02 -0000      1.4
+++ crypto/gmac.c       2 Nov 2015 22:29:39 -0000
@@ -29,7 +29,10 @@
 #include <crypto/gmac.h>
 
 void   ghash_gfmul(uint32_t *, uint32_t *, uint32_t *);
-void   ghash_update(GHASH_CTX *, uint8_t *, size_t);
+void   ghash_update_mi(GHASH_CTX *, uint8_t *, size_t);
+
+/* Allow overriding with optimized MD function */
+void   (*ghash_update)(GHASH_CTX *, uint8_t *, size_t) = ghash_update_mi;
 
 /* Computes a block multiplication in the GF(2^128) */
 void
@@ -70,7 +73,7 @@ ghash_gfmul(uint32_t *X, uint32_t *Y, ui
 }
 
 void
-ghash_update(GHASH_CTX *ctx, uint8_t *X, size_t len)
+ghash_update_mi(GHASH_CTX *ctx, uint8_t *X, size_t len)
 {
        uint32_t        *x = (uint32_t *)X;
        uint32_t        *s = (uint32_t *)ctx->S;
@@ -131,11 +134,12 @@ AES_GMAC_Update(AES_GMAC_CTX *ctx, const
        if (len > 0) {
                plen = len % GMAC_BLOCK_LEN;
                if (len >= GMAC_BLOCK_LEN)
-                       ghash_update(&ctx->ghash, (uint8_t *)data, len - plen);
+                       (*ghash_update)(&ctx->ghash, (uint8_t *)data,
+                           len - plen);
                if (plen) {
                        bcopy((uint8_t *)data + (len - plen), (uint8_t *)blk,
                            plen);
-                       ghash_update(&ctx->ghash, (uint8_t *)blk,
+                       (*ghash_update)(&ctx->ghash, (uint8_t *)blk,
                            GMAC_BLOCK_LEN);
                }
        }
Index: crypto/gmac.h
===================================================================
RCS file: /cvs/src/sys/crypto/gmac.h,v
retrieving revision 1.2
diff -u -p -r1.2 gmac.h
--- crypto/gmac.h       5 Dec 2012 23:20:15 -0000       1.2
+++ crypto/gmac.h       3 Nov 2015 15:24:52 -0000
@@ -38,6 +38,8 @@ typedef struct _AES_GMAC_CTX {
 } AES_GMAC_CTX;
 
 __BEGIN_DECLS
+extern void (*ghash_update)(GHASH_CTX *, uint8_t *, size_t);
+
 void   AES_GMAC_Init(AES_GMAC_CTX *);
 void   AES_GMAC_Setkey(AES_GMAC_CTX *, const uint8_t *, uint16_t);
 void   AES_GMAC_Reinit(AES_GMAC_CTX *, const uint8_t *, uint16_t);
Index: arch/amd64/amd64/aesni.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/aesni.c,v
retrieving revision 1.35
diff -u -p -r1.35 aesni.c
--- arch/amd64/amd64/aesni.c    28 Aug 2015 19:59:36 -0000      1.35
+++ arch/amd64/amd64/aesni.c    3 Nov 2015 15:33:42 -0000
@@ -120,6 +120,9 @@ int aesni_swauth(struct cryptop *, struc
 int    aesni_encdec(struct cryptop *, struct cryptodesc *,
            struct cryptodesc *, struct aesni_session *);
 
+void   pclmul_setup(void);
+void   ghash_update_pclmul(GHASH_CTX *, uint8_t *, size_t);
+
 void
 aesni_setup(void)
 {
@@ -662,4 +665,18 @@ out:
        crp->crp_etype = err;
        crypto_done(crp);
        return (err);
+}
+
+void
+pclmul_setup(void)
+{
+       ghash_update = ghash_update_pclmul;
+}
+
+void
+ghash_update_pclmul(GHASH_CTX *ghash, uint8_t *src, size_t len)
+{
+       fpu_kernel_enter();
+       aesni_gmac_update(ghash, src, len);
+       fpu_kernel_exit();
 }
Index: arch/amd64/amd64/autoconf.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/autoconf.c,v
retrieving revision 1.43
diff -u -p -r1.43 autoconf.c
--- arch/amd64/amd64/autoconf.c 17 Jul 2015 21:53:56 -0000      1.43
+++ arch/amd64/amd64/autoconf.c 2 Nov 2015 22:57:14 -0000
@@ -97,6 +97,9 @@ void          rdrand(void *);
 void           viac3_crypto_setup(void);
 extern int     amd64_has_xcrypt;
 
+void           pclmul_setup(void);
+extern int     amd64_has_pclmul;
+
 void           aesni_setup(void);
 extern int     amd64_has_aesni;
 #endif
@@ -145,6 +148,9 @@ cpu_configure(void)
         */
        if (amd64_has_xcrypt)
                viac3_crypto_setup();
+
+       if (amd64_has_pclmul)
+               pclmul_setup();
 
        if (amd64_has_aesni)
                aesni_setup();
Index: arch/amd64/amd64/identcpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/identcpu.c,v
retrieving revision 1.64
diff -u -p -r1.64 identcpu.c
--- arch/amd64/amd64/identcpu.c 12 Aug 2015 05:31:41 -0000      1.64
+++ arch/amd64/amd64/identcpu.c 3 Nov 2015 15:25:49 -0000
@@ -52,6 +52,7 @@ int cpuspeed;
 
 int amd64_has_xcrypt;
 #ifdef CRYPTO
+int amd64_has_pclmul;
 int amd64_has_aesni;
 #endif
 int has_rdrand;
@@ -560,8 +561,11 @@ identifycpu(struct cpu_info *ci)
                        setperf_setup = est_init;
 
 #ifdef CRYPTO
+               if (cpu_ecxfeature & CPUIDECX_PCLMUL)
+                       amd64_has_pclmul = 1;
+
                if (cpu_ecxfeature & CPUIDECX_AES)
-                       amd64_has_aesni = 1;
+                       /*amd64_has_aesni = 1*/;
 #endif
 
                if (cpu_ecxfeature & CPUIDECX_RDRAND)
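
P.S. For anyone unfamiliar with PCLMUL, here's a minimal standalone
illustration -- again not part of the patch -- of the primitive it
provides: one carry-less 64x64 -> 128-bit multiplication.  GHASH
builds its GF(2^128) block multiplication out of several of these
plus a reduction modulo x^128 + x^7 + x^2 + x + 1;
aesni_gmac_update() does all of that in assembly.

/* clmul.c -- illustration only, not part of the patch.
 * Build: cc -msse4.1 -mpclmul -o clmul clmul.c
 */
#include <wmmintrin.h>	/* _mm_clmulepi64_si128 (PCLMULQDQ) */
#include <smmintrin.h>	/* _mm_extract_epi64 (SSE4.1) */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	__m128i a = _mm_set_epi64x(0, 0x87654321deadbeefULL);
	__m128i b = _mm_set_epi64x(0, 0x12345678cafef00dULL);
	/* imm8 0x00: multiply the low 64-bit halves of a and b */
	__m128i p = _mm_clmulepi64_si128(a, b, 0x00);
	uint64_t lo = (uint64_t)_mm_cvtsi128_si64(p);
	uint64_t hi = (uint64_t)_mm_extract_epi64(p, 1);

	printf("clmul: %016llx%016llx\n",
	    (unsigned long long)hi, (unsigned long long)lo);
	return 0;
}
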
-- 
Christian "naddy" Weisgerber                          na...@mips.inka.de
