In order to reduce the number of calls from the RFC7539 template into
the Poly1305 driver, implement the new internal .update_from_sg method,
which allows the driver to amortize the cost of the FPU preserve/restore
sequences over a larger chunk of input.

Signed-off-by: Ard Biesheuvel <ard.biesheu...@linaro.org>
---
 arch/x86/crypto/poly1305_glue.c | 54 ++++++++++++++++----
 1 file changed, 43 insertions(+), 11 deletions(-)

diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 4a1c05dce950..f2afaa8e23c2 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -115,18 +115,11 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
        return srclen;
 }
 
-static int poly1305_simd_update(struct shash_desc *desc,
-                               const u8 *src, unsigned int srclen)
+static void poly1305_simd_do_update(struct shash_desc *desc,
+                                   const u8 *src, unsigned int srclen)
 {
-       struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
        unsigned int bytes;
 
-       /* kernel_fpu_begin/end is costly, use fallback for small updates */
-       if (srclen <= 288 || !crypto_simd_usable())
-               return crypto_poly1305_update(desc, src, srclen);
-
-       kernel_fpu_begin();
-
        if (unlikely(dctx->buflen)) {
                bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
                memcpy(dctx->buf + dctx->buflen, src, bytes);
@@ -147,12 +140,50 @@ static int poly1305_simd_update(struct shash_desc *desc,
                srclen = bytes;
        }
 
-       kernel_fpu_end();
-
        if (unlikely(srclen)) {
                dctx->buflen = srclen;
                memcpy(dctx->buf, src, srclen);
        }
+}
+
+static int poly1305_simd_update(struct shash_desc *desc,
+                               const u8 *src, unsigned int srclen)
+{
+       /* kernel_fpu_begin/end is costly, use fallback for small updates */
+       if (srclen <= 288 || !crypto_simd_usable())
+               return crypto_poly1305_update(desc, src, srclen);
+
+       kernel_fpu_begin();
+       poly1305_simd_do_update(desc, src, srclen);
+       kernel_fpu_end();
+
+       return 0;
+}
+
+static int poly1305_simd_update_from_sg(struct shash_desc *desc,
+                                       struct scatterlist *sg,
+                                       unsigned int srclen,
+                                       int flags)
+{
+       bool do_simd = crypto_simd_usable() && srclen > 288;
+       struct crypto_hash_walk walk;
+       int nbytes;
+
+       if (do_simd) {
+               kernel_fpu_begin();
+               flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+       }
+
+       for (nbytes = crypto_shash_walk_sg(desc, sg, srclen, &walk, flags);
+            nbytes > 0;
+            nbytes = crypto_hash_walk_done(&walk, 0)) {
+               if (do_simd)
+                       poly1305_simd_do_update(desc, walk.data, nbytes);
+               else
+                       crypto_poly1305_update(desc, walk.data, nbytes);
+       }
+       if (do_simd)
+               kernel_fpu_end();
 
        return 0;
 }
@@ -161,6 +192,7 @@ static struct shash_alg alg = {
        .digestsize     = POLY1305_DIGEST_SIZE,
        .init           = poly1305_simd_init,
        .update         = poly1305_simd_update,
+       .update_from_sg = poly1305_simd_update_from_sg,
        .final          = crypto_poly1305_final,
        .descsize       = sizeof(struct poly1305_simd_desc_ctx),
        .base           = {
-- 
2.20.1

Reply via email to