Instead of copying the calculated authentication tag to memory and
calling crypto_memneq() to verify it, use the vector bytewise compare and
minimum-across-vector instructions to decide whether the tag is valid.
This is more efficient and, because the calculated tag is only held
transiently in a NEON register, also safer: calculated tags for failed
decryptions should be withheld.

Signed-off-by: Ard Biesheuvel <a...@kernel.org>
---
 arch/arm64/crypto/ghash-ce-core.S | 15 ++++++++++
 arch/arm64/crypto/ghash-ce-glue.c | 46 +++++++++++++++++++------------
 2 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 6b958dcdf136..7868330dd54e 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -544,7 +544,22 @@ CPU_LE(    rev             w8, w8          )
        ext             XL.16b, XL.16b, XL.16b, #8
        rev64           XL.16b, XL.16b
        eor             XL.16b, XL.16b, KS0.16b
+
+       .if             \enc == 1
        st1             {XL.16b}, [x10]                 // store tag
+       .else
+       ldp             x11, x12, [sp, #40]             // load tag pointer and authsize
+       adr_l           x17, .Lpermute_table
+       ld1             {KS0.16b}, [x11]                // load supplied tag
+       add             x17, x17, x12
+       ld1             {KS1.16b}, [x17]                // load permute vector
+
+       cmeq            XL.16b, XL.16b, KS0.16b         // compare tags
+       mvn             XL.16b, XL.16b                  // -1 for fail, 0 for pass
+       tbl             XL.16b, {XL.16b}, KS1.16b       // keep authsize bytes only
+       sminv           b0, XL.16b                      // signed minimum across XL
+       smov            w0, v0.b[0]                     // return b0
+       .endif
 
 4:     ldp             x29, x30, [sp], #32
        ret
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 8536008e3e35..405923e3be4a 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -55,10 +55,10 @@ asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
 asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
                                  u64 const h[][2], u64 dg[], u8 ctr[],
                                  u32 const rk[], int rounds, u8 tag[]);
-
-asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
-                                 u64 const h[][2], u64 dg[], u8 ctr[],
-                                 u32 const rk[], int rounds, u8 tag[]);
+asmlinkage int pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
+                                u64 const h[][2], u64 dg[], u8 ctr[],
+                                u32 const rk[], int rounds, const u8 l[],
+                                const u8 tag[], u64 authsize);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -458,6 +458,7 @@ static int gcm_decrypt(struct aead_request *req)
        unsigned int authsize = crypto_aead_authsize(aead);
        int nrounds = num_rounds(&ctx->aes_key);
        struct skcipher_walk walk;
+       u8 otag[AES_BLOCK_SIZE];
        u8 buf[AES_BLOCK_SIZE];
        u8 iv[AES_BLOCK_SIZE];
        u64 dg[2] = {};
@@ -474,9 +475,15 @@ static int gcm_decrypt(struct aead_request *req)
        memcpy(iv, req->iv, GCM_IV_SIZE);
        put_unaligned_be32(2, iv + GCM_IV_SIZE);
 
+       scatterwalk_map_and_copy(otag, req->src,
+                                req->assoclen + req->cryptlen - authsize,
+                                authsize, 0);
+
        err = skcipher_walk_aead_decrypt(&walk, req, false);
 
        if (likely(crypto_simd_usable())) {
+               int ret;
+
                do {
                        const u8 *src = walk.src.virt.addr;
                        u8 *dst = walk.dst.virt.addr;
@@ -493,9 +500,10 @@ static int gcm_decrypt(struct aead_request *req)
                        }
 
                        kernel_neon_begin();
-                       pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h,
-                                         dg, iv, ctx->aes_key.key_enc, nrounds,
-                                         tag);
+                       ret = pmull_gcm_decrypt(nbytes, dst, src,
+                                               ctx->ghash_key.h,
+                                               dg, iv, ctx->aes_key.key_enc,
+                                               nrounds, tag, otag, authsize);
                        kernel_neon_end();
 
                        if (unlikely(!nbytes))
@@ -507,6 +515,11 @@ static int gcm_decrypt(struct aead_request *req)
 
                        err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
                } while (walk.nbytes);
+
+               if (err)
+                       return err;
+               if (ret)
+                       return -EBADMSG;
        } else {
                while (walk.nbytes >= AES_BLOCK_SIZE) {
                        int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -548,23 +561,20 @@ static int gcm_decrypt(struct aead_request *req)
                        err = skcipher_walk_done(&walk, 0);
                }
 
+               if (err)
+                       return err;
+
                put_unaligned_be64(dg[1], tag);
                put_unaligned_be64(dg[0], tag + 8);
                put_unaligned_be32(1, iv + GCM_IV_SIZE);
                aes_encrypt(&ctx->aes_key, iv, iv);
                crypto_xor(tag, iv, AES_BLOCK_SIZE);
-       }
-
-       if (err)
-               return err;
 
-       /* compare calculated auth tag with the stored one */
-       scatterwalk_map_and_copy(buf, req->src,
-                                req->assoclen + req->cryptlen - authsize,
-                                authsize, 0);
-
-       if (crypto_memneq(tag, buf, authsize))
-               return -EBADMSG;
+               if (crypto_memneq(tag, otag, authsize)) {
+                       memzero_explicit(tag, AES_BLOCK_SIZE);
+                       return -EBADMSG;
+               }
+       }
        return 0;
 }
 
-- 
2.17.1

Reply via email to