[PATCH] crypto: api - Remove no-op exit_ops code
crypto_exit_cipher_ops() and crypto_exit_compress_ops() are no-ops and have been for a long time, so remove them. Signed-off-by: Eric Biggers --- crypto/api.c | 20 ++-- crypto/cipher.c | 4 crypto/compress.c | 4 crypto/internal.h | 3 --- 4 files changed, 2 insertions(+), 29 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index bbc147c..a88729f 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -310,24 +310,8 @@ static void crypto_exit_ops(struct crypto_tfm *tfm) { const struct crypto_type *type = tfm->__crt_alg->cra_type; - if (type) { - if (tfm->exit) - tfm->exit(tfm); - return; - } - - switch (crypto_tfm_alg_type(tfm)) { - case CRYPTO_ALG_TYPE_CIPHER: - crypto_exit_cipher_ops(tfm); - break; - - case CRYPTO_ALG_TYPE_COMPRESS: - crypto_exit_compress_ops(tfm); - break; - - default: - BUG(); - } + if (type && tfm->exit) + tfm->exit(tfm); } static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask) diff --git a/crypto/cipher.c b/crypto/cipher.c index 39541e0..94fa355 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -116,7 +116,3 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm) return 0; } - -void crypto_exit_cipher_ops(struct crypto_tfm *tfm) -{ -} diff --git a/crypto/compress.c b/crypto/compress.c index c33f076..f2d5229 100644 --- a/crypto/compress.c +++ b/crypto/compress.c @@ -42,7 +42,3 @@ int crypto_init_compress_ops(struct crypto_tfm *tfm) return 0; } - -void crypto_exit_compress_ops(struct crypto_tfm *tfm) -{ -} diff --git a/crypto/internal.h b/crypto/internal.h index 7eefcdb..f073204 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -76,9 +76,6 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask); int crypto_init_cipher_ops(struct crypto_tfm *tfm); int crypto_init_compress_ops(struct crypto_tfm *tfm); -void crypto_exit_cipher_ops(struct crypto_tfm *tfm); -void crypto_exit_compress_ops(struct crypto_tfm *tfm); - struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask); void crypto_larval_kill(struct crypto_alg *alg); struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask); -- 2.8.0.rc3.226.g39d4020 -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] crypto: skcipher - Remove unused crypto_lookup_skcipher() declaration
The definition of crypto_lookup_skcipher() was already removed in commit 3a01d0ee2b99 ("crypto: skcipher - Remove top-level givcipher interface"). So the declaration should be removed too. Signed-off-by: Eric Biggers --- include/crypto/internal/skcipher.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index a21a95e..95d2a18 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -74,8 +74,6 @@ static inline int crypto_grab_skcipher2(struct crypto_skcipher_spawn *spawn, return crypto_grab_skcipher(spawn, name, type, mask); } -struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type, u32 mask); - static inline void crypto_drop_skcipher(struct crypto_skcipher_spawn *spawn) { crypto_drop_spawn(&spawn->base); -- 2.8.0.rc3.226.g39d4020 -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] crypto: cmac - return -EINVAL if block size is unsupported
cmac_create() previously returned 0 if a cipher with a block size other than 8 or 16 bytes was specified. It should return -EINVAL instead. Granted, this doesn't actually change any behavior because cryptomgr currently ignores any return value other than -EAGAIN from template ->create() functions. Signed-off-by: Eric Biggers --- crypto/cmac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/cmac.c b/crypto/cmac.c index 7a8bfbd..b6c4059 100644 --- a/crypto/cmac.c +++ b/crypto/cmac.c @@ -243,6 +243,7 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb) case 8: break; default: + err = -EINVAL; goto out_put_alg; } -- 2.8.0.rc3.226.g39d4020 -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] crypto: cmac - fix alignment of 'consts'
The per-transform 'consts' array is accessed as __be64 in crypto_cmac_digest_setkey() but was only guaranteed to be aligned to __alignof__(long). Fix this by aligning it to __alignof__(__be64). Signed-off-by: Eric Biggers --- crypto/cmac.c | 13 + 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/crypto/cmac.c b/crypto/cmac.c index b6c4059..04080dc 100644 --- a/crypto/cmac.c +++ b/crypto/cmac.c @@ -57,7 +57,8 @@ static int crypto_cmac_digest_setkey(struct crypto_shash *parent, unsigned long alignmask = crypto_shash_alignmask(parent); struct cmac_tfm_ctx *ctx = crypto_shash_ctx(parent); unsigned int bs = crypto_shash_blocksize(parent); - __be64 *consts = PTR_ALIGN((void *)ctx->ctx, alignmask + 1); + __be64 *consts = PTR_ALIGN((void *)ctx->ctx, + (alignmask | (__alignof__(__be64) - 1)) + 1); u64 _const[2]; int i, err = 0; u8 msb_mask, gfmask; @@ -173,7 +174,8 @@ static int crypto_cmac_digest_final(struct shash_desc *pdesc, u8 *out) struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc); struct crypto_cipher *tfm = tctx->child; int bs = crypto_shash_blocksize(parent); - u8 *consts = PTR_ALIGN((void *)tctx->ctx, alignmask + 1); + u8 *consts = PTR_ALIGN((void *)tctx->ctx, + (alignmask | (__alignof__(__be64) - 1)) + 1); u8 *odds = PTR_ALIGN((void *)ctx->ctx, alignmask + 1); u8 *prev = odds + bs; unsigned int offset = 0; @@ -258,7 +260,8 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb) if (err) goto out_free_inst; - alignmask = alg->cra_alignmask | (sizeof(long) - 1); + /* We access the data as u32s when xoring. */ + alignmask = alg->cra_alignmask | (__alignof__(u32) - 1); inst->alg.base.cra_alignmask = alignmask; inst->alg.base.cra_priority = alg->cra_priority; inst->alg.base.cra_blocksize = alg->cra_blocksize; @@ -270,7 +273,9 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb) + alg->cra_blocksize * 2; inst->alg.base.cra_ctxsize = - ALIGN(sizeof(struct cmac_tfm_ctx), alignmask + 1) + ALIGN(sizeof(struct cmac_tfm_ctx), crypto_tfm_ctx_alignment()) + + ((alignmask | (__alignof__(__be64) - 1)) & + ~(crypto_tfm_ctx_alignment() - 1)) + alg->cra_blocksize * 2; inst->alg.base.cra_init = cmac_init_tfm; -- 2.8.0.rc3.226.g39d4020 -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] crypto: cmac - fix alignment of 'consts'
On Mon, Oct 10, 2016 at 10:29:55AM -0700, Joe Perches wrote: > On Mon, 2016-10-10 at 10:15 -0700, Eric Biggers wrote: > > The per-transform 'consts' array is accessed as __be64 in > > crypto_cmac_digest_setkey() but was only guaranteed to be aligned to > > __alignof__(long). Fix this by aligning it to __alignof__(__be64). > [] > > diff --git a/crypto/cmac.c b/crypto/cmac.c > [] > > @@ -57,7 +57,8 @@ static int crypto_cmac_digest_setkey(struct crypto_shash > > *parent, > > unsigned long alignmask = crypto_shash_alignmask(parent); > > struct cmac_tfm_ctx *ctx = crypto_shash_ctx(parent); > > unsigned int bs = crypto_shash_blocksize(parent); > > - __be64 *consts = PTR_ALIGN((void *)ctx->ctx, alignmask + 1); > > + __be64 *consts = PTR_ALIGN((void *)ctx->ctx, > > + (alignmask | (__alignof__(__be64) - 1)) + 1); > > Using a bitwise or looks very odd there. Perhaps: > > min(alignmask + 1, __alignof__(__be64)) > Alignment has to be a power of 2. From the code I've read, crypto drivers work with alignment a lot and use bitwise OR to mean "the more restrictive of these alignmasks". So I believe the way it's written is the preferred style. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
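A quick illustration of the mask arithmetic discussed above: because alignments are powers of two, OR-ing two alignmasks and adding one yields the stricter (larger) of the two alignments, which is exactly what the PTR_ALIGN() call needs. The standalone userspace sketch below demonstrates this; PTR_ALIGN_UP is a local stand-in for the kernel's PTR_ALIGN macro and the mask values are invented for the example.

#include <stdio.h>
#include <stdint.h>

/* Local stand-in for the kernel's PTR_ALIGN(): round p up to an a-byte boundary. */
#define PTR_ALIGN_UP(p, a) \
        ((void *)(((uintptr_t)(p) + ((uintptr_t)(a) - 1)) & ~((uintptr_t)(a) - 1)))

int main(void)
{
        unsigned long cipher_alignmask = 3;        /* example: cipher needs 4-byte alignment */
        unsigned long be64_alignmask = __alignof__(uint64_t) - 1;  /* 7 on most ABIs */
        unsigned long combined = (cipher_alignmask | be64_alignmask) + 1;
        char ctx[64];
        void *consts = PTR_ALIGN_UP(ctx + 1, combined);

        /* ORing the masks picks the stricter (larger) power-of-two alignment. */
        printf("combined alignment: %lu\n", combined);           /* prints 8 */
        printf("consts misalignment mod 8: %lu\n",
               (unsigned long)((uintptr_t)consts & 7));          /* prints 0 */
        return 0;
}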
Re: [PATCH] crypto: cmac - fix alignment of 'consts'
On Mon, Oct 10, 2016 at 10:51:14AM -0700, Joe Perches wrote: > > Hey Eric. > > I don't see any PTR_ALIGN uses in crypto/ or drivers/crypto/ that > use a bitwise or, just mask + 1, but I believe the effect is the > same. Anyway, your choice, but I think using min is clearer. > > cheers, Joe Usually the bitwise OR is used when setting cra_alignmask in the 'struct crypto_alg'. Indeed, the problem could be solved by setting inst->alg.base.cra_alignmask = alg->cra_alignmask | (__alignof__(__be64) - 1); I decided against that because it would always force 8-byte alignment for CMAC input buffers and keys, when in fact they don't need that level of alignment unless the underlying block cipher requires it. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] crypto: skcipher - Get rid of crypto_spawn_skcipher2()
Since commit 3a01d0ee2b99 ("crypto: skcipher - Remove top-level givcipher interface"), crypto_spawn_skcipher2() and crypto_spawn_skcipher() are equivalent. So switch callers of crypto_spawn_skcipher2() to crypto_spawn_skcipher() and remove it. Signed-off-by: Eric Biggers --- crypto/authenc.c | 2 +- crypto/authencesn.c| 2 +- crypto/ccm.c | 2 +- crypto/chacha20poly1305.c | 2 +- crypto/ctr.c | 2 +- crypto/cts.c | 2 +- crypto/gcm.c | 2 +- include/crypto/internal/skcipher.h | 6 -- 8 files changed, 7 insertions(+), 13 deletions(-) diff --git a/crypto/authenc.c b/crypto/authenc.c index 03d5edc..875470b 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -324,7 +324,7 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm) if (IS_ERR(auth)) return PTR_ERR(auth); - enc = crypto_spawn_skcipher2(&ictx->enc); + enc = crypto_spawn_skcipher(&ictx->enc); err = PTR_ERR(enc); if (IS_ERR(enc)) goto err_free_ahash; diff --git a/crypto/authencesn.c b/crypto/authencesn.c index bad6ef4..6f8f6b8 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -342,7 +342,7 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm) if (IS_ERR(auth)) return PTR_ERR(auth); - enc = crypto_spawn_skcipher2(&ictx->enc); + enc = crypto_spawn_skcipher(&ictx->enc); err = PTR_ERR(enc); if (IS_ERR(enc)) goto err_free_ahash; diff --git a/crypto/ccm.c b/crypto/ccm.c index 67e3636..26b924d 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -462,7 +462,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm) if (IS_ERR(cipher)) return PTR_ERR(cipher); - ctr = crypto_spawn_skcipher2(&ictx->ctr); + ctr = crypto_spawn_skcipher(&ictx->ctr); err = PTR_ERR(ctr); if (IS_ERR(ctr)) goto err_free_cipher; diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index 66291d4..db1bc31 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -532,7 +532,7 @@ static int chachapoly_init(struct crypto_aead *tfm) if (IS_ERR(poly)) return PTR_ERR(poly); - chacha = crypto_spawn_skcipher2(&ictx->chacha); + chacha = crypto_spawn_skcipher(&ictx->chacha); if (IS_ERR(chacha)) { crypto_free_ahash(poly); return PTR_ERR(chacha); diff --git a/crypto/ctr.c b/crypto/ctr.c index 57114b1..a9a7a44 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -312,7 +312,7 @@ static int crypto_rfc3686_init_tfm(struct crypto_skcipher *tfm) unsigned long align; unsigned int reqsize; - cipher = crypto_spawn_skcipher2(spawn); + cipher = crypto_spawn_skcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); diff --git a/crypto/cts.c b/crypto/cts.c index 8883b62..00254d7 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -290,7 +290,7 @@ static int crypto_cts_init_tfm(struct crypto_skcipher *tfm) unsigned bsize; unsigned align; - cipher = crypto_spawn_skcipher2(spawn); + cipher = crypto_spawn_skcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); diff --git a/crypto/gcm.c b/crypto/gcm.c index 5f11b80..b7ad808 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -575,7 +575,7 @@ static int crypto_gcm_init_tfm(struct crypto_aead *tfm) if (IS_ERR(ghash)) return PTR_ERR(ghash); - ctr = crypto_spawn_skcipher2(&ictx->ctr); + ctr = crypto_spawn_skcipher(&ictx->ctr); err = PTR_ERR(ctr); if (IS_ERR(ctr)) goto err_free_hash; diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index 558f5c9..7a7e815 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -91,12 +91,6 @@ static inline struct crypto_skcipher *crypto_spawn_skcipher( return crypto_spawn_tfm2(&spawn->base); } -static 
inline struct crypto_skcipher *crypto_spawn_skcipher2( - struct crypto_skcipher_spawn *spawn) -{ - return crypto_spawn_skcipher(spawn); -} - static inline void crypto_skcipher_set_reqsize( struct crypto_skcipher *skcipher, unsigned int reqsize) { -- 2.8.0.rc3.226.g39d4020 -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] crypto: skcipher - Get rid of crypto_grab_skcipher2()
Since commit 3a01d0ee2b99 ("crypto: skcipher - Remove top-level givcipher interface"), crypto_grab_skcipher2() and crypto_grab_skcipher() are equivalent. So switch callers of crypto_grab_skcipher2() to crypto_grab_skcipher() and remove it. Signed-off-by: Eric Biggers --- crypto/authenc.c | 6 +++--- crypto/authencesn.c| 6 +++--- crypto/ccm.c | 6 +++--- crypto/chacha20poly1305.c | 6 +++--- crypto/ctr.c | 6 +++--- crypto/cts.c | 6 +++--- crypto/gcm.c | 6 +++--- include/crypto/internal/skcipher.h | 6 -- 8 files changed, 21 insertions(+), 27 deletions(-) diff --git a/crypto/authenc.c b/crypto/authenc.c index a7e1ac7..03d5edc 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -420,9 +420,9 @@ static int crypto_authenc_create(struct crypto_template *tmpl, goto err_free_inst; crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst)); - err = crypto_grab_skcipher2(&ctx->enc, enc_name, 0, - crypto_requires_sync(algt->type, -algt->mask)); + err = crypto_grab_skcipher(&ctx->enc, enc_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_auth; diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 121010a..bad6ef4 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -441,9 +441,9 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl, goto err_free_inst; crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst)); - err = crypto_grab_skcipher2(&ctx->enc, enc_name, 0, - crypto_requires_sync(algt->type, -algt->mask)); + err = crypto_grab_skcipher(&ctx->enc, enc_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_auth; diff --git a/crypto/ccm.c b/crypto/ccm.c index 006d857..67e3636 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -544,9 +544,9 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, goto err_free_inst; crypto_set_skcipher_spawn(&ictx->ctr, aead_crypto_instance(inst)); - err = crypto_grab_skcipher2(&ictx->ctr, ctr_name, 0, - crypto_requires_sync(algt->type, -algt->mask)); + err = crypto_grab_skcipher(&ictx->ctr, ctr_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_cipher; diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index e899ef5..66291d4 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -625,9 +625,9 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, goto err_free_inst; crypto_set_skcipher_spawn(&ctx->chacha, aead_crypto_instance(inst)); - err = crypto_grab_skcipher2(&ctx->chacha, chacha_name, 0, - crypto_requires_sync(algt->type, -algt->mask)); + err = crypto_grab_skcipher(&ctx->chacha, chacha_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_poly; diff --git a/crypto/ctr.c b/crypto/ctr.c index ff4d21e..57114b1 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -370,9 +370,9 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl, spawn = skcipher_instance_ctx(inst); crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst)); - err = crypto_grab_skcipher2(spawn, cipher_name, 0, - crypto_requires_sync(algt->type, -algt->mask)); + err = crypto_grab_skcipher(spawn, cipher_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_free_inst; diff --git a/crypto/cts.c b/crypto/cts.c index 5197618..8883b62 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -348,9 +348,9 @@ static int crypto_cts_create(struct crypto_template *tmpl,
Alignment of shash input buffers?
Hi, I'm having trouble understanding how alignment of shash input buffers is supposed to work. If you pass crypto_shash_update() a buffer that is not aligned to the shash algorithm's alignmask, it will call the underlying ->update() function twice, once with a temporary aligned buffer and once with the aligned remainder of the original input buffer. So far so good. The problem is that some (many?) hash algorithms actually deal with fixed size blocks, and the lengths of buffers which users might pass to crypto_shash_update() are not necessarily multiples of the block size. This can cause the input buffer to fall out of alignment. Let's use cmac(aes) as an example. It will copy up to 16 bytes to a temporary buffer, process (xor into the state and encrypt) that block if full, then directly process 16-byte blocks from the input buffer, then save any remainder. The middle step uses crypto_xor() directly on the input buffer, which assumes 4-byte alignment. However this alignment is not guaranteed. For example, if I pass two aligned buffers, one of length 15 and one of length 17 to crypto_shash_update(), crypto_xor() is called on a misaligned pointer. ghash is another example of an algorithm that has this problem, although that one seems even more broken since it doesn't even set an alignmask at all (?). I am wondering, is this intentional? If it's broken, do shash algorithms need to be fixed to assume no alignment, e.g. by using the get_unaligned_*() macros or by memcpy()-ing each block into a temporary buffer? Is there a better solution? Thanks, Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
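To make the failure mode above concrete, here is a standalone userspace simulation of the buffering pattern described (16-byte blocks, partial blocks buffered in the context, full blocks processed directly from the caller's pointer). fake_update() and process_block() are hypothetical stand-ins rather than the real cmac code; with 4-byte-aligned inputs of 15 and then 17 bytes, the directly processed block starts at offset 1 of the second buffer.

#include <stdio.h>
#include <stdint.h>

#define BLOCK_SIZE 16

static unsigned int buffered;   /* bytes sitting in the context's partial-block buffer */

static void process_block(const uint8_t *src)
{
        printf("processing block at pointer with addr %% 4 == %lu\n",
               (unsigned long)((uintptr_t)src & 3));
}

static void fake_update(const uint8_t *src, unsigned int len)
{
        /* Top up a previously buffered partial block first. */
        if (buffered) {
                unsigned int fill = BLOCK_SIZE - buffered;

                if (fill > len)
                        fill = len;
                /* (real code would memcpy src into the context buffer here) */
                buffered += fill;
                src += fill;
                len -= fill;
                if (buffered == BLOCK_SIZE)
                        buffered = 0;   /* (real code would process the completed buffered block) */
        }
        /* Then process whole blocks directly from the caller's pointer. */
        while (len >= BLOCK_SIZE) {
                process_block(src);
                src += BLOCK_SIZE;
                len -= BLOCK_SIZE;
        }
        buffered += len;        /* stash the remainder */
}

int main(void)
{
        static uint8_t a[15] __attribute__((aligned(4)));
        static uint8_t b[17] __attribute__((aligned(4)));

        fake_update(a, sizeof(a));      /* buffers 15 bytes */
        fake_update(b, sizeof(b));      /* b[0] completes the block; the next block
                                         * starts at b + 1, which is not 4-byte aligned */
        return 0;
}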
Re: [PATCH 1/16] crypto: skcipher - Add skcipher walk interface
Hi Herbert, just a few preliminary comments. I haven't made it through everything yet. On Wed, Nov 02, 2016 at 07:19:02AM +0800, Herbert Xu wrote: > +static int skcipher_walk_first(struct skcipher_walk *walk) > +{ > + if (WARN_ON_ONCE(in_irq())) > + return -EDEADLK; > + > + walk->nbytes = walk->total; > + if (unlikely(!walk->total)) > + return 0; > + > + walk->buffer = NULL; > + if (unlikely(((unsigned long)walk->iv & walk->alignmask))) { > + int err = skcipher_copy_iv(walk); > + if (err) > + return err; > + } > + > + walk->page = NULL; > + > + return skcipher_walk_next(walk); > +} I think the case where skcipher_copy_iv() fails may be handled incorrectly. Wouldn't it need to set walk.nbytes to 0 so as to not confuse callers which expect that behavior? Or maybe it should be calling skcipher_walk_done(). > +static int skcipher_walk_skcipher(struct skcipher_walk *walk, > + struct skcipher_request *req) > +{ > + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); > + > + scatterwalk_start(&walk->in, req->src); > + scatterwalk_start(&walk->out, req->dst); > + > + walk->in.sg = req->src; > + walk->out.sg = req->dst; Setting walk->in.sg and walk->out.sg is redundant with the scatterwalk_start() calls. > +int skcipher_walk_virt(struct skcipher_walk *walk, > +struct skcipher_request *req, bool atomic) > +{ > + int err; > + > + walk->flags &= ~SKCIPHER_WALK_PHYS; > + > + err = skcipher_walk_skcipher(walk, req); > + > + walk->flags &= atomic ? ~SKCIPHER_WALK_SLEEP : ~0; > + > + return err; > +} This gets called with uninitialized 'walk.flags'. This was somewhat of a theoretical problem with the old blkcipher_walk code but it looks like now it will interact badly with the new SKCIPHER_WALK_SLEEP flag. As far as I can see, whether the flag will end up set or not can depend on the uninitialized value. It would be nice if this problem could be avoided entirely by setting flags=0. I'm also wondering about the choice to not look at 'atomic' until after the call to skcipher_walk_skcipher(). Wouldn't this mean that the choice of 'atomic' would not be respected in e.g. the kmalloc() in skcipher_copy_iv()? > +int skcipher_walk_async(struct skcipher_walk *walk, > + struct skcipher_request *req) > +{ > + walk->flags |= SKCIPHER_WALK_PHYS; > + > + INIT_LIST_HEAD(&walk->buffers); > + > + return skcipher_walk_skcipher(walk, req); > +} > +EXPORT_SYMBOL_GPL(skcipher_walk_async); I don't see any users of the "async" walking being introduced; are some planned? Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
vmalloced stacks and scatterwalk_map_and_copy()
Hello, I hit the BUG_ON() in arch/x86/mm/physaddr.c:26 while testing some crypto code in an x86_64 kernel with CONFIG_DEBUG_VIRTUAL=y and CONFIG_VMAP_STACK=y: /* carry flag will be set if starting x was >= PAGE_OFFSET */ VIRTUAL_BUG_ON((x > y) || !phys_addr_valid(x)); The problem is the following code in scatterwalk_map_and_copy() in crypto/scatterwalk.c, which tries to determine if the buffer passed in aliases the physical memory of the first segment of the scatterlist: if (sg_page(sg) == virt_to_page(buf) && sg->offset == offset_in_page(buf)) return; If 'buf' is on the stack with CONFIG_VMAP_STACK=y it will be a vmalloc address. And virt_to_page(buf) does not have a meaningful behavior on vmalloc addresses. Hence the BUG. I don't think this should be fixed by forbidding passing a stack address to scatterwalk_map_and_copy(). Not only are there a number of callers that explicitly use stack addresses (e.g. poly_verify_tag() in crypto/chacha20poly1305.c) but it's also possible for a whole skcipher_request to be allocated on the stack with the SKCIPHER_REQUEST_ON_STACK() macro. Note that this has nothing to do with DMA per se. Another solution would be to make scatterwalk_map_and_copy() explicitly check for virt_addr_valid(). But this would make the behavior of scatterwalk_map_and_copy() confusing because it would detect aliasing but only under some circumstances, and it would depend on the kernel config. Currently I think the best solution would be to require that callers to scatterwalk_map_and_copy() do not alias their source and destination. Then the alias check could be removed. This check has only been there since v4.2 (commit 74412fd5d71b6), so I'd hope not many callers rely on the behavior. I'm not sure exactly which ones do, though. Thoughts on this? Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
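For reference, the virt_addr_valid() alternative mentioned above would look roughly like the sketch below inside scatterwalk_map_and_copy(); as noted, it was not chosen because the aliasing check would then silently apply only to lowmem buffers and its behavior would depend on the kernel config.

        /*
         * Sketch of the rejected alternative: only attempt the aliasing check
         * when 'buf' is a lowmem address that virt_to_page() can handle;
         * vmalloc/stack addresses skip the check entirely.
         */
        if (virt_addr_valid(buf) &&
            sg_page(sg) == virt_to_page(buf) &&
            sg->offset == offset_in_page(buf))
                return;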
Re: vmalloced stacks and scatterwalk_map_and_copy()
On Thu, Nov 03, 2016 at 01:30:49PM -0700, Andy Lutomirski wrote: > > Also, Herbert, it seems like the considerable majority of the crypto > code is acting on kernel virtual memory addresses and does software > processing. Would it perhaps make sense to add a kvec-based or > iov_iter-based interface to the crypto code? I bet it would be quite > a bit faster and it would make crypto on stack buffers work directly. I'd like to hear Herbert's opinion on this too, but as I understand it, if a symmetric cipher API operating on virtual addresses was added, similar to the existing "shash" API it would only allow software processing. Whereas with the current API you can request a transform and use it the same way regardless of whether the crypto framework has chosen a software or hardware implementation, or a combination thereof. If this wasn't a concern then I expect using virtual addresses would indeed simplify things a lot, at least for users not already working with physical memory (struct page). Either way, in the near term it looks like 4.9 will be released with the new behavior that encryption/decryption is not supported on stack buffers. Separately from the scatterwalk_map_and_copy() issue, today I've found two places in the filesystem-level encryption code that do encryption on stack buffers and therefore hit the 'BUG_ON(!virt_addr_valid(buf));' in sg_set_buf(). I will be sending patches to fix these, but I suspect there may be more crypto API users elsewhere that have this same problem. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
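For context, the helper being hit looks roughly like the following upstream (simplified from include/linux/scatterlist.h; compare the grsecurity variant quoted in a later message). With CONFIG_DEBUG_SG it BUGs on any buffer outside the direct mapping, which now includes stack buffers when CONFIG_VMAP_STACK=y.

static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
                              unsigned int buflen)
{
#ifdef CONFIG_DEBUG_SG
        BUG_ON(!virt_addr_valid(buf));  /* fires for vmalloc/stack addresses */
#endif
        sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
}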
[PATCH 1/2] fscrypto: don't use on-stack buffer for filename encryption
With the new (in 4.9) option to use a virtually-mapped stack (CONFIG_VMAP_STACK), stack buffers cannot be used as input/output for the scatterlist crypto API because they may not be directly mappable to struct page. For short filenames, fname_encrypt() was encrypting a stack buffer holding the padded filename. Fix it by encrypting the filename in-place in the output buffer, thereby making the temporary buffer unnecessary. This bug could most easily be observed in a CONFIG_DEBUG_SG kernel because this allowed the BUG in sg_set_buf() to be triggered. Signed-off-by: Eric Biggers --- fs/crypto/fname.c | 53 + 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 9a28133..9b774f4 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -39,65 +39,54 @@ static void fname_crypt_complete(struct crypto_async_request *req, int res) static int fname_encrypt(struct inode *inode, const struct qstr *iname, struct fscrypt_str *oname) { - u32 ciphertext_len; struct skcipher_request *req = NULL; DECLARE_FS_COMPLETION_RESULT(ecr); struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; int res = 0; char iv[FS_CRYPTO_BLOCK_SIZE]; - struct scatterlist src_sg, dst_sg; + struct scatterlist sg; int padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); - char *workbuf, buf[32], *alloc_buf = NULL; - unsigned lim; + unsigned int lim; + unsigned int cryptlen; lim = inode->i_sb->s_cop->max_namelen(inode); if (iname->len <= 0 || iname->len > lim) return -EIO; - ciphertext_len = max(iname->len, (u32)FS_CRYPTO_BLOCK_SIZE); - ciphertext_len = round_up(ciphertext_len, padding); - ciphertext_len = min(ciphertext_len, lim); + /* +* Copy the filename to the output buffer for encrypting in-place and +* pad it with the needed number of NUL bytes. 
+*/ + cryptlen = max_t(unsigned int, iname->len, FS_CRYPTO_BLOCK_SIZE); + cryptlen = round_up(cryptlen, padding); + cryptlen = min(cryptlen, lim); + memcpy(oname->name, iname->name, iname->len); + memset(oname->name + iname->len, 0, cryptlen - iname->len); - if (ciphertext_len <= sizeof(buf)) { - workbuf = buf; - } else { - alloc_buf = kmalloc(ciphertext_len, GFP_NOFS); - if (!alloc_buf) - return -ENOMEM; - workbuf = alloc_buf; - } + /* Initialize the IV */ + memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); - /* Allocate request */ + /* Set up the encryption request */ req = skcipher_request_alloc(tfm, GFP_NOFS); if (!req) { printk_ratelimited(KERN_ERR - "%s: crypto_request_alloc() failed\n", __func__); - kfree(alloc_buf); + "%s: skcipher_request_alloc() failed\n", __func__); return -ENOMEM; } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, fname_crypt_complete, &ecr); + sg_init_one(&sg, oname->name, cryptlen); + skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv); - /* Copy the input */ - memcpy(workbuf, iname->name, iname->len); - if (iname->len < ciphertext_len) - memset(workbuf + iname->len, 0, ciphertext_len - iname->len); - - /* Initialize IV */ - memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); - - /* Create encryption request */ - sg_init_one(&src_sg, workbuf, ciphertext_len); - sg_init_one(&dst_sg, oname->name, ciphertext_len); - skcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv); + /* Do the encryption */ res = crypto_skcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { + /* Request is being completed asynchronously; wait for it */ wait_for_completion(&ecr.completion); res = ecr.res; } - kfree(alloc_buf); skcipher_request_free(req); if (res < 0) { printk_ratelimited(KERN_ERR @@ -105,7 +94,7 @@ static int fname_encrypt(struct inode *inode, return res; } - oname->len = ciphertext_len; + oname->len = cryptlen; return 0; } -- 2.8.0.rc3.226.g39d4020 -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] fscrypto: don't use on-stack buffer for key derivation
With the new (in 4.9) option to use a virtually-mapped stack (CONFIG_VMAP_STACK), stack buffers cannot be used as input/output for the scatterlist crypto API because they may not be directly mappable to struct page. get_crypt_info() was using a stack buffer to hold the output from the encryption operation used to derive the per-file key. Fix it by using a heap buffer. This bug could most easily be observed in a CONFIG_DEBUG_SG kernel because this allowed the BUG in sg_set_buf() to be triggered. Signed-off-by: Eric Biggers --- fs/crypto/keyinfo.c | 16 +--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 82f0285..67fb6d8 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -185,7 +185,7 @@ int get_crypt_info(struct inode *inode) struct crypto_skcipher *ctfm; const char *cipher_str; int keysize; - u8 raw_key[FS_MAX_KEY_SIZE]; + u8 *raw_key = NULL; int res; res = fscrypt_initialize(); @@ -238,6 +238,15 @@ int get_crypt_info(struct inode *inode) if (res) goto out; + /* +* This cannot be a stack buffer because it is passed to the scatterlist +* crypto API as part of key derivation. +*/ + res = -ENOMEM; + raw_key = kmalloc(FS_MAX_KEY_SIZE, GFP_NOFS); + if (!raw_key) + goto out; + if (fscrypt_dummy_context_enabled(inode)) { memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE); goto got_key; @@ -276,7 +285,8 @@ int get_crypt_info(struct inode *inode) if (res) goto out; - memzero_explicit(raw_key, sizeof(raw_key)); + kzfree(raw_key); + raw_key = NULL; if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) { put_crypt_info(crypt_info); goto retry; @@ -287,7 +297,7 @@ int get_crypt_info(struct inode *inode) if (res == -ENOKEY) res = 0; put_crypt_info(crypt_info); - memzero_explicit(raw_key, sizeof(raw_key)); + kzfree(raw_key); return res; } -- 2.8.0.rc3.226.g39d4020 -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: vmalloced stacks and scatterwalk_map_and_copy()
On Thu, Nov 03, 2016 at 02:12:07PM -0700, Eric Biggers wrote: > On Thu, Nov 03, 2016 at 01:30:49PM -0700, Andy Lutomirski wrote: > > > > Also, Herbert, it seems like the considerable majority of the crypto > > code is acting on kernel virtual memory addresses and does software > > processing. Would it perhaps make sense to add a kvec-based or > > iov_iter-based interface to the crypto code? I bet it would be quite > > a bit faster and it would make crypto on stack buffers work directly. > > I'd like to hear Herbert's opinion on this too, but as I understand it, if a > symmetric cipher API operating on virtual addresses was added, similar to the > existing "shash" API it would only allow software processing. Whereas with > the > current API you can request a transform and use it the same way regardless of > whether the crypto framework has chosen a software or hardware implementation, > or a combination thereof. If this wasn't a concern then I expect using > virtual > addresses would indeed simplify things a lot, at least for users not already > working with physical memory (struct page). > > Either way, in the near term it looks like 4.9 will be released with the new > behavior that encryption/decryption is not supported on stack buffers. > Separately from the scatterwalk_map_and_copy() issue, today I've found two > places in the filesystem-level encryption code that do encryption on stack > buffers and therefore hit the 'BUG_ON(!virt_addr_valid(buf));' in > sg_set_buf(). > I will be sending patches to fix these, but I suspect there may be more crypto > API users elsewhere that have this same problem. > > Eric [Added linux-mm to Cc] For what it's worth, grsecurity has a special case to allow a scatterlist entry to be created from a stack buffer: static inline void sg_set_buf(struct scatterlist *sg, const void *buf, unsigned int buflen) { const void *realbuf = buf; #ifdef CONFIG_GRKERNSEC_KSTACKOVERFLOW if (object_starts_on_stack(buf)) realbuf = buf - current->stack + current->lowmem_stack; #endif #ifdef CONFIG_DEBUG_SG BUG_ON(!virt_addr_valid(realbuf)); #endif sg_set_page(sg, virt_to_page(realbuf), buflen, offset_in_page(realbuf)); } It seems to maintain two virtual mappings for each stack, a physically contiguous one (task_struct.lowmem_stack) and a physically non-contiguous one (task_struct.stack). This seems different from upstream CONFIG_VMAP_STACK which just maintains a physically non-contiguous one. I don't know about all the relative merits of the two approaches. But one of the things that will need to be done with the currently upstream approach is that all callers of sg_set_buf() will need to be checked to make sure they aren't using stack addresses, and any that are will need to be updated to do otherwise, e.g. by using heap-allocated memory. I suppose this is already happening, but in the case of the crypto API it will probably take a while for all the users to be identified and updated. (And it's not always clear from the local context whether something can be stack memory or not, e.g. the memory for crypto request objects may be either.) Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: vmalloced stacks and scatterwalk_map_and_copy()
On Thu, Nov 03, 2016 at 08:57:49PM -0700, Andy Lutomirski wrote: > > The crypto request objects can live on the stack just fine. It's the > request buffers that need to live elsewhere (or the alternative > interfaces can be used, or the crypto core code can start using > something other than scatterlists). > There are cases where a crypto operation is done on a buffer embedded in a request object. The example I'm aware of is in the GCM implementation (crypto/gcm.c). Basically it needs to encrypt 16 zero bytes prepended with the actual data, so it fills a buffer in the request object (crypto_gcm_req_priv_ctx.auth_tag) with zeroes and builds a new scatterlist which covers both this buffer and the original data scatterlist. Granted, GCM provides the aead interface not the skcipher interface, and currently there is no AEAD_REQUEST_ON_STACK() macro like there is a SKCIPHER_REQUEST_ON_STACK() macro. So maybe no one is creating aead requests on the stack right now. But it's something to watch out for. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
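To illustrate the distinction being drawn here, below is a minimal sketch of the pattern that remains valid with vmalloced stacks: the request object itself on the stack via SKCIPHER_REQUEST_ON_STACK(), the data buffer on the heap so the scatterlist walk can map it. The function name is hypothetical, a synchronous transform is assumed, and handling of the async (-EINPROGRESS/-EBUSY) completion path is omitted.

/*
 * Minimal sketch: request on the stack is fine, the data buffer is not.
 */
static int example_encrypt_in_place(struct crypto_skcipher *tfm, u8 *iv,
                                    unsigned int len)
{
        SKCIPHER_REQUEST_ON_STACK(req, tfm);    /* request object on the stack: OK */
        struct scatterlist sg;
        u8 *buf;
        int err;

        buf = kmalloc(len, GFP_KERNEL);         /* data buffer must not be on the stack */
        if (!buf)
                return -ENOMEM;

        /* ... fill buf with the data to be encrypted in place ... */

        sg_init_one(&sg, buf, len);
        skcipher_request_set_tfm(req, tfm);
        skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
        skcipher_request_set_crypt(req, &sg, &sg, len, iv);

        err = crypto_skcipher_encrypt(req);

        skcipher_request_zero(req);
        kfree(buf);
        return err;
}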
Re: [PATCH] poly1305: generic C can be faster on chips with slow unaligned access
On Thu, Nov 03, 2016 at 11:20:08PM +0100, Jason A. Donenfeld wrote: > Hi David, > > On Thu, Nov 3, 2016 at 6:08 PM, David Miller wrote: > > In any event no piece of code should be doing 32-bit word reads from > > addresses like "x + 3" without, at a very minimum, going through the > > kernel unaligned access handlers. > > Excellent point. In otherwords, > > ctx->r[0] = (le32_to_cpuvp(key + 0) >> 0) & 0x3ff; > ctx->r[1] = (le32_to_cpuvp(key + 3) >> 2) & 0x303; > ctx->r[2] = (le32_to_cpuvp(key + 6) >> 4) & 0x3ffc0ff; > ctx->r[3] = (le32_to_cpuvp(key + 9) >> 6) & 0x3f03fff; > ctx->r[4] = (le32_to_cpuvp(key + 12) >> 8) & 0x00f; > > should change to: > > ctx->r[0] = (le32_to_cpuvp(key + 0) >> 0) & 0x3ff; > ctx->r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x303; > ctx->r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff; > ctx->r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff; > ctx->r[4] = (le32_to_cpuvp(key + 12) >> 8) & 0x00f; > I agree, and the current code is wrong; but do note that this proposal is correct for poly1305_setrkey() but not for poly1305_setskey() and poly1305_blocks(). In the latter two cases, 4-byte alignment of the source buffer is *not* guaranteed. Although crypto_poly1305_update() will be called with a 4-byte aligned buffer due to the alignmask set on poly1305_alg, the algorithm operates on 16-byte blocks and therefore has to buffer partial blocks. If some number of bytes that is not 0 mod 4 is buffered, then the buffer will fall out of alignment on the next update call. Hence, get_unaligned_le32() is actually needed on all the loads, since the buffer will, in general, be of unknown alignment. Note: some other shash algorithms have this problem too and do not handle it correctly. It seems to be a common mistake. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
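For readers unfamiliar with the helper being proposed: get_unaligned_le32() performs a 32-bit little-endian load that is valid at any byte offset. A portable userspace equivalent, shown below under a hypothetical name, is simply a memcpy() into a local variable plus a byte swap on big-endian hosts; the key + 3 access mirrors the pattern from the patch.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static uint32_t load_le32_unaligned(const uint8_t *p)
{
        uint32_t v;

        memcpy(&v, p, sizeof(v));       /* no alignment requirement */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        v = __builtin_bswap32(v);       /* interpret the bytes as little-endian */
#endif
        return v;
}

int main(void)
{
        uint8_t key[20] = { 1, 2, 3, 4, 5, 6, 7, 8 };

        /* Mirrors the "key + 3" access pattern from the patch above. */
        printf("0x%08x\n", load_le32_unaligned(key + 3));       /* prints 0x07060504 */
        return 0;
}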
Re: [PATCH] poly1305: generic C can be faster on chips with slow unaligned access
On Mon, Nov 07, 2016 at 07:08:22PM +0100, Jason A. Donenfeld wrote: > Hmm... The general data flow that strikes me as most pertinent is > something like: > > struct sk_buff *skb = get_it_from_somewhere(); > skb = skb_share_check(skb, GFP_ATOMIC); > num_frags = skb_cow_data(skb, ..., ...); > struct scatterlist sg[num_frags]; > sg_init_table(sg, num_frags); > skb_to_sgvec(skb, sg, ..., ...); > blkcipher_walk_init(&walk, sg, sg, len); > blkcipher_walk_virt_block(&desc, &walk, BLOCK_SIZE); > while (walk.nbytes >= BLOCK_SIZE) { > size_t chunk_len = rounddown(walk.nbytes, BLOCK_SIZE); > poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len); > blkcipher_walk_done(&desc, &walk, walk.nbytes % BLOCK_SIZE); > } > if (walk.nbytes) { > poly1305_update(&poly1305_state, walk.src.virt.addr, walk.nbytes); > blkcipher_walk_done(&desc, &walk, 0); > } > > Is your suggestion that that in the final if block, walk.src.virt.addr > might be unaligned? Like in the case of the last fragment being 67 > bytes long? I was not referring to any users in particular, only what users could do. As an example, if you did crypto_shash_update() with 32, 15, then 17 bytes, and the underlying algorithm is poly1305-generic, the last block would end up misaligned. This doesn't appear possible with your pseudocode because it only passes in multiples of the block size until the very end. However I don't see it claimed anywhere that shash API users have to do that. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] poly1305: generic C can be faster on chips with slow unaligned access
On Mon, Nov 07, 2016 at 08:02:35PM +0100, Jason A. Donenfeld wrote: > On Mon, Nov 7, 2016 at 7:26 PM, Eric Biggers wrote: > > > > I was not referring to any users in particular, only what users could do. > > As an > > example, if you did crypto_shash_update() with 32, 15, then 17 bytes, and > > the > > underlying algorithm is poly1305-generic, the last block would end up > > misaligned. This doesn't appear possible with your pseudocode because it > > only > > passes in multiples of the block size until the very end. However I don't > > see > > it claimed anywhere that shash API users have to do that. > > Actually it appears that crypto/poly1305_generic.c already buffers > incoming blocks to a buffer that definitely looks aligned, to prevent > this condition! > No it does *not* buffer all incoming blocks, which is why the source pointer can fall out of alignment. Yes, I actually tested this. In fact this situation is even hit, in both possible places, in the self-tests. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4] poly1305: generic C can be faster on chips with slow unaligned access
On Mon, Nov 07, 2016 at 08:47:09PM +0100, Jason A. Donenfeld wrote: > By using the unaligned access helpers, we drastically improve > performance on small MIPS routers that have to go through the exception > fix-up handler for these unaligned accesses. > > Signed-off-by: Jason A. Donenfeld > --- Reviewed-by: Eric Biggers Nit: the subject line is a little unclear about what was changed. "make generic C faster on chips with slow unaligned access" would be better. -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4] poly1305: generic C can be faster on chips with slow unaligned access
On Tue, Nov 08, 2016 at 08:52:39AM +0100, Martin Willi wrote: > > > Not sure what the exact alignment rules for key/iv are, but maybe we > want to replace the same function in chacha20_generic.c as well? > > Martin chacha20-generic provides a blkcipher API and sets an alignmask of sizeof(u32) - 1. This applies to the key buffer for ->setkey() and to the mapped addresses for the input/output buffers and IV during the blkcipher walk. So it does not appear to have the problems that poly1305 has. I do however see one small problem which is that 'u8 stream[CHACHA20_BLOCK_SIZE];' is, strictly speaking, not guaranteed to be aligned to u32. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
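If that last point turns out to matter, one minimal way to get the guarantee would be an explicit alignment annotation on the local keystream buffer, as sketched below (untested; declaring it as a u32 array and casting to u8 * where a byte pointer is needed would work equally well).

        /* Untested sketch: force u32 alignment of the on-stack keystream buffer. */
        u8 stream[CHACHA20_BLOCK_SIZE] __aligned(sizeof(u32));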
Re: [v2 PATCH 1/16] crypto: skcipher - Add skcipher walk interface
Hi Herbert, On Sun, Nov 13, 2016 at 07:45:32PM +0800, Herbert Xu wrote: > +int skcipher_walk_done(struct skcipher_walk *walk, int err) > +{ > + unsigned int nbytes = 0; > + unsigned int n = 0; > + > + if (likely(err >= 0)) { > + n = walk->nbytes - err; > + nbytes = walk->total - n; > + } > + > + if (likely(!(walk->flags & (SKCIPHER_WALK_PHYS | > + SKCIPHER_WALK_SLOW | > + SKCIPHER_WALK_COPY | > + SKCIPHER_WALK_DIFF { > +unmap_src: > + skcipher_unmap_src(walk); Isn't it wrong to unmap the buffers when err < 0? They won't have been mapped yet, e.g. if the 'skcipher_walk_done(walk, -EINVAL)' case is hit. > + /* Short-circuit for the common/fast path. */ > + if (!(((unsigned long)walk->iv ^ (unsigned long)walk->oiv) | > + ((unsigned long)walk->buffer ^ (unsigned long)walk->page) | > + (unsigned long)walk->page)) > + goto out; This is really saying that the IV wasn't copied and that walk->buffer and walk->page are both NULL. But if walk->buffer is NULL then the IV wasn't copied. So this can be simplified to: if (!((unsigned long)walk->buffer | (unsigned long)walk->page)) goto out; > +int skcipher_walk_next(struct skcipher_walk *walk) > +{ Shouldn't this be static? There's even a static declaration earlier in the file. > +static int skcipher_next_slow(struct skcipher_walk *walk) > +{ > + bool phys = walk->flags & SKCIPHER_WALK_PHYS; > + unsigned alignmask = walk->alignmask; > + unsigned bsize = walk->chunksize; ... > + walk->nbytes = bsize; skcipher_next_slow() is always setting up 'chunksize' bytes to be processed. Isn't this wrong in the 'blocksize < chunksize' case because fewer than 'chunksize' bytes may need to be processed? > +static inline void *skcipher_map(struct scatter_walk *walk) > +{ > + struct page *page = scatterwalk_page(walk); > + > + return (PageHighMem(page) ? kmap_atomic(page) : page_address(page)) + > +offset_in_page(walk->offset); > +} Is the PageHighMem() optimization really worthwhile? It will skip kmap_atomic() and hence preempt_disable() for non-highmem pages, so it may make it harder to find bugs where users are sleeping when they shouldn't be. It also seems inconsistent that skcipher_map() will do this optimization but scatterwalk_map() won't. > + if (!walk->page) { > + gfp_t gfp = skcipher_walk_gfp(walk); > + > + walk->page = (void *)__get_free_page(gfp); > + if (!walk->page) > + goto slow_path; > + } > + > + walk->nbytes = min_t(unsigned, n, > + PAGE_SIZE - offset_in_page(walk->page)); walk->page will be page-aligned, so there's no need for offset_in_page(). Also, 'n' has already been clamped to at most one page. So AFAICS this can simply be 'walk->nbytes = n;'. > +int skcipher_walk_done(struct skcipher_walk *walk, int err); ... > +void skcipher_walk_complete(struct skcipher_walk *walk, int err); The naming is confusing: "done" vs. "complete". They can easily be mixed up. Would it make more sense to have something with "async" in the name for the second one, like skcipher_walk_async_done()? Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [v2 PATCH 6/16] crypto: cryptd - Add support for skcipher
On Sun, Nov 13, 2016 at 07:45:37PM +0800, Herbert Xu wrote: > +static void cryptd_skcipher_encrypt(struct crypto_async_request *base, > + int err) > +{ > + struct skcipher_request *req = skcipher_request_cast(base); > + struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); > + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); > + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); > + struct crypto_skcipher *child = ctx->child; > + SKCIPHER_REQUEST_ON_STACK(subreq, child); > + > + if (unlikely(err == -EINPROGRESS)) > + goto out; > + > + skcipher_request_set_tfm(subreq, child); > + skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, > + NULL, NULL); > + skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, > +req->iv); > + > + err = crypto_skcipher_encrypt(subreq); > + skcipher_request_zero(subreq); > + > + req->base.complete = rctx->complete; > + > +out: > + cryptd_skcipher_complete(req, err); > +} > + > +static void cryptd_skcipher_decrypt(struct crypto_async_request *base, > + int err) > +{ > + struct skcipher_request *req = skcipher_request_cast(base); > + struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); > + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); > + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); > + struct crypto_skcipher *child = ctx->child; > + SKCIPHER_REQUEST_ON_STACK(subreq, child); > + > + if (unlikely(err == -EINPROGRESS)) > + goto out; > + > + skcipher_request_set_tfm(subreq, child); > + skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, > + NULL, NULL); > + skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, > +req->iv); > + > + err = crypto_skcipher_decrypt(subreq); > + skcipher_request_zero(subreq); > + > + req->base.complete = rctx->complete; > + > +out: > + cryptd_skcipher_complete(req, err); > +} cryptd_skcipher_encrypt() and cryptd_skcipher_decrypt() are identical except for whether crypto_skcipher_encrypt() or crypto_skcipher_decrypt() is used. I suggest having them wrap a function cryptd_skcipher_crypt() that takes in a function pointer to crypto_skcipher_encrypt or crypto_skcipher_decrypt. Alternatively a bool would be okay too. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
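Concretely, the suggested refactor could look something like the untested sketch below, assembled from the two functions quoted above: one shared helper taking the encrypt/decrypt operation as a function pointer, with thin wrappers keeping the existing entry points.

static void cryptd_skcipher_crypt(struct skcipher_request *req, int err,
                                  int (*crypt)(struct skcipher_request *req))
{
        struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct crypto_skcipher *child = ctx->child;
        SKCIPHER_REQUEST_ON_STACK(subreq, child);

        if (unlikely(err == -EINPROGRESS))
                goto out;

        skcipher_request_set_tfm(subreq, child);
        skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
                                      NULL, NULL);
        skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
                                   req->iv);

        err = crypt(subreq);            /* crypto_skcipher_encrypt or _decrypt */
        skcipher_request_zero(subreq);

        req->base.complete = rctx->complete;

out:
        cryptd_skcipher_complete(req, err);
}

static void cryptd_skcipher_encrypt(struct crypto_async_request *base, int err)
{
        cryptd_skcipher_crypt(skcipher_request_cast(base), err,
                              crypto_skcipher_encrypt);
}

static void cryptd_skcipher_decrypt(struct crypto_async_request *base, int err)
{
        cryptd_skcipher_crypt(skcipher_request_cast(base), err,
                              crypto_skcipher_decrypt);
}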
Re: [v2 PATCH 4/16] crypto: xts - Convert to skcipher
On Sun, Nov 13, 2016 at 07:45:35PM +0800, Herbert Xu wrote: > +static int do_encrypt(struct skcipher_request *req, int err) > +{ > + struct rctx *rctx = skcipher_request_ctx(req); > + struct skcipher_request *subreq; > + > + subreq = &rctx->subreq; > + > + while (!err && rctx->left) { > + err = pre_crypt(req) ?: > + crypto_skcipher_encrypt(subreq) ?: > + post_crypt(req); > + > + if (err == -EINPROGRESS || > + (err == -EBUSY && > + req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) > + return err; > + } > + > + exit_crypt(req); > + return err; > +} > + > +static void encrypt_done(struct crypto_async_request *areq, int err) > +{ > + struct skcipher_request *req = areq->data; > + struct skcipher_request *subreq; > + struct rctx *rctx; > + > + rctx = skcipher_request_ctx(req); > + subreq = &rctx->subreq; > + subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; > + > + err = do_encrypt(req, err ?: post_crypt(req)); > + if (rctx->left) > + return; > + > + skcipher_request_complete(req, err); > +} > + > +static int encrypt(struct skcipher_request *req) > +{ > + return do_encrypt(req, init_crypt(req, encrypt_done)); > +} > + > +static int do_decrypt(struct skcipher_request *req, int err) > +{ > + struct rctx *rctx = skcipher_request_ctx(req); > + struct skcipher_request *subreq; > + > + subreq = &rctx->subreq; > + > + while (!err && rctx->left) { > + err = pre_crypt(req) ?: > + crypto_skcipher_decrypt(subreq) ?: > + post_crypt(req); > + > + if (err == -EINPROGRESS || > + (err == -EBUSY && > + req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) > + return err; > + } > + > + exit_crypt(req); > + return err; > +} > + > +static void decrypt_done(struct crypto_async_request *areq, int err) > +{ > + struct skcipher_request *req = areq->data; > + struct skcipher_request *subreq; > + struct rctx *rctx; > + > + rctx = skcipher_request_ctx(req); > + subreq = &rctx->subreq; > + subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; > + > + err = do_decrypt(req, err ?: post_crypt(req)); > + if (rctx->left) > + return; > + > + skcipher_request_complete(req, err); > +} > + > +static int decrypt(struct skcipher_request *req) > +{ > + return do_decrypt(req, init_crypt(req, decrypt_done)); > } There's duplicated code for encryption and decryption here. AFAICS, the only difference between XTS encryption and decryption is whether the block cipher is used in encryption or decryption mode for the ECB step. So I suggest storing a function pointer in 'struct rctx' to either crypto_skcipher_encrypt or crypto_skcipher_decrypt, then calling through it for the ECB step. Then this code can be shared. In other words I'd like the top-level functions to look like this: static int encrypt(struct skcipher_request *req) { struct rctx *rctx = skcipher_request_ctx(req); rctx->crypt = crypto_skcipher_encrypt; return do_crypt(req); } static int decrypt(struct skcipher_request *req) { struct rctx *rctx = skcipher_request_ctx(req); rctx->crypt = crypto_skcipher_decrypt; return do_crypt(req); } I'm also wondering about the line which does 'subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;'. Is the real intent of that to clear the CRYPTO_TFM_REQ_MAY_SLEEP flag because the completion callback may be called in an atomic context, and if so shouldn't it just clear that flag only, rather than all flags except CRYPTO_TFM_REQ_MAY_BACKLOG? > + if (req->cryptlen > XTS_BUFFER_SIZE) { > + subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE); > + rctx->ext = kmalloc(subreq->cryptlen, gfp); > + } There's no check for failure to allocate the 'rctx->ext' memory. 
> + /* Alas we screwed up the naming so we have to mangle the > + * cipher name. > + */ > + if (!strncmp(cipher_name, "ecb(", 4)) { > + unsigned len; > > - inst->alg.cra_ctxsize = sizeof(struct priv); > + len = strlcpy(ctx->name, cipher_name + 4, sizeof(ctx->name)); > + if (len < 2 || len >= sizeof(ctx->name)) > + goto err_drop_spawn; > > - inst->alg.cra_init = init_tfm; > - inst->alg.cra_exit = exit_tfm; > + if (ctx->name[len - 1] != ')') > + goto err_drop_spawn; > > - inst->alg.cra_blkcipher.setkey = setkey; > - inst->alg.cra_blkcipher.encrypt = encrypt; > - inst->alg.cra_blkcipher.decrypt = decrypt; > + ctx->name[len - 1] = 0; > > -out_put_alg: > - crypto_mod_put(alg); > - return inst; > -} > + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, > +
Re: [v2 PATCH 7/16] crypto: simd - Add simd skcipher helper
On Sun, Nov 13, 2016 at 07:45:38PM +0800, Herbert Xu wrote: > This patch adds the simd skcipher helper which is meant to be > a replacement for ablk helper. It replaces the underlying blkcipher > interface with skcipher, and also presents the top-level algorithm > as an skcipher. I assume this means it's planned for all users of ablk_helper to be migrated to crypto_simd, and ablk_helper will be removed? > + salg = kzalloc(sizeof(*alg), GFP_KERNEL); > + if (!salg) { > + salg = ERR_PTR(-ENOMEM); > + goto out_put_tfm; > + } Shouldn't this be 'sizeof(*salg)'? > + tfm = crypto_alloc_skcipher(basename, CRYPTO_ALG_INTERNAL, > + CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC); > + if (IS_ERR(tfm)) > + return ERR_CAST(tfm); > + > + ialg = crypto_skcipher_alg(tfm); It seems this really just needs an algorithm and not a transform. Perhaps it should be calling crypto_find_alg() directly? > + err = -ENAMETOOLONG; > + if (snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", algname) >= > + CRYPTO_MAX_ALG_NAME) > + goto out_free_salg; > + > + if (snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", > + drvname) >= CRYPTO_MAX_ALG_NAME) > + goto out_free_salg; Could use strscpy() or strlcpy() here. > +static int simd_skcipher_encrypt(struct skcipher_request *req) > +{ > + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); > + struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); > + struct skcipher_request *subreq; > + struct crypto_skcipher *child; > + > + subreq = skcipher_request_ctx(req); > + *subreq = *req; > + > + if (!may_use_simd() || > + (in_atomic() && cryptd_skcipher_queued(ctx->cryptd_tfm))) > + child = &ctx->cryptd_tfm->base; > + else > + child = cryptd_skcipher_child(ctx->cryptd_tfm); > + > + skcipher_request_set_tfm(subreq, child); > + > + return crypto_skcipher_encrypt(subreq); > +} > + > +static int simd_skcipher_decrypt(struct skcipher_request *req) > +{ > + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); > + struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); > + struct skcipher_request *subreq; > + struct crypto_skcipher *child; > + > + subreq = skcipher_request_ctx(req); > + *subreq = *req; > + > + if (!may_use_simd() || > + (in_atomic() && cryptd_skcipher_queued(ctx->cryptd_tfm))) > + child = &ctx->cryptd_tfm->base; > + else > + child = cryptd_skcipher_child(ctx->cryptd_tfm); > + > + skcipher_request_set_tfm(subreq, child); > + > + return crypto_skcipher_decrypt(subreq); > +} These are the same except for the crypto_skcipher_encrypt/crypto_skcipher_decrypt at the end, so they could be mostly shared. -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
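As with the cryptd patch earlier in the series, the shared body could look something like the untested sketch below, assembled from the two functions quoted above.

static int simd_skcipher_crypt(struct skcipher_request *req,
                               int (*crypt)(struct skcipher_request *req))
{
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct skcipher_request *subreq;
        struct crypto_skcipher *child;

        subreq = skcipher_request_ctx(req);
        *subreq = *req;

        /* Pick the child transform exactly as the two functions above do. */
        if (!may_use_simd() ||
            (in_atomic() && cryptd_skcipher_queued(ctx->cryptd_tfm)))
                child = &ctx->cryptd_tfm->base;
        else
                child = cryptd_skcipher_child(ctx->cryptd_tfm);

        skcipher_request_set_tfm(subreq, child);

        return crypt(subreq);           /* crypto_skcipher_encrypt or _decrypt */
}

static int simd_skcipher_encrypt(struct skcipher_request *req)
{
        return simd_skcipher_crypt(req, crypto_skcipher_encrypt);
}

static int simd_skcipher_decrypt(struct skcipher_request *req)
{
        return simd_skcipher_crypt(req, crypto_skcipher_decrypt);
}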
Re: [PATCH 2/2] fscrypto: don't use on-stack buffer for key derivation
On Tue, Nov 15, 2016 at 11:47:04AM -0500, Theodore Ts'o wrote: > On Thu, Nov 03, 2016 at 03:03:02PM -0700, Eric Biggers wrote: > > With the new (in 4.9) option to use a virtually-mapped stack > > (CONFIG_VMAP_STACK), stack buffers cannot be used as input/output for > > the scatterlist crypto API because they may not be directly mappable to > > struct page. get_crypt_info() was using a stack buffer to hold the > > output from the encryption operation used to derive the per-file key. > > Fix it by using a heap buffer. > > > > This bug could most easily be observed in a CONFIG_DEBUG_SG kernel > > because this allowed the BUG in sg_set_buf() to be triggered. > > > > Signed-off-by: Eric Biggers > > This commit is on the fscrypt and dev branches on ext4.git. > > - Ted Hi Ted, Would it make any sense to send these two patches to Linus for v4.9-rc6, given that they fix bugs introduced in 4.9 with the virtually-mapped stack feature? Or would you prefer to wait and have them go to 4.9 via stable? Note that CONFIG_VMAP_STACK defaults to y on x86_64. Thanks, Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: vmalloced stacks and scatterwalk_map_and_copy()
On Mon, Nov 21, 2016 at 04:26:19PM +0800, Herbert Xu wrote: > crypto: scatterwalk - Remove unnecessary aliasing check in map_and_copy > > The aliasing check in map_and_copy is no longer necessary because > the IPsec ESP code no longer provides an IV that points into the > actual request data. As this check is now triggering BUG checks > due to the vmalloced stack code, I'm removing it. > > Reported-by: Eric Biggers > Signed-off-by: Herbert Xu > > diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c > index 52ce17a..c16c94f8 100644 > --- a/crypto/scatterwalk.c > +++ b/crypto/scatterwalk.c > @@ -68,10 +68,6 @@ void scatterwalk_map_and_copy(void *buf, struct > scatterlist *sg, > > sg = scatterwalk_ffwd(tmp, sg, start); > > - if (sg_page(sg) == virt_to_page(buf) && > - sg->offset == offset_in_page(buf)) > - return; > - > scatterwalk_start(&walk, sg); > scatterwalk_copychunks(buf, &walk, nbytes, out); > scatterwalk_done(&walk, out, 0); This looks fine to me if you're confident that the aliasing check is indeed no longer necessary. Another idea I had was to replace memcpy() with memmove(). But I don't want to be in a situation where we're stuck with memmove() forever because of users who probably don't even exist. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] crypto: acomp - don't use stack buffer in test_acomp()
With virtually-mapped stacks (CONFIG_VMAP_STACK=y), using the scatterlist crypto API with stack buffers is not allowed, and with appropriate debugging options will cause the 'BUG_ON(!virt_addr_valid(buf));' in sg_set_buf() to be triggered. Use a heap buffer instead. Fixes: d7db7a882deb ("crypto: acomp - update testmgr with support for acomp") Signed-off-by: Eric Biggers --- crypto/testmgr.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index ded50b6..aca1b7b 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1448,17 +1448,21 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, { const char *algo = crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm)); unsigned int i; - char output[COMP_BUF_SIZE]; + char *output; int ret; struct scatterlist src, dst; struct acomp_req *req; struct tcrypt_result result; + output = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); + if (!output) + return -ENOMEM; + for (i = 0; i < ctcount; i++) { unsigned int dlen = COMP_BUF_SIZE; int ilen = ctemplate[i].inlen; - memset(output, 0, sizeof(output)); + memset(output, 0, dlen); init_completion(&result.completion); sg_init_one(&src, ctemplate[i].input, ilen); sg_init_one(&dst, output, dlen); @@ -1507,7 +1511,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, unsigned int dlen = COMP_BUF_SIZE; int ilen = dtemplate[i].inlen; - memset(output, 0, sizeof(output)); + memset(output, 0, dlen); init_completion(&result.completion); sg_init_one(&src, dtemplate[i].input, ilen); sg_init_one(&dst, output, dlen); @@ -1555,6 +1559,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, ret = 0; out: + kfree(output); return ret; } -- 2.8.0.rc3.226.g39d4020 -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Crash in crypto mcryptd
On Thu, Dec 01, 2016 at 05:47:02PM -0800, Tim Chen wrote: > On Thu, 2016-12-01 at 19:00 -0500, Mikulas Patocka wrote: > > Hi > > > > There is a bug in mcryptd initialization. > > > > This is a test module that tries various hash algorithms. When you load > > the module with "insmod test.ko 'alg=mcryptd(md5)'", the machine crashes. > > I don't think your test setup is right. The mcryptd supports only > multi-buffer > algorithm. I don't think there is such an implementation for md5. > > Please refer to arch/x86/crypto/sha1-mb > multi-buffer implementation of sha1 to see the proper > setup and usage with mcryptd. You can also run tcrypt test to > exercise this code. > > Tim No, mcryptd must not crash the kernel if it's passed the wrong algorithm. Users can try to instantiate it with any algorithm using AF_ALG, for example: struct sockaddr_alg addr = { .salg_type = "hash", .salg_name = "mcryptd(md5)", }; int fd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(fd, (struct sockaddr *)&addr, sizeof(addr)); Currently, this instantly crashes the kernel. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 1/6] crypto: testmgr - avoid overlap in chunked tests
On Mon, Dec 05, 2016 at 06:42:23PM +, Ard Biesheuvel wrote: > The IDXn offsets are chosen such that tap values (which may go up to > 255) end up overlapping in the xbuf allocation. In particular, IDX1 > and IDX3 are too close together, so update IDX3 to avoid this issue. > Hi Ard, This patch is causing the self-tests for "xts(ecb(aes-asm))" to fail. This is on x86. Any idea why? Here's what I see in the log: alg: skcipher: Chunk test 1 failed on encryption at page 0 for xts(ecb(aes-asm)) : 1c 3b 3a 10 2f 77 03 86 e4 83 6c 99 e3 70 cf 9b 0010: ea 00 80 3f 5e 48 23 57 a4 ae 12 d4 14 a3 e6 3b 0020: 5d 31 e2 76 f8 fe 4a 8d 66 b3 17 f9 ac 68 3f 44 0030: 68 0a 86 ac 35 ad fc 33 45 be fe cb 4b b1 88 fd 0040: 57 76 92 6c 49 a3 09 5e b1 08 fd 10 98 ba ec 70 0050: aa a6 69 99 a7 2a 82 f2 7d 84 8b 21 d4 a7 41 b0 0060: c5 cd 4d 5f ff 9d ac 89 ae ba 12 29 61 d0 3a 75 0070: 71 23 e9 87 0f 8a cf 10 00 02 08 87 89 14 29 ca 0080: 2a 3e 7a 7d 7d f7 b1 03 55 16 5c 8b 9a 6d 0a 7d 0090: e8 b0 62 c4 50 0d c4 cd 12 0c 0f 74 18 da e3 d0 00a0: b5 78 1c 34 80 3f a7 54 21 c7 90 df e1 de 18 34 00b0: f2 80 d7 66 7b 32 7f 6c 8c d7 55 7e 12 ac 3a 0f 00c0: 93 ec 05 c5 2e 04 93 ef 31 a1 2d 3d 92 60 f7 9a 00d0: 28 9d 6a 37 9b c7 0c 50 84 14 73 d1 a8 cc 81 ec 00e0: 58 3e 96 45 e0 7b 8d 96 70 65 5b a5 bb cf ec c6 00f0: dc 39 66 38 0a d8 fe cb 17 b6 ba 02 46 9a 02 0a 0100: 84 e1 8e 8f 84 25 20 70 c1 3e 9f 1f 28 9b e5 4f 0110: bc 48 14 57 77 8f 61 60 15 e1 32 7a 02 b1 40 f1 0120: 50 5e b3 09 32 6d 68 37 8f 83 74 59 5c 84 9d 84 0130: f4 c3 33 ec 44 23 88 51 43 cb 47 bd 71 c5 ed ae 0140: 9b e6 9a 2f fe ce b1 be c9 de 24 4f be 15 99 2b 0150: 11 b7 7c 04 0f 12 bd 8f 6a 97 5a 44 a0 f9 0c 29 0160: a9 ab c3 d4 d8 93 92 72 84 c5 87 54 cc e2 94 52 0170: 9f 86 14 dc d2 ab a9 91 92 5f ed c4 ae 74 ff ac 0180: 6e 33 3b 93 eb 4a ff 04 79 da 9a 41 0e 44 50 e0 0190: dd 7a e4 c6 e2 91 09 00 57 5d a4 01 fc 07 05 9f 01a0: 64 5e 8b 7e 9b fd ef 33 94 30 54 ff 84 01 14 93 01b0: c2 7b 34 29 ea ed b4 ed 53 76 44 1a 77 ed 43 85 01c0: 1a d7 7f 16 f5 41 df d2 69 d5 0d 6a 5f 14 fb 0a 01d0: 1e 2a 8f 42 61 9e 5e c2 59 bd 96 d0 e5 cc 23 1f 01e0: fb 84 ed 15 a8 eb 66 07 31 6b f6 ef Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 1/6] crypto: testmgr - avoid overlap in chunked tests
On Wed, Dec 07, 2016 at 07:53:51PM +, Ard Biesheuvel wrote: > Does this help at all? > > diff --git a/crypto/testmgr.c b/crypto/testmgr.c > index 670893bcf361..59e67f5b544b 100644 > --- a/crypto/testmgr.c > +++ b/crypto/testmgr.c > @@ -63,7 +63,7 @@ int alg_test(const char *driver, const char *alg, > u32 type, u32 mask) > */ > #define IDX1 32 > #define IDX2 32400 > -#define IDX3 511 > +#define IDX3 1511 > #define IDX4 8193 > #define IDX5 2 > #define IDX6 17101 > Yes, with that change made the self-tests pass. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Remaining crypto API regressions with CONFIG_VMAP_STACK
In the 4.9 kernel, virtually-mapped stacks will be supported and enabled by default on x86_64. This has been exposing a number of problems in which on-stack buffers are being passed into the crypto API, which to support crypto accelerators operates on 'struct page' rather than on virtual memory. Some of these problems have already been fixed, but I was wondering how many problems remain, so I briefly looked through all the callers of sg_set_buf() and sg_init_one(). Overall I found quite a few remaining problems, detailed below. The following crypto drivers initialize a scatterlist to point into an ahash_request, which may have been allocated on the stack with AHASH_REQUEST_ON_STACK(): drivers/crypto/bfin_crc.c:351 drivers/crypto/qce/sha.c:299 drivers/crypto/sahara.c:973,988 drivers/crypto/talitos.c:1910 drivers/crypto/ccp/ccp-crypto-aes-cmac.c:105,119,142 drivers/crypto/ccp/ccp-crypto-sha.c:95,109,124 drivers/crypto/qce/sha.c:325 The following crypto drivers initialize a scatterlist to point into an ablkcipher_request, which may have been allocated on the stack with SKCIPHER_REQUEST_ON_STACK(): drivers/crypto/ccp/ccp-crypto-aes-xts.c:162 drivers/crypto/ccp/ccp-crypto-aes.c:94 And these other places do crypto operations on buffers clearly on the stack: drivers/net/wireless/intersil/orinoco/mic.c:72 drivers/usb/wusbcore/crypto.c:264 net/ceph/crypto.c:182 net/rxrpc/rxkad.c:737,1000 security/keys/encrypted-keys/encrypted.c:500 fs/cifs/smbencrypt.c:96 Note: I almost certainly missed some, since I excluded places where the use of a stack buffer was not obvious to me. I also excluded AEAD algorithms since there isn't an AEAD_REQUEST_ON_STACK() macro (yet). The "good" news with these bugs is that on x86_64 without CONFIG_DEBUG_SG=y or CONFIG_DEBUG_VIRTUAL=y, you can still do virt_to_page() and then page_address() on a vmalloc address and get back the same address, even though you aren't *supposed* to be able to do this. This will make things still work for most people. The bad news is that if you happen to have consumed just about 1 page (or N pages) of your stack at the time you call the crypto API, your stack buffer may actually span physically non-contiguous pages, so the crypto algorithm will scribble over some unrelated page. Also, hardware crypto drivers which actually do operate on physical memory will break too. So I am wondering: is the best solution really to make all these crypto API algorithms and users use heap buffers, as opposed to something like maintaining a lowmem alias for the stack, or introducing a more general function to convert buffers (possibly in the vmalloc space) into scatterlists? And if the current solution is desired, who is going to fix all of these bugs and when? Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
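To make the pattern concrete, here is a schematic example (my own illustration, not taken from any of the drivers listed above). The broken form points a scatterlist at a stack array; the usual fix is a heap buffer, which is always in the kernel direct mapping:

/*
 * Broken with CONFIG_VMAP_STACK=y:
 *
 *	u8 buf[SHA256_DIGEST_SIZE];
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, buf, sizeof(buf));	<-- buf may be vmalloc-backed
 *
 * One possible fix:
 */
static int example_op(void)
{
	struct scatterlist sg;
	u8 *buf = kmalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);

	if (!buf)
		return -ENOMEM;
	sg_init_one(&sg, buf, SHA256_DIGEST_SIZE);
	/* ... hand 'sg' to the crypto API here ... */
	kfree(buf);
	return 0;
}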
Re: Remaining crypto API regressions with CONFIG_VMAP_STACK
On Fri, Dec 09, 2016 at 09:25:38PM -0800, Andy Lutomirski wrote: > > The following crypto drivers initialize a scatterlist to point into an > > ahash_request, which may have been allocated on the stack with > > AHASH_REQUEST_ON_STACK(): > > > > drivers/crypto/bfin_crc.c:351 > > drivers/crypto/qce/sha.c:299 > > drivers/crypto/sahara.c:973,988 > > drivers/crypto/talitos.c:1910 > > This are impossible or highly unlikely on x86. > > > drivers/crypto/ccp/ccp-crypto-aes-cmac.c:105,119,142 > > drivers/crypto/ccp/ccp-crypto-sha.c:95,109,124 > > These > > > drivers/crypto/qce/sha.c:325 > > This is impossible on x86. > Thanks for looking into these. I didn't investigate who/what is likely to be using each driver. Of course I would not be surprised to see people want to start supporting virtually mapped stacks on other architectures too. > > > > The "good" news with these bugs is that on x86_64 without CONFIG_DEBUG_SG=y > > or > > CONFIG_DEBUG_VIRTUAL=y, you can still do virt_to_page() and then > > page_address() > > on a vmalloc address and get back the same address, even though you aren't > > *supposed* to be able to do this. This will make things still work for most > > people. The bad news is that if you happen to have consumed just about 1 > > page > > (or N pages) of your stack at the time you call the crypto API, your stack > > buffer may actually span physically non-contiguous pages, so the crypto > > algorithm will scribble over some unrelated page. > > Are you sure? If it round-trips to the same virtual address, it > doesn't matter if the buffer is contiguous. You may be right, I didn't test this. The hash_walk and blkcipher_walk code do go page by page, but I suppose on x86_64 it would just step from one bogus "struct page" to the adjacent one and still map it to the original virtual address. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [kernel-hardening] Re: Remaining crypto API regressions with CONFIG_VMAP_STACK
On Sat, Dec 10, 2016 at 01:32:08PM +0800, Herbert Xu wrote: > On Fri, Dec 09, 2016 at 09:25:38PM -0800, Andy Lutomirski wrote: > > > > > The following crypto drivers initialize a scatterlist to point into an > > > ablkcipher_request, which may have been allocated on the stack with > > > SKCIPHER_REQUEST_ON_STACK(): > > > > > > drivers/crypto/ccp/ccp-crypto-aes-xts.c:162 > > > drivers/crypto/ccp/ccp-crypto-aes.c:94 > > > > These are real, and I wish I'd known about them sooner. > > Are you sure? Any instance of *_ON_STACK must only be used with > sync algorithms and most drivers under drivers/crypto declare > themselves as async. > Why exactly is that? Obviously, it wouldn't work if you returned from the stack frame before the request completed, but does anything stop someone from using an *_ON_STACK() request and then waiting for the request to complete before returning from the stack frame? Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [kernel-hardening] Re: Remaining crypto API regressions with CONFIG_VMAP_STACK
On Sat, Dec 10, 2016 at 01:37:12PM +0800, Herbert Xu wrote: > On Fri, Dec 09, 2016 at 09:25:38PM -0800, Andy Lutomirski wrote: > > > > Herbert, how hard would it be to teach the crypto code to use a more > > sensible data structure than scatterlist and to use coccinelle fix > > this stuff for real? > > First of all we already have a sync non-SG hash interface, it's > called shash. > > If we had enough sync-only users of skcipher then I'll consider > adding an interface for it. However, at this point in time it > appears to more sense to convert such users over to the async > interface rather than the other way around. > > As for AEAD we never had a sync interface to begin with and I > don't think I'm going to add one. > Isn't the question of "should the API use physical or virtual addresses" independent of the question of "should the API support asynchronous requests"? You can already choose, via the flags and mask arguments when allocating a crypto transform, whether you want it to be synchronous or asynchronous or whether you don't care. I don't see what that says about whether the API should take in physical memory (e.g. scatterlists or struct pages) or virtual memory (e.g. iov_iters or just regular pointers). And while it's true that asynchronous algorithms are often provided by hardware drivers that operate on physical memory, it's not always the case. For example, some of the AES-NI algorithms are asynchronous only because they use the SSE registers, which aren't always available to kernel code, so the request may need to be processed by another thread. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Remaining crypto API regressions with CONFIG_VMAP_STACK
On Sat, Dec 10, 2016 at 04:16:43PM +0800, Herbert Xu wrote: > Why did you drop me from the CC list when you were replying to > my email? > Sorry --- this thread is Cc'ed to the kernel-hardening mailing list (which was somewhat recently revived), and I replied to the email that reached me from there. It looks like it currently behaves a little differently from the vger mailing lists, in that it replaces "Reply-To" with the address of the mailing list itself rather than the sender. So that's how you got dropped. It also seems to add a prefix to the subject... I > >> Are you sure? Any instance of *_ON_STACK must only be used with > >> sync algorithms and most drivers under drivers/crypto declare > >> themselves as async. > > > > Why exactly is that? Obviously, it wouldn't work if you returned from the > > stack > > frame before the request completed, but does anything stop someone from > > using an > > *_ON_STACK() request and then waiting for the request to complete before > > returning from the stack frame? > > The *_ON_STACK variants (except SHASH of course) were simply hacks > to help legacy crypto API users to cope with the new async interface. > In general we should avoid using the sync interface when possible. > > It's a bad idea for the obvious reason that most of our async > algorithms want to DMA and that doesn't work very well when you're > using memory from the stack. Sure, I just feel that the idea of "is this algorithm asynchronous?" is being conflated with the idea of "does this algorithm operate on physical memory?". Also, if *_ON_STACK are really not allowed with asynchronous algorithms can there at least be a comment or a WARN_ON() to express this? Thanks, Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Remaining crypto API regressions with CONFIG_VMAP_STACK
On Sun, Dec 11, 2016 at 11:13:55AM -0800, Andy Lutomirski wrote: > On Fri, Dec 9, 2016 at 3:08 PM, Eric Biggers wrote: > > In the 4.9 kernel, virtually-mapped stacks will be supported and enabled by > > default on x86_64. This has been exposing a number of problems in which > > on-stack buffers are being passed into the crypto API, which to support > > crypto > > accelerators operates on 'struct page' rather than on virtual memory. > > > > > fs/cifs/smbencrypt.c:96 > > This should use crypto_cipher_encrypt_one(), I think. > > --Andy Yes, I believe that's correct. It encrypts 8 bytes with ecb(des) which is equivalent to simply encrypting one block with DES. Maybe try the following (untested): static int smbhash(unsigned char *out, const unsigned char *in, unsigned char *key) { unsigned char key2[8]; struct crypto_cipher *cipher; str_to_key(key, key2); cipher = crypto_alloc_cipher("des", 0, 0); if (IS_ERR(cipher)) { cifs_dbg(VFS, "could not allocate des cipher\n"); return PTR_ERR(cipher); } crypto_cipher_setkey(cipher, key2, 8); crypto_cipher_encrypt_one(cipher, out, in); crypto_free_cipher(cipher); return 0; } - Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] siphash: add cryptographically secure hashtable function
On Mon, Dec 12, 2016 at 04:48:17AM +0100, Jason A. Donenfeld wrote: > > diff --git a/lib/Makefile b/lib/Makefile > index 50144a3aeebd..71d398b04a74 100644 > --- a/lib/Makefile > +++ b/lib/Makefile > @@ -22,7 +22,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ >sha1.o chacha20.o md5.o irq_regs.o argv_split.o \ >flex_proportions.o ratelimit.o show_mem.o \ >is_single_threaded.o plist.o decompress.o kobject_uevent.o \ > - earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o > + earlycpio.o seq_buf.o siphash.o \ > + nmi_backtrace.o nodemask.o win_minmax.o > > lib-$(CONFIG_MMU) += ioremap.o > lib-$(CONFIG_SMP) += cpumask.o > @@ -44,7 +45,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o > obj-y += kstrtox.o > obj-$(CONFIG_TEST_BPF) += test_bpf.o > obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o > -obj-$(CONFIG_TEST_HASH) += test_hash.o > +obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o Maybe add to the help text for CONFIG_TEST_HASH that it now tests siphash too? > +static inline u64 le64_to_cpuvp(const void *p) > +{ > + return le64_to_cpup(p); > +} This assumes the key and message buffers are aligned to __alignof__(u64). Unless that's going to be a clearly documented requirement for callers, you should use get_unaligned_le64() instead. And you can pass a 'u8 *' directly to get_unaligned_le64(), no need for a helper function. > + b = (v0 ^ v1) ^ (v2 ^ v3); > + return (__force u64)cpu_to_le64(b); > +} It makes sense for this to return a u64, but that means the cpu_to_le64() is wrong, since u64 indicates CPU endianness. It should just return 'b'. > +++ b/lib/test_siphash.c > @@ -0,0 +1,116 @@ > +/* Test cases for siphash.c > + * > + * Copyright (C) 2015-2016 Jason A. Donenfeld > + * > + * This file is provided under a dual BSD/GPLv2 license. > + * > + * SipHash: a fast short-input PRF > + * https://131002.net/siphash/ > + */ > + > +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > + > +#include > +#include > +#include > +#include > +#include > + > +static const u8 test_vectors[64][8] = { > + { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72 }, Can you mention in a comment where the test vectors came from? > + if (memcmp(&out, test_vectors[i], 8)) { > + pr_info("self-test %u: FAIL\n", i + 1); > + ret = -EINVAL; > + } If you make the output really be CPU-endian like I'm suggesting then this will need to be something like: if (out != get_unaligned_le64(test_vectors[i])) { Or else make the test vectors be an array of u64. - Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] orinoco: Use shash instead of ahash for MIC calculations
On Mon, Dec 12, 2016 at 12:55:55PM -0800, Andy Lutomirski wrote: > +int orinoco_mic(struct crypto_shash *tfm_michael, u8 *key, > u8 *da, u8 *sa, u8 priority, > u8 *data, size_t data_len, u8 *mic) > { > - AHASH_REQUEST_ON_STACK(req, tfm_michael); > - struct scatterlist sg[2]; > + SHASH_DESC_ON_STACK(desc, tfm_michael); > u8 hdr[ETH_HLEN + 2]; /* size of header + padding */ > int err; > > @@ -67,18 +66,27 @@ int orinoco_mic(struct crypto_ahash *tfm_michael, u8 *key, > hdr[ETH_ALEN * 2 + 2] = 0; > hdr[ETH_ALEN * 2 + 3] = 0; > > - /* Use scatter gather to MIC header and data in one go */ > - sg_init_table(sg, 2); > - sg_set_buf(&sg[0], hdr, sizeof(hdr)); > - sg_set_buf(&sg[1], data, data_len); > + desc->tfm = tfm_michael; > + desc->flags = 0; > > - if (crypto_ahash_setkey(tfm_michael, key, MIC_KEYLEN)) > - return -1; > + err = crypto_shash_setkey(tfm_michael, key, MIC_KEYLEN); > + if (err) > + return err; > + > + err = crypto_shash_init(desc); > + if (err) > + return err; > + > + err = crypto_shash_update(desc, hdr, sizeof(hdr)); > + if (err) > + return err; > + > + err = crypto_shash_update(desc, data, data_len); > + if (err) > + return err; > + > + err = crypto_shash_final(desc, mic); > + shash_desc_zero(desc); > > - ahash_request_set_tfm(req, tfm_michael); > - ahash_request_set_callback(req, 0, NULL, NULL); > - ahash_request_set_crypt(req, sg, mic, data_len + sizeof(hdr)); > - err = crypto_ahash_digest(req); > - ahash_request_zero(req); > return err; It's probably a good idea to always do shash_desc_zero(), even when something above it fails. Otherwise this looks fine. Thanks for sending these patches! Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
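Concretely, something along these lines (just a sketch on top of the posted patch) would clear the descriptor on all exit paths:

	err = crypto_shash_setkey(tfm_michael, key, MIC_KEYLEN);
	if (err)
		goto out;

	err = crypto_shash_init(desc);
	if (err)
		goto out;

	err = crypto_shash_update(desc, hdr, sizeof(hdr));
	if (err)
		goto out;

	err = crypto_shash_update(desc, data, data_len);
	if (err)
		goto out;

	err = crypto_shash_final(desc, mic);
out:
	shash_desc_zero(desc);
	return err;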
Re: [PATCH v3] siphash: add cryptographically secure hashtable function
On Mon, Dec 12, 2016 at 11:18:32PM +0100, Jason A. Donenfeld wrote: > + for (; data != end; data += sizeof(u64)) { > + m = get_unaligned_le64(data); > + v3 ^= m; > + SIPROUND; > + SIPROUND; > + v0 ^= m; > + } > +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 > + b |= le64_to_cpu(load_unaligned_zeropad(data) & > bytemask_from_count(left)); > +#else Hmm, I don't think you can really do load_unaligned_zeropad() without first checking for 'left != 0'. The fixup section for load_unaligned_zeropad() assumes that rounding the pointer down to a word boundary will produce an address from which an 'unsigned long' can be loaded. But if 'left = 0' and we happen to be on a page boundary with the next page unmapped, then this will not be true and the second load will still fault. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
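In other words, the tail load would presumably need to be guarded on 'left', roughly like this (untested sketch against the posted patch):

	if (left)
		b |= le64_to_cpu(load_unaligned_zeropad(data) &
				 bytemask_from_count(left));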
Re: [PATCH v2] keys/encrypted: Fix two crypto-on-the-stack bugs
On Wed, Dec 14, 2016 at 01:04:04PM +0800, Herbert Xu wrote: > On Tue, Dec 13, 2016 at 06:53:03PM -0800, Andy Lutomirski wrote: > > On Tue, Dec 13, 2016 at 6:48 PM, Andy Lutomirski wrote: > > > The driver put a constant buffer of all zeros on the stack and > > > pointed a scatterlist entry at it in two places. This doesn't work > > > with virtual stacks. Use ZERO_PAGE instead. > > > > Wait a second... > > > > > - sg_set_buf(&sg_out[1], pad, sizeof pad); > > > + sg_set_buf(&sg_out[1], empty_zero_page, 16); > > > > My fix here is obviously bogus (I meant to use ZERO_PAGE(0)), but what > > exactly is the code trying to do? The old code makes no sense. It's > > setting the *output* buffer to zeroed padding. > > It's decrypting so I presume it just needs to the extra space for > the padding and the result will be thrown away. > It looks like the data is zero-padded to a 16-byte AES block boundary before being encrypted with CBC, so the encrypted data may be longer than the original data. (I don't know why it doesn't use CTS mode instead, which would make the lengths the same.) Since the crypto API can do in-place operations, when *encrypting* I suggest copying the decrypted data to the output buffer, padding it with 0's, and encrypting it in-place. This eliminates the need for the stack buffer. But when *decrypting* there will be up to 15 extra throwaway bytes of output produced by the decryption. There must be space made for these, and since it's output it can't be empty_zero_page. I guess either check whether space can be made for it in-place, or allocate a temporary buffer... Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
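As a rough sketch of the in-place idea for the encrypt path (illustrative only, not a drop-in patch for encrypted-keys; the helper name is made up): copy the plaintext into the output buffer, zero the padding, then encrypt that buffer with the source and destination scatterlists pointing at the same memory:

/*
 * Returns the padded length; the caller then does sg_init_one() on 'out'
 * and encrypts in place (src == dst), so no separate pad buffer is needed.
 */
static unsigned int cbc_pad_in_place(u8 *out, const u8 *in, unsigned int len)
{
	unsigned int padded_len = round_up(len, AES_BLOCK_SIZE);

	memcpy(out, in, len);
	memset(out + len, 0, padded_len - len);
	return padded_len;
}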
[PATCH] crypto: testmgr - use kmemdup instead of kmalloc+memcpy
From: Eric Biggers It's recommended to use kmemdup instead of kmalloc followed by memcpy. Signed-off-by: Eric Biggers --- crypto/testmgr.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 44e888b0b041..881176ebd8a8 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1463,13 +1463,12 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, int ilen = ctemplate[i].inlen; void *input_vec; - input_vec = kmalloc(ilen, GFP_KERNEL); + input_vec = kmemdup(ctemplate[i].input, ilen, GFP_KERNEL); if (!input_vec) { ret = -ENOMEM; goto out; } - memcpy(input_vec, ctemplate[i].input, ilen); memset(output, 0, dlen); init_completion(&result.completion); sg_init_one(&src, input_vec, ilen); @@ -1525,13 +1524,12 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, int ilen = dtemplate[i].inlen; void *input_vec; - input_vec = kmalloc(ilen, GFP_KERNEL); + input_vec = kmemdup(dtemplate[i].input, ilen, GFP_KERNEL); if (!input_vec) { ret = -ENOMEM; goto out; } - memcpy(input_vec, dtemplate[i].input, ilen); memset(output, 0, dlen); init_completion(&result.completion); sg_init_one(&src, input_vec, ilen); -- 2.11.0 -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 5/6] crypto: aesni-intel - Add bulk request support
On Thu, Jan 12, 2017 at 01:59:57PM +0100, Ondrej Mosnacek wrote: > This patch implements bulk request handling in the AES-NI crypto drivers. > The major advantage of this is that with bulk requests, the kernel_fpu_* > functions (which are usually quite slow) are now called only once for the > whole > request. > Hi Ondrej, To what extent does the performance benefit of this patchset result from just the reduced numbers of calls to kernel_fpu_begin() and kernel_fpu_end()? If it's most of the benefit, would it make any sense to optimize kernel_fpu_begin() and kernel_fpu_end() instead? And if there are other examples besides kernel_fpu_begin/kernel_fpu_end where the bulk API would provide a significant performance boost, can you mention them? Interestingly, the arm64 equivalent to kernel_fpu_begin() (kernel_neon_begin_partial() in arch/arm64/kernel/fpsimd.c) appears to have an optimization where the SIMD registers aren't saved if they were already saved. I wonder why something similar isn't done on x86. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 0/5] Update LZ4 compressor module
On Thu, Jan 26, 2017 at 08:57:30AM +0100, Sven Schmidt wrote: > > This patchset is for updating the LZ4 compression module to a version based > on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast > which provides an "acceleration" parameter as a tradeoff between > high compression ratio and high compression speed. > > We want to use LZ4 fast in order to support compression in lustre > and (mostly, based on that) investigate data reduction techniques in behalf of > storage systems. > > Also, it will be useful for other users of LZ4 compression, as with LZ4 fast > it is possible to enable applications to use fast and/or high compression > depending on the usecase. > For instance, ZRAM is offering a LZ4 backend and could benefit from an updated > LZ4 in the kernel. > Hi Sven, [For some reason I didn't receive patch 1/5 and had to get it from patchwork... I'm not sure why. I'm subscribed to linux-crypto but not linux-kernel.] The proposed patch defines LZ4_MEMORY_USAGE to 10 which means that LZ4 compression will use a hash table of only 1024 bytes, containing only 256 entries, to find matches. This differs from upstream LZ4 1.7.3, which uses LZ4_MEMORY_USAGE of 14, as well as the previous LZ4 included in the Linux kernel, both of which specify the hash table size to be 16384 bytes, containing 4096 entries. Given that varying the hash table size is a trade-off between memory usage, speed, and compression ratio, is this an intentional difference and has it been benchmarked? Also, in lz4defs.h: > #if defined(__x86_64__) >typedef U64 reg_t; /* 64-bits in x32 mode */ > #else >typedef size_t reg_t;/* 32-bits in x32 mode */ > #endif Are you sure this really needed over just always using size_t? > #if LZ4_ARCH64 > #ifdef __BIG_ENDIAN__ > #define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3) > #else > #define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3) > #endif > #else > #ifdef __BIG_ENDIAN__ > #define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3) > #else > #define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3) > #endif > #endif LZ4_NBCOMMONBYTES() is defined incorrectly for 64-bit little endian; it should be using __builtin_ctzll(). Nit: can you also clean up the weird indentation (e.g. double tabs) in lz4defs.h? Thanks, Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
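Regarding the LZ4_NBCOMMONBYTES() issue above: presumably the 64-bit little-endian case was meant to use __builtin_ctzll(), i.e. (sketch):

#if LZ4_ARCH64
#ifdef __BIG_ENDIAN__
#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
#else
#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
#endif
#else
#ifdef __BIG_ENDIAN__
#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
#else
#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
#endif
#endif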
Re: 4.10 aesni-intel no longer having lrw/ablk_helper dependencies?
On Sun, Jan 29, 2017 at 10:31:59PM +0100, Arkadiusz Miskiewicz wrote: > Hi. > > [arekm@xps ~]$ modinfo --set-version 4.9.6 aesni-intel | grep depends > depends:glue_helper,aes-x86_64,lrw,cryptd,ablk_helper > > [arekm@xps ~]$ modinfo --set-version 4.10.0-rc5-00161-gfd694aaa46c7 aesni- > intel |grep depends > depends:glue_helper,aes-x86_64,crypto_simd,cryptd > > With 4.10.0 lrw and ablk_helper are no longer in dependencies while > aesni-intel seem to still need these. My luks encrypted rootfs fails to > unlock. Initrd generation script only installs modules based on dependencies > and that seems to be a reason for luks unlock failure with 4.10rc - some > missing modules. > > Failure looks like that: > $ insmod path/to/aesni-intel.ko > dmesg gets logged: > "AVX2 version of gcm_enc/dec engaged > AES CTR mode by8 optimization enabled." > and insmod reports that it cannot insert aesni-intel because of unresolved > symbol or unknown option but nothing more is logged in dmesg. > > 4.9.x works fine. > > What did change/how to figure out deps now? > > Both modules exist: Hi Arkadiusz, First, aesni-intel no longer includes an LRW implementation itself. Instead, the generic LRW module must be selected. Internally it will use the aesni-intel accelerated ECB algorithm if available. So you need to make sure that the "lrw" module is included in the initrd if it's not already. But I think the bigger problem is that aesni-intel couldn't be insmod'ed at all, which shouldn't happen. The problem might actually be related to the "pcbc" algorithm. Upon initialization, aesni-intel now tries to wrap "pcbc(__aes-aesni)" with the "fpu" template. This will fail if the "pcbc" module hasn't been inserted. I think this wasn't a problem before because the old code using ablk_helper instead of crypto_simd didn't try to find "pcbc" until someone asked for it, while now aesni-intel will try to find it immediately. And since aesni-intel has no direct dependency on pcbc, I'm guessing what happened is that pcbc didn't end up in your initrd even though it may have been built. (You can verify this by adding pcbc to your initrd and seeing if that works around the problem.) Herbert, would it make any sense to solve this by creating a real dependency of aesni-intel on pcbc, by making aesni-intel reference an exported symbol in pcbc if IS_ENABLED(CONFIG_PCBC)? Or do you / did you have something else in mind? I also think it's kind of weird that aesni-intel has to do anything with pcbc at all since it doesn't actually implement pcbc specifically; it's just wrapping it in the "fpu" template to avoid so many calls to kernel_fpu_begin/kernel_fpu_end. I wonder if there is a better solution to this, maybe even optimizing kernel_fpu_begin/kernel_fpu_end so that the wrapping isn't needed. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] dm: switch dm-verity to async hash crypto API
On Sun, Jan 29, 2017 at 09:39:20AM +0200, Gilad Ben-Yossef wrote: > Hi Odrej, > > On Thu, Jan 26, 2017 at 1:34 PM, Ondrej Mosnáček > wrote: > > Hi Gilad, > > > > 2017-01-24 15:38 GMT+01:00 Gilad Ben-Yossef : > >> - v->tfm = crypto_alloc_shash(v->alg_name, 0, 0); > >> + v->tfm = crypto_alloc_ahash(v->alg_name, 0, CRYPTO_ALG_ASYNC); > > > > I believe you should pass zero as the mask here. When flags == 0 and > > mask == CRYPTO_ALG_ASYNC, you are basically saying "I want only algs > > that have flags & CRYPTO_ALG_ASYNC == 0", which means you should only > > get ahash tfms that are always synchronous (see [1]). However, since > > you set a non-NULL callback in verity_hash_init, I don't think this > > was your intention. By setting the mask to zero, you should be able to > > get also an actual async tfm. > > > > Thanks, > > Ondrej > > > > [1] https://lkml.org/lkml/2016/12/13/904 > > Thank you very much for the review. > > I am the first to admit that I find the Crypto API very cryptic (pun > intended)... :-) > > I actually followed the example in Documentation/crypto/api-intro.txt. > I see now the example is > not doing what I though it was doing. Thank you for clarifying this. I > will send out a 2nd version. > > The thing I find puzzling in this is that I saw a difference in > latency when a async algorythm > provider driver with high priority (3000) was loaded vs. not. Based on > your description I would > expect the performance not to change. I will retest with the change > and will publish the results. > > Thanks! > Gilad Hi Gilad, One thing to keep in mind is that the crypto API somewhat conflates the concept of "is the algorithm asynchronous?" with "does this algorithm operate on virtual memory or physical memory?". The shash API is both synchronous and operates on virtual memory, and when using it you only have access to shash algorithms. The ahash API on the other hand operates on physical memory and can be used either synchronously or asynchronously. In addition, both shash and ahash algorithms may be accessed through the ahash API, and for shash algorithms the API will handle translating physical addresses to virtual addresses. So simply by using the ahash API instead of the shash API, even still requiring a "synchronous" algorithm, you may gain access to a different implementation of the algorithm, thereby changing the performance. So whenever doing any benchmarks I suggest including the actual driver name (cra_driver_name) of the algorithms used; otherwise it may be unclear why the performance changed. As for the patch, I haven't looked at it in detail, but I agree that if dm-verity is indeed operating on sufficiently large buffers in physically contiguous memory, then the ahash API would be better than the shash API. Note, that it also could be useful look into supporting having multiple async requests issued and pending at the same time, similar to what dm-crypt does. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
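As a practical note for the benchmarking point above: right after allocating the transform, the selected implementation can be logged, e.g. (sketch; the exact log helper is the driver's choice):

	pr_info("dm-verity: using hash implementation \"%s\"\n",
		crypto_tfm_alg_driver_name(crypto_ahash_tfm(v->tfm)));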
Re: [RFC PATCH] crypto: algapi - make crypto_xor() and crypto_inc() alignment agnostic
On Mon, Jan 30, 2017 at 02:11:29PM +, Ard Biesheuvel wrote: > Instead of unconditionally forcing 4 byte alignment for all generic > chaining modes that rely on crypto_xor() or crypto_inc() (which may > result in unnecessary copying of data when the underlying hardware > can perform unaligned accesses efficiently), make those functions > deal with unaligned input explicitly, but only if the Kconfig symbol > HAVE_EFFICIENT_UNALIGNED_ACCESS is set. This will allow us to drop > the alignmasks from the CBC, CMAC, CTR, CTS, PCBC and SEQIV drivers. > > For crypto_inc(), this simply involves making the 4-byte stride > conditional on HAVE_EFFICIENT_UNALIGNED_ACCESS being set, given that > it typically operates on 16 byte buffers. > > For crypto_xor(), an algorithm is implemented that simply runs through > the input using the largest strides possible if unaligned accesses are > allowed. If they are not, an optimal sequence of memory accesses is > emitted that takes the relative alignment of the input buffers into > account, e.g., if the relative misalignment of dst and src is 4 bytes, > the entire xor operation will be completed using 4 byte loads and stores > (modulo unaligned bits at the start and end). Note that all expressions > involving startalign and misalign are simply eliminated by the compiler > if HAVE_EFFICIENT_UNALIGNED_ACCESS is defined. > Hi Ard, This is a good idea, and I think it was error-prone to be requiring 4-byte alignment always, and also inefficient on many architectures. The new crypto_inc() looks fine, but the new crypto_xor() is quite complicated. I'm wondering whether it has to be that way, especially since it seems to most commonly be used on very small input buffers, e.g. 8 or 16-byte blocks. There are a couple trivial ways it could be simplified, e.g. using 'dst' and 'src' directly instead of 'a' and 'b' (which also seems to improve code generation by getting rid of the '+= len & ~mask' parts), or using sizeof(long) directly instead of 'size' and 'mask'. But also when I tried testing the proposed crypto_xor() on MIPS, it didn't work correctly on a misaligned buffer. With startalign=1, it did one iteration of the following loop and then exited with startalign=0 and entered the "unsigned long at a time" loop, which is incorrect since at that point the buffers were not yet fully aligned: > do { > if (len < sizeof(u8)) > break; > > if (len >= size && !(startalign & 1) && !(misalign & 1)) > break; > > *dst++ ^= *src++; > len -= sizeof(u8); > startalign &= ~sizeof(u8); > } while (misalign & 1); I think it would need to do instead: startalign += sizeof(u8); startalign %= sizeof(unsigned long); But I am wondering whether you considered something simpler, using the get_unaligned/put_unaligned helpers, maybe even using a switch statement for the last (sizeof(long) - 1) bytes so it can be compiled as a jump table. 
Something like this: #define xor_unaligned(dst, src) \ put_unaligned(get_unaligned(dst) ^ get_unaligned(src), (dst)) void crypto_xor(u8 *dst, const u8 *src, unsigned int len) { while (len >= sizeof(unsigned long)) { xor_unaligned((unsigned long *)dst, (unsigned long *)src); dst += sizeof(unsigned long); src += sizeof(unsigned long); len -= sizeof(unsigned long); } switch (len) { #ifdef CONFIG_64BIT case 7: dst[6] ^= src[6]; /* fall through */ case 6: xor_unaligned((u16 *)&dst[4], (u16 *)&src[4]); goto len_4; case 5: dst[4] ^= src[4]; /* fall through */ case 4: len_4: xor_unaligned((u32 *)dst, (u32 *)src); break; #endif case 3: dst[2] ^= src[2]; /* fall through */ case 2: xor_unaligned((u16 *)dst, (u16 *)src); break; case 1: dst[0] ^= src[0]; break; } } That would seem like a better choice for small buffers, which seems to be the more common case. It should generate slightly faster code on architectures with fast unaligned access like x86_64, while still being sufficient on architectures without --- perhaps even faster, since it wouldn't have as many branches. Eric
Re: [RFC PATCH v2 4/4] crypto: aes - add generic time invariant AES for CTR/CCM/GCM
Hi Ard, On Sat, Jan 28, 2017 at 11:33:33PM +, Ard Biesheuvel wrote: > > Note that this only implements AES encryption, which is all we need > for CTR and CBC-MAC. AES decryption can easily be implemented in a > similar way, but is significantly more costly. Is the expectation of decryption being more costly the only reason why "aes-ti" couldn't be a "cipher" algorithm, allowing it to automatically be used by the existing templates for CTR, CBC-MAC, CBC, ECB, XTS, CMAC, etc.? It doesn't seem to do anything expensive on a per-block basis like loading SSE registers, so it seems it would fit better as a "cipher" algorithm if at all possible. Then there would be no need to implement all these modes yet again. Also, what would be the feasibility of simply replacing aes-generic with the time-invariant implementation, rather than offering two implementations and requiring users to choose one, usually without the needed expertise? > + > +/* > + * Emit the sbox as __weak with external linkage to prevent the compiler > + * from doing constant folding on sbox references involving fixed indexes. > + */ > +__weak const u8 __cacheline_aligned __aesti_sbox[] = { Did you consider marking it 'volatile' instead? > +static int aesti_set_key(struct aes_ti_ctx *ctx, const u8 *in_key, > + unsigned int key_len) > +{ > + struct crypto_aes_ctx rk; > + int err; > + > + err = crypto_aes_expand_key(&rk, in_key, key_len); > + if (err) > + return err; crypto_aes_expand_key() assumes that the key is u32-aligned; I don't think that's guaranteed here. Eric
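Regarding the alignment point above, one possible way to avoid the assumption (sketch only, untested) is to copy the key into a suitably aligned buffer before expanding it:

	u8 aligned_key[AES_MAX_KEY_SIZE] __aligned(__alignof__(u32));
	struct crypto_aes_ctx rk;
	int err;

	if (key_len > sizeof(aligned_key))
		return -EINVAL;
	memcpy(aligned_key, in_key, key_len);

	err = crypto_aes_expand_key(&rk, aligned_key, key_len);
	if (err)
		return err;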
Re: [PATCH v2] crypto: algapi - make crypto_xor() and crypto_inc() alignment agnostic
Hi Ard, On Thu, Feb 02, 2017 at 03:56:28PM +, Ard Biesheuvel wrote: > + const int size = sizeof(unsigned long); > + int delta = ((unsigned long)dst ^ (unsigned long)src) & (size - 1); > + int misalign = 0; > + > + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && delta) { > + misalign = 1 << __ffs(delta); > + > + /* > + * If we care about alignment, process as many bytes as > + * needed to advance dst and src to values whose alignments > + * equal their relative misalignment. This will allow us to > + * process the remainder of the input using optimal strides. > + */ > + while (((unsigned long)dst & (misalign - 1)) && len > 0) { > + *dst++ ^= *src++; > + len--; > + } > + } > > + while (len >= size && !misalign) { > + *(unsigned long *)dst ^= *(unsigned long *)src; > + dst += size; > + src += size; > + len -= size; > + } > Unfortunately this is still broken, for two different reasons. First, if the pointers have the same relative misalignment, then 'delta' and 'misalign' will be set to 0 and long accesses will be used, even though the pointers may actually be misaligned, e.g. 0x8001 and 0x9001. Second, if the pointers have a relative misalignent that is not a power-of-2, then 'misalign' will be set to the wrong value. For example, with delta=3, it's actually only safe to do byte accesses, but the code will set misalign=2 and do u16 accesses. I kind of liked the version with put_unaligned/get_unaligned (and it seems to perform okay on MIPS, though not on ARM which is probably more important). But if the various cases with !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS are going to be handled/optimized I think they will just need to be separated out, maybe something like this: void crypto_xor(u8 *dst, const u8 *src, unsigned int len) { #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS unsigned long delta; if (((unsigned long)dst | (unsigned long)src | len) % sizeof(unsigned long) == 0) { /* fast path: everything is aligned, including len */ while (len >= sizeof(unsigned long)) { *(unsigned long *)dst ^= *(unsigned long *)src; dst += sizeof(unsigned long); src += sizeof(unsigned long); len -= sizeof(unsigned long); } return; } /* handle relative misalignment */ delta = (unsigned long)dst ^ (unsigned long)src; if (delta % sizeof(unsigned long)) { /* 1-byte relative misalignment: do byte accesses */ if (delta & 1) { while (len--) *dst++ ^= *src++; return; } /* 2-byte relative misalignment: do u16 accesses */ if ((delta & 2) || sizeof(unsigned long) == 4) { if ((unsigned long)dst % 2 && len) { *dst++ ^= *src++; len--; } while (len >= 2) { *(u16 *)dst ^= *(u16 *)src; dst += 2, src += 2, len -= 2; } if (len) *dst ^= *src; return; } /* 4-byte relative misalignment: do u32 accesses */ while ((unsigned long)dst % 4 && len) { *dst++ ^= *src++; len--; } while (len >= 4) { *(u32 *)dst ^= *(u32 *)src; dst += 4, src += 4, len -= 4; } while (len--) *dst++ ^= *src++; return; } /* handle absolute misalignment */ while ((unsigned long)dst % sizeof(unsigned long) && len) { *dst++ ^= *src++; len--; } #endif /* !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ while (len >= sizeof(unsigned long)) { *(unsigned long *)dst ^= *(unsigned long *)src; dst += sizeof(unsigned long); src += sizeof(unsigned long); len -= sizeof(unsigned long); } while (len--) *dst++ ^= *src++; }
Re: [PATCH v2] crypto: algapi - make crypto_xor() and crypto_inc() alignment agnostic
On Sat, Feb 04, 2017 at 01:20:38PM -0800, Eric Biggers wrote: > Unfortunately this is still broken, for two different reasons. First, if the > pointers have the same relative misalignment, then 'delta' and 'misalign' will > be set to 0 and long accesses will be used, even though the pointers may > actually be misaligned, e.g. 0x8001 and 0x9001. Second, if the > pointers > have a relative misalignent that is not a power-of-2, then 'misalign' will be > set to the wrong value. For example, with delta=3, it's actually only safe to > do byte accesses, but the code will set misalign=2 and do u16 accesses. > Correction: for the second issue I think I mixed up ffs and fls, so that part of the code was right. But it may still be a good idea to separate out the different cases. Eric
Re: [RFC PATCH] crypto: algapi - make crypto_xor() and crypto_inc() alignment agnostic
On Sun, Feb 05, 2017 at 12:10:53AM +0100, Jason A. Donenfeld wrote: > Another thing that might be helpful is that you can let gcc decide on > the alignment, and then optimize appropriately. Check out what we do > with siphash: > > https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/tree/include/linux/siphash.h#n76 > > static inline u64 siphash(const void *data, size_t len, const > siphash_key_t *key) > { > #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS > if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) > return __siphash_unaligned(data, len, key); > #endif > return ___siphash_aligned(data, len, key); > } > > With this trick, we fall through to the fast alignment-assuming code, > if gcc can prove that the address is inlined. This is often the case > when passing structs, or when passing buffers that have > __aligned(BLOCKSIZE). It proves to be a very useful optimization on > some platforms. Yes, this is a good idea. Though it seems that usually at least one of the two pointers passed to crypto_xor() will have alignment unknown to the compiler, sometimes the length is constant which inlining can help a lot for. For example, if someone does crypto_xor(foo, bar, 16) on x86_64 or ARM64, we'd really like it to turn into just a few instructions like this: mov(%rsi),%rax xor%rax,(%rdi) mov0x8(%rsi),%rax xor%rax,0x8(%rdi) So how about inlining crypto_xor() if CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS or the pointers are long-aligned, otherwise calling an out-of-line function __crypto_xor_unaligned() that handles all the cases with weird alignment. Something like the following patch: (Note: exactly how __crypto_xor_unaligned() is implemented is still debatable; it could be more similar to Ard's proposal, or it could use the unaligned access helpers.) diff --git a/crypto/algapi.c b/crypto/algapi.c index df939b54b09f..a0591db3f13a 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -972,23 +972,69 @@ void crypto_inc(u8 *a, unsigned int size) } EXPORT_SYMBOL_GPL(crypto_inc); -static inline void crypto_xor_byte(u8 *a, const u8 *b, unsigned int size) +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +void __crypto_xor_unaligned(u8 *dst, const u8 *src, unsigned int len) { - for (; size; size--) - *a++ ^= *b++; -} + unsigned long delta = (unsigned long)dst ^ (unsigned long)src; -void crypto_xor(u8 *dst, const u8 *src, unsigned int size) -{ - u32 *a = (u32 *)dst; - u32 *b = (u32 *)src; + /* Handle relative misalignment */ + if (delta % sizeof(unsigned long)) { + + /* 1-byte relative misalignment? */ + if (delta & 1) { + while (len--) + *dst++ ^= *src++; + return; + } - for (; size >= 4; size -= 4) - *a++ ^= *b++; + /* 2-byte relative misalignment? */ + if ((delta & 2) || sizeof(unsigned long) == 4) { + if ((unsigned long)dst % __alignof__(u16) && len) { + *dst++ ^= *src++; + len--; + } + while (len >= 2) { + *(u16 *)dst ^= *(u16 *)src; + dst += 2, src += 2, len -= 2; + } + if (len) + *dst ^= *src; + return; + } + + /* 4-byte relative misalignment? 
*/ + while ((unsigned long)dst % __alignof__(u32) && len) { + *dst++ ^= *src++; + len--; + } + while (len >= 4) { + *(u32 *)dst ^= *(u32 *)src; + dst += 4, src += 4, len -= 4; + } + while (len--) + *dst++ ^= *src++; + return; + } + + /* No relative misalignment; use word accesses */ + + while ((unsigned long)dst % __alignof__(unsigned long) && len) { + *dst++ ^= *src++; + len--; + } + + while (len >= sizeof(unsigned long)) { + *(unsigned long *)dst ^= *(unsigned long *)src; + dst += sizeof(unsigned long); + src += sizeof(unsigned long); + len -= sizeof(unsigned long); + } - crypto_xor_byte((u8 *)a, (u8 *)b, size); + while (len--) + *dst++ ^= *src++; } -EXPORT_SYMBOL_GPL(crypto_xor); +EXPORT_SYMBOL_GPL(__crypto_xor_unaligned); +#endif /* !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ unsigned int crypto_alg_extsize(struct crypto_alg *alg) { diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 404e9558e879..718145c5eaca 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -191,9 +191,29
Re: [PATCH v7 0/5] Update LZ4 compressor module
On Thu, Feb 09, 2017 at 08:31:21AM +0900, Minchan Kim wrote: > > Today, I did zram-lz4 performance test with fio in current mmotm and > found it makes regression about 20%. > This may or may not be the cause of the specific regression you're observing, but I just noticed that the proposed patch drops a lot of FORCEINLINE annotations from upstream LZ4. The FORCEINLINE's are there for a reason, especially for the main decompression and compression functions which are basically "templates" that take in different sets of constant parameters, and should be left in. We should #define FORCEINLINE to __always_inline somewhere, or just do a s/FORCEINLINE/__always_inline/g. Note that the upstream LZ4 code is very carefully optimized, so we should not, in general, be changing things like when functions are force-inlined, what the hash table size is, etc. [Also, for some reason linux-crypto is apparently still not receiving patch 1/5 in the series. It's missing from the linux-crypto archive at http://www.spinics.net/lists/linux-crypto/, so it's not just me.] Thanks! Eric
Re: [PATCH v7 0/5] Update LZ4 compressor module
Also I noticed another bug, this time in LZ4_count(): > #if defined(CONFIG_64BIT) > #define LZ4_ARCH64 1 > #else > #define LZ4_ARCH64 0 > #endif ... > #ifdef LZ4_ARCH64 >if ((pIn < (pInLimit-3)) >&& (LZ4_read32(pMatch) == LZ4_read32(pIn))) { >pIn += 4; pMatch += 4; >} > #endif Because of how LZ4_ARCH64 is defined, it needs to be '#if LZ4_ARCH64'. But I also think the way upstream LZ4 does 64-bit detection could have just been left as-is; it has a function which gets inlined: static unsigned LZ4_64bits(void) { return sizeof(void*)==8; } Eric
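That is, with the fix applied the block would read:

#if LZ4_ARCH64
	if ((pIn < (pInLimit-3))
	    && (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
		pIn += 4;
		pMatch += 4;
	}
#endif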
Re: [PATCH v7 0/5] Update LZ4 compressor module
On Thu, Feb 09, 2017 at 12:05:40PM +0100, Sven Schmidt wrote: > > Because of how LZ4_ARCH64 is defined, it needs to be '#if LZ4_ARCH64'. > > > > But I also think the way upstream LZ4 does 64-bit detection could have just been > > left as-is; it has a function which gets inlined: > > > > static unsigned LZ4_64bits(void) { return sizeof(void*)==8; } > > > > Eric > > does this apply for LZ4_isLittleEndian() as well? As a reminder: > > static unsigned LZ4_isLittleEndian(void) > { > /* don't use static : performance detrimental */ > const union { U32 u; BYTE c[4]; } one = { 1 }; > > return one.c[0]; > } > > It is surely easier to read and understand using these functions in favor of the macros. Yes, it makes sense to retain LZ4_isLittleEndian() too, mainly so that the call sites don't need to be changed and we have less chance of introducing bugs. I also believe the *implementation* of LZ4_isLittleEndian() using the union "hack" to detect endianness works fine and will get optimized correctly, though we could replace it with an #ifdef __LITTLE_ENDIAN__ if we wanted to. (I am sure that upstream LZ4 would do that if it was guaranteed to work, but upstream LZ4 needs to detect endianness reliably across many more different compilers, compiler versions, and platforms than the Linux kernel does, and there is no standard preprocessor macro for doing so.) Eric
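If the union ever did need to go away, a kernel-only version could simply key off the kernel's byteorder macros instead, e.g. (sketch; this uses the kernel's __LITTLE_ENDIAN from <asm/byteorder.h>, not the compiler-provided __LITTLE_ENDIAN__):

static unsigned int LZ4_isLittleEndian(void)
{
#ifdef __LITTLE_ENDIAN
	return 1;
#else
	return 0;
#endif
}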
Re: [PATCH v7 0/5] Update LZ4 compressor module
On Thu, Feb 09, 2017 at 12:02:11PM +0100, Sven Schmidt wrote: > > > > [Also, for some reason linux-crypto is apparently still not receiving patch > > 1/5 > > in the series. It's missing from the linux-crypto archive at > > http://www.spinics.net/lists/linux-crypto/, so it's not just me.] > > > > I don't really know what to do about this. I think the matter is the size of > the E-Mail. > Are there filters or something like that? Since in linux-kernel the patch > seems to get delivered. > I could otherwise CC you if you wish. > If I'm not mistaken, David Miller is the admin of the mail server on vger.kernel.org, and he already happens to be Cc'ed on this thread, so maybe he can answer as to why linux-crypto may be configured differently? Anyway, since the patch did make it to linux-kernel anyone can still download it from patchwork if they know where to look: https://patchwork.kernel.org/patch/9556271/ Eric
Re: [PATCH] lz4: fix performance regressions
Hi Sven, On Sun, Feb 12, 2017 at 12:16:18PM +0100, Sven Schmidt wrote: > /*- > * Reading and writing into memory > **/ > +typedef union { > + U16 u16; > + U32 u32; > + size_t uArch; > +} __packed unalign; > > -static inline U16 LZ4_read16(const void *memPtr) > +static FORCE_INLINE __maybe_unused U16 LZ4_read16(const void *ptr) > { > - U16 val; > - > - memcpy(&val, memPtr, sizeof(val)); > - > - return val; > + return ((const unalign *)ptr)->u16; > } > > -static inline U32 LZ4_read32(const void *memPtr) > +static FORCE_INLINE __maybe_unused U32 LZ4_read32(const void *ptr) > { > - U32 val; > - > - memcpy(&val, memPtr, sizeof(val)); > - > - return val; > + return ((const unalign *)ptr)->u32; > } > > -static inline size_t LZ4_read_ARCH(const void *memPtr) > +static FORCE_INLINE __maybe_unused size_t LZ4_read_ARCH(const void *ptr) > { > - size_t val; > - > - memcpy(&val, memPtr, sizeof(val)); > - > - return val; > + return ((const unalign *)ptr)->uArch; > } > > -static inline void LZ4_write16(void *memPtr, U16 value) > +static FORCE_INLINE __maybe_unused void LZ4_write16(void *memPtr, U16 value) > { > - memcpy(memPtr, &value, sizeof(value)); > + ((unalign *)memPtr)->u16 = value; > } > > -static inline void LZ4_write32(void *memPtr, U32 value) > -{ > - memcpy(memPtr, &value, sizeof(value)); > +static FORCE_INLINE __maybe_unused void LZ4_write32(void *memPtr, U32 value) > { > + ((unalign *)memPtr)->u32 = value; > } > > -static inline U16 LZ4_readLE16(const void *memPtr) > +static FORCE_INLINE __maybe_unused U16 LZ4_readLE16(const void *memPtr) > { > -#ifdef __LITTLE_ENDIAN__ > +#if LZ4_LITTLE_ENDIAN > return LZ4_read16(memPtr); > #else > const BYTE *p = (const BYTE *)memPtr; > @@ -137,19 +143,19 @@ static inline U16 LZ4_readLE16(const void *memPtr) > #endif > } Since upstream LZ4 is intended to be compiled at -O3, this may allow it to get away with using memcpy() for unaligned memory accesses. The reason it uses memcpy() is that, other than a byte-by-byte copy, it is the only portable way to express unaligned memory accesses. But the Linux kernel is sometimes compiled optimized for size (-Os), and I wouldn't be *too* surprised if some of the memcpy()'s don't always get inlined then, which could be causing the performance regression being observed. (Of course, this could be verified by checking whether CONFIG_CC_OPTIMIZE_FOR_SIZE=y is set, then reading the assembly.) But I don't think accessing a __packed structure directly is the right alternative. Instead, Linux already includes macros for unaligned memory accesses which have been optimized for every supported architecture. Those should just be used instead, e.g. 
like this: static FORCE_INLINE U16 LZ4_read16(const void *ptr) { return get_unaligned((const u16 *)ptr); } static FORCE_INLINE U32 LZ4_read32(const void *ptr) { return get_unaligned((const u32 *)ptr); } static FORCE_INLINE size_t LZ4_read_ARCH(const void *ptr) { return get_unaligned((const size_t *)ptr); } static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value) { put_unaligned(value, (u16 *)memPtr); } static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value) { put_unaligned(value, (u32 *)memPtr); } static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr) { return get_unaligned_le16(memPtr); } static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value) { return put_unaligned_le16(value, memPtr); } static FORCE_INLINE void LZ4_copy8(void *dst, const void *src) { if (LZ4_64bits()) { u64 a = get_unaligned((const u64 *)src); put_unaligned(a, (u64 *)dst); } else { u32 a = get_unaligned((const u32 *)src); u32 b = get_unaligned((const u32 *)src + 1); put_unaligned(a, (u32 *)dst); put_unaligned(b, (u32 *)dst + 1); } } Note that I dropped __maybe_unused as it's not needed on inline functions. That should be done everywhere else the patch proposes to add it too. > -#if LZ4_ARCH64 > -#ifdef __BIG_ENDIAN__ > -#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3) > +static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val) > +{ > +#if LZ4_LITTLE_ENDIAN > +#if LZ4_ARCH64 /* 64 Bits Little Endian */ > +#if defined(LZ4_FORCE_SW_BITCOUNT) > + static const int DeBruijnBytePos[64] = { > + 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, > + 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, > + 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, > + 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 > + }; > + > + return DeBruijnBytePos[((U64)((val & -(long long)val) > + * 0x0218A392CDABBD3FULL)) >> 58]; > #else > -#define LZ4_NBCOMMONBYTES(val) (__builtin_
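To make the tradeoff concrete, here is a minimal standalone sketch (plain userspace C, not the actual lz4 sources; read32_memcpy and read32_packed are made-up names) contrasting the portable memcpy-based accessor with the __packed-union accessor proposed in the patch. The kernel's get_unaligned()/put_unaligned() helpers from <asm/unaligned.h> give the best of both: they compile to a single load or store on architectures with efficient unaligned access and fall back to a safe byte-wise form elsewhere.

#include <stdint.h>
#include <string.h>

/* Portable form: correct everywhere, but depends on the compiler
 * inlining the memcpy(), which is less certain at -Os. */
static inline uint32_t read32_memcpy(const void *p)
{
	uint32_t v;

	memcpy(&v, p, sizeof(v));
	return v;
}

/* __packed-union form from the patch: GCC and Clang emit an unaligned
 * access, but this relies on compiler-specific behaviour instead of
 * the arch-optimized helpers. */
typedef union {
	uint16_t u16;
	uint32_t u32;
	size_t uArch;
} __attribute__((packed)) unalign;

static inline uint32_t read32_packed(const void *p)
{
	return ((const unalign *)p)->u32;
}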
[PATCH 3/4] crypto: gf128mul - rename the byte overflow tables
Though the GF(2^128) byte overflow tables were named the "lle" and "bbe" tables, they are not actually tied to these element formats specifically, but rather to particular a "bit endianness". For example, the bbe table is actually used for both bbe and ble multiplication. Therefore, rename the tables to "le" and "be" and update the comment to explain this. Cc: Alex Cope Signed-off-by: Eric Biggers --- crypto/gf128mul.c | 49 - 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c index c050cf6f5aa9..1fde1c79ffa5 100644 --- a/crypto/gf128mul.c +++ b/crypto/gf128mul.c @@ -88,31 +88,46 @@ q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \ } -/* Given the value i in 0..255 as the byte overflow when a field element -in GHASH is multiplied by x^8, this function will return the values that -are generated in the lo 16-bit word of the field value by applying the -modular polynomial. The values lo_byte and hi_byte are returned via the -macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into -memory as required by a suitable definition of this macro operating on -the table above -*/ +/* + * Given a value i in 0..255 as the byte overflow when a field element + * in GF(2^128) is multiplied by x^8, the following macro returns the + * 16-bit value that must be XOR-ed into the low-degree end of the + * product to reduce it modulo the polynomial x^128 + x^7 + x^2 + x + 1. + * + * There are two versions of the macro, and hence two tables: one for + * the "be" convention where the highest-order bit is the coefficient of + * the highest-degree polynomial term, and one for the "le" convention + * where the highest-order bit is the coefficient of the lowest-degree + * polynomial term. In both cases the values are stored in CPU byte + * endianness such that the coefficients are ordered consistently across + * bytes, i.e. in the "be" table bits 15..0 of the stored value + * correspond to the coefficients of x^15..x^0, and in the "le" table + * bits 15..0 correspond to the coefficients of x^0..x^15. + * + * Therefore, provided that the appropriate byte endianness conversions + * are done by the multiplication functions (and these must be in place + * anyway to support both little endian and big endian CPUs), the "be" + * table can be used for multiplications of both "bbe" and "ble" + * elements, and the "le" table can be used for multiplications of both + * "lle" and "lbe" elements. + */ -#define xda_bbe(i) ( \ +#define xda_be(i) ( \ (i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \ (i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \ (i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \ (i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \ ) -#define xda_lle(i) ( \ +#define xda_le(i) ( \ (i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \ (i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \ (i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \ (i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 
0x01c2 : 0) \ ) -static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle); -static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe); +static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le); +static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be); /* * The following functions multiply a field element by x or by x^8 in @@ -125,7 +140,7 @@ static void gf128mul_x_lle(be128 *r, const be128 *x) { u64 a = be64_to_cpu(x->a); u64 b = be64_to_cpu(x->b); - u64 _tt = gf128mul_table_lle[(b << 7) & 0xff]; + u64 _tt = gf128mul_table_le[(b << 7) & 0xff]; r->b = cpu_to_be64((b >> 1) | (a << 63)); r->a = cpu_to_be64((a >> 1) ^ (_tt << 48)); @@ -135,7 +150,7 @@ static void gf128mul_x_bbe(be128 *r, const be128 *x) { u64 a = be64_to_cpu(x->a); u64 b = be64_to_cpu(x->b); - u64 _tt = gf128mul_table_bbe[a >> 63]; + u64 _tt = gf128mul_table_be[a >> 63]; r->a = cpu_to_be64((a << 1) | (b >> 63)); r->b = cpu_to_be64((b << 1) ^ _tt); @@ -145,7 +160,7 @@ void gf128mul_x_ble(be128 *r, const be128 *x) { u64 a = le64_to_cpu(x->a); u64 b = le64_to_cpu(x->b); - u64 _tt = gf128mul_table_bbe[b >> 63]; + u64 _tt = gf128mul_table_be[b >> 63]; r->a = cpu_to_le64((a << 1) ^ _tt); r->b = cpu_to_le64((b << 1) | (a >> 63)); @@ -156,7 +171,7 @@ static void gf128mul_x8_lle(be128 *x) { u64 a = be64_to_
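As a quick sanity check on the renamed tables, the reduction values can be recomputed directly from the field polynomial x^128 + x^7 + x^2 + x + 1 given in the updated comment. The standalone sketch below (userspace C; xda_be_ref is a made-up name) should reproduce the constants hard-coded in xda_be(); the "le" table can be checked the same way by using 0xe100 >> b in place of 0x87 << b.

#include <stdint.h>
#include <stdio.h>

/*
 * For each overflow byte i (the coefficients of x^128..x^135 produced
 * when a field element is multiplied by x^8 in the "be" convention),
 * compute the 16-bit value to XOR into the low-degree end.  A set bit
 * b contributes the reduction of x^(128+b), i.e. (x^7 + x^2 + x + 1)
 * shifted up by b, which is 0x87 << b in "be" bit order.
 */
static uint16_t xda_be_ref(unsigned int i)
{
	uint16_t r = 0;
	int b;

	for (b = 0; b < 8; b++)
		if (i & (1u << b))
			r ^= (uint16_t)(0x87 << b);
	return r;
}

int main(void)
{
	/* Spot-check a couple of entries against the macro's constants. */
	printf("0x01 -> 0x%04x (xda_be gives 0x0087)\n", xda_be_ref(0x01));
	printf("0x80 -> 0x%04x (xda_be gives 0x4380)\n", xda_be_ref(0x80));
	return 0;
}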
[PATCH 0/4] crypto: gf128mul cleanups
This patchset makes a few cleanups to the generic GF(2^128) multiplication code to make it slightly easier to understand and modify. No functional changes are intended. Eric Biggers (4): crypto: gf128mul - fix some comments crypto: gf128mul - remove xx() macro crypto: gf128mul - rename the byte overflow tables crypto: gf128mul - constify 4k and 64k multiplication tables crypto/gf128mul.c | 86 +++ include/crypto/gf128mul.h | 32 +- 2 files changed, 67 insertions(+), 51 deletions(-) -- 2.11.0.483.g087da7b7c-goog
[PATCH 4/4] crypto: gf128mul - constify 4k and 64k multiplication tables
Constify the multiplication tables passed to the 4k and 64k multiplication functions, as they are not modified by these functions. Cc: Alex Cope Signed-off-by: Eric Biggers --- crypto/gf128mul.c | 6 +++--- include/crypto/gf128mul.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c index 1fde1c79ffa5..04facc0690aa 100644 --- a/crypto/gf128mul.c +++ b/crypto/gf128mul.c @@ -329,7 +329,7 @@ void gf128mul_free_64k(struct gf128mul_64k *t) } EXPORT_SYMBOL(gf128mul_free_64k); -void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t) +void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t) { u8 *ap = (u8 *)a; be128 r[1]; @@ -402,7 +402,7 @@ struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g) } EXPORT_SYMBOL(gf128mul_init_4k_bbe); -void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t) +void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t) { u8 *ap = (u8 *)a; be128 r[1]; @@ -417,7 +417,7 @@ void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t) } EXPORT_SYMBOL(gf128mul_4k_lle); -void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t) +void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t) { u8 *ap = (u8 *)a; be128 r[1]; diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h index 9662c4538873..0bc9b5f1c45e 100644 --- a/include/crypto/gf128mul.h +++ b/include/crypto/gf128mul.h @@ -174,8 +174,8 @@ struct gf128mul_4k { struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g); struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g); -void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t); -void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t); +void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t); +void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t); static inline void gf128mul_free_4k(struct gf128mul_4k *t) { @@ -196,6 +196,6 @@ struct gf128mul_64k { */ struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); void gf128mul_free_64k(struct gf128mul_64k *t); -void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t); +void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t); #endif /* _CRYPTO_GF128MUL_H */ -- 2.11.0.483.g087da7b7c-goog
[PATCH 2/4] crypto: gf128mul - remove xx() macro
The xx() macro serves no purpose and can be removed. Cc: Alex Cope Signed-off-by: Eric Biggers --- crypto/gf128mul.c | 18 -- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c index d9e3eecc218a..c050cf6f5aa9 100644 --- a/crypto/gf128mul.c +++ b/crypto/gf128mul.c @@ -97,20 +97,18 @@ the table above */ -#define xx(p, q) 0x##p##q - #define xda_bbe(i) ( \ - (i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \ - (i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \ - (i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \ - (i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \ + (i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \ + (i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \ + (i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \ + (i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \ ) #define xda_lle(i) ( \ - (i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \ - (i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \ - (i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \ - (i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \ + (i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \ + (i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \ + (i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \ + (i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \ ) static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle); -- 2.11.0.483.g087da7b7c-goog
[PATCH 1/4] crypto: gf128mul - fix some comments
Fix incorrect references to GF(128) instead of GF(2^128), as these are two entirely different fields, and fix a few other incorrect comments. Cc: Alex Cope Signed-off-by: Eric Biggers --- crypto/gf128mul.c | 13 +++-- include/crypto/gf128mul.h | 26 ++ 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c index 72015fee533d..d9e3eecc218a 100644 --- a/crypto/gf128mul.c +++ b/crypto/gf128mul.c @@ -44,7 +44,7 @@ --- Issue 31/01/2006 - This file provides fast multiplication in GF(128) as required by several + This file provides fast multiplication in GF(2^128) as required by several cryptographic authentication modes */ @@ -116,9 +116,10 @@ static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle); static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe); -/* These functions multiply a field element by x, by x^4 and by x^8 - * in the polynomial field representation. It uses 32-bit word operations - * to gain speed but compensates for machine endianess and hence works +/* + * The following functions multiply a field element by x or by x^8 in + * the polynomial field representation. They use 64-bit word operations + * to gain speed but compensate for machine endianness and hence work * correctly on both styles of machine. */ @@ -251,7 +252,7 @@ EXPORT_SYMBOL(gf128mul_bbe); /* This version uses 64k bytes of table space. A 16 byte buffer has to be multiplied by a 16 byte key -value in GF(128). If we consider a GF(128) value in +value in GF(2^128). If we consider a GF(2^128) value in the buffer's lowest byte, we can construct a table of the 256 16 byte values that result from the 256 values of this byte. This requires 4096 bytes. But we also @@ -330,7 +331,7 @@ EXPORT_SYMBOL(gf128mul_64k_bbe); /* This version uses 4k bytes of table space. A 16 byte buffer has to be multiplied by a 16 byte key -value in GF(128). If we consider a GF(128) value in a +value in GF(2^128). If we consider a GF(2^128) value in a single byte, we can construct a table of the 256 16 byte values that result from the 256 values of this byte. This requires 4096 bytes. If we take the highest byte in diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h index 592d47e565a8..9662c4538873 100644 --- a/include/crypto/gf128mul.h +++ b/include/crypto/gf128mul.h @@ -43,7 +43,7 @@ --- Issue Date: 31/01/2006 - An implementation of field multiplication in Galois Field GF(128) + An implementation of field multiplication in Galois Field GF(2^128) */ #ifndef _CRYPTO_GF128MUL_H @@ -65,7 +65,7 @@ * are left and the lsb's are right. char b[16] is an array and b[0] is * the first octet. * - * 8000 + * 1000 * b[0] b[1] b[2] b[3] b[13]b[14]b[15] * * Every bit is a coefficient of some power of X. We can store the bits @@ -85,15 +85,17 @@ * Both of the above formats are easy to implement on big-endian * machines. * - * EME (which is patent encumbered) uses the ble format (bits are stored - * in big endian order and the bytes in little endian). The above buffer - * represents X^7 in this case and the primitive polynomial is b[0] = 0x87. + * XTS and EME (the latter of which is patent encumbered) use the ble + * format (bits are stored in big endian order and the bytes in little + * endian). The above buffer represents X^7 in this case and the + * primitive polynomial is b[0] = 0x87. * * The common machine word-size is smaller than 128 bits, so to make * an efficient implementation we must split into machine word sizes. - * This file uses one 32bit for the moment. 
Machine endianness comes into - * play. The lle format in relation to machine endianness is discussed - * below by the original author of gf128mul Dr Brian Gladman. + * This implementation uses 64-bit words for the moment. Machine + * endianness comes into play. The lle format in relation to machine + * endianness is discussed below by the original author of gf128mul Dr + * Brian Gladman. * * Let's look at the bbe and ble format on a little endian machine. * @@ -127,10 +129,10 @@ * machines this will automatically aligned to wordsize and on a 64-bit * machine also. */ -/* Multiply a GF128 field element by x. Field elements are held in arrays -of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower -indexed bits placed in the more numerically significant bit positions -within bytes. +/* Multiply a GF(2^128) field element by x. Field elements are +held in arrays of bytes in which field bits 8n..8n
[PATCH 0/2] crypto: constify test vectors
From: Eric Biggers These two patches mark all the cryptographic test vectors as 'const'. This has several potential advantages and moves a large amount of data from .data to .rodata when the tests are enabled. The second patch does the real work; the first just prepares for it by updating a function to take a const buffer argument. Eric Biggers (2): crypto: kpp - constify buffer passed to crypto_kpp_set_secret() crypto: testmgr - constify all test vectors crypto/dh.c | 3 +- crypto/ecdh.c | 3 +- crypto/testmgr.c | 71 ++-- crypto/testmgr.h | 512 +- drivers/crypto/qat/qat_common/qat_asym_algs.c | 2 +- include/crypto/kpp.h | 6 +- 6 files changed, 305 insertions(+), 292 deletions(-) -- 2.11.0.483.g087da7b7c-goog
[PATCH 1/2] crypto: kpp - constify buffer passed to crypto_kpp_set_secret()
From: Eric Biggers Constify the buffer passed to crypto_kpp_set_secret() and kpp_alg.set_secret, since it is never modified. Signed-off-by: Eric Biggers --- crypto/dh.c | 3 ++- crypto/ecdh.c | 3 ++- drivers/crypto/qat/qat_common/qat_asym_algs.c | 2 +- include/crypto/kpp.h | 6 +++--- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/crypto/dh.c b/crypto/dh.c index ddcb528ab2cc..87e3542cf1b8 100644 --- a/crypto/dh.c +++ b/crypto/dh.c @@ -79,7 +79,8 @@ static int dh_set_params(struct dh_ctx *ctx, struct dh *params) return 0; } -static int dh_set_secret(struct crypto_kpp *tfm, void *buf, unsigned int len) +static int dh_set_secret(struct crypto_kpp *tfm, const void *buf, +unsigned int len) { struct dh_ctx *ctx = dh_get_ctx(tfm); struct dh params; diff --git a/crypto/ecdh.c b/crypto/ecdh.c index 3de289806d67..63ca33771e4e 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -38,7 +38,8 @@ static unsigned int ecdh_supported_curve(unsigned int curve_id) } } -static int ecdh_set_secret(struct crypto_kpp *tfm, void *buf, unsigned int len) +static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, + unsigned int len) { struct ecdh_ctx *ctx = ecdh_get_ctx(tfm); struct ecdh params; diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 0d35dca2e925..2aab80bc241f 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -491,7 +491,7 @@ static void qat_dh_clear_ctx(struct device *dev, struct qat_dh_ctx *ctx) ctx->g2 = false; } -static int qat_dh_set_secret(struct crypto_kpp *tfm, void *buf, +static int qat_dh_set_secret(struct crypto_kpp *tfm, const void *buf, unsigned int len) { struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h index 4307a2f2365f..ce8e1f79374b 100644 --- a/include/crypto/kpp.h +++ b/include/crypto/kpp.h @@ -74,7 +74,7 @@ struct crypto_kpp { * @base: Common crypto API algorithm data structure */ struct kpp_alg { - int (*set_secret)(struct crypto_kpp *tfm, void *buffer, + int (*set_secret)(struct crypto_kpp *tfm, const void *buffer, unsigned int len); int (*generate_public_key)(struct kpp_request *req); int (*compute_shared_secret)(struct kpp_request *req); @@ -273,8 +273,8 @@ struct kpp_secret { * * Return: zero on success; error code in case of error */ -static inline int crypto_kpp_set_secret(struct crypto_kpp *tfm, void *buffer, - unsigned int len) +static inline int crypto_kpp_set_secret(struct crypto_kpp *tfm, + const void *buffer, unsigned int len) { struct kpp_alg *alg = crypto_kpp_alg(tfm); -- 2.11.0.483.g087da7b7c-goog
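For out-of-tree or new kpp drivers, the change simply means the set_secret callback must treat the secret blob as read-only. Here is a hedged sketch of a callback with the new prototype (example_dh_set_secret is a hypothetical name; crypto_dh_decode_key() and struct dh are the existing helpers used by crypto/dh.c):

#include <crypto/kpp.h>
#include <crypto/dh.h>

static int example_dh_set_secret(struct crypto_kpp *tfm, const void *buf,
				 unsigned int len)
{
	struct dh params;

	/* The secret blob is only parsed here, never written, hence const. */
	if (crypto_dh_decode_key(buf, len, &params) < 0)
		return -EINVAL;

	/*
	 * A real driver would now import params.p, params.g and params.key
	 * into its tfm context, as dh.c and qat_asym_algs.c do above.
	 */
	return 0;
}

The callback would be hooked up through the .set_secret member of struct kpp_alg, whose prototype this patch updates.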
[PATCH 2/2] crypto: testmgr - constify all test vectors
From: Eric Biggers Cryptographic test vectors should never be modified, so constify them to enforce this at both compile-time and run-time. This moves a significant amount of data from .data to .rodata when the crypto tests are enabled. Signed-off-by: Eric Biggers --- crypto/testmgr.c | 71 crypto/testmgr.h | 512 +++ 2 files changed, 297 insertions(+), 286 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index f9c378af3907..89f1dd1f4b13 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -83,47 +83,47 @@ struct tcrypt_result { struct aead_test_suite { struct { - struct aead_testvec *vecs; + const struct aead_testvec *vecs; unsigned int count; } enc, dec; }; struct cipher_test_suite { struct { - struct cipher_testvec *vecs; + const struct cipher_testvec *vecs; unsigned int count; } enc, dec; }; struct comp_test_suite { struct { - struct comp_testvec *vecs; + const struct comp_testvec *vecs; unsigned int count; } comp, decomp; }; struct hash_test_suite { - struct hash_testvec *vecs; + const struct hash_testvec *vecs; unsigned int count; }; struct cprng_test_suite { - struct cprng_testvec *vecs; + const struct cprng_testvec *vecs; unsigned int count; }; struct drbg_test_suite { - struct drbg_testvec *vecs; + const struct drbg_testvec *vecs; unsigned int count; }; struct akcipher_test_suite { - struct akcipher_testvec *vecs; + const struct akcipher_testvec *vecs; unsigned int count; }; struct kpp_test_suite { - struct kpp_testvec *vecs; + const struct kpp_testvec *vecs; unsigned int count; }; @@ -145,7 +145,8 @@ struct alg_test_desc { } suite; }; -static unsigned int IDX[8] = { IDX1, IDX2, IDX3, IDX4, IDX5, IDX6, IDX7, IDX8 }; +static const unsigned int IDX[8] = { + IDX1, IDX2, IDX3, IDX4, IDX5, IDX6, IDX7, IDX8 }; static void hexdump(unsigned char *buf, unsigned int len) { @@ -203,7 +204,7 @@ static int wait_async_op(struct tcrypt_result *tr, int ret) } static int ahash_partial_update(struct ahash_request **preq, - struct crypto_ahash *tfm, struct hash_testvec *template, + struct crypto_ahash *tfm, const struct hash_testvec *template, void *hash_buff, int k, int temp, struct scatterlist *sg, const char *algo, char *result, struct tcrypt_result *tresult) { @@ -260,9 +261,9 @@ static int ahash_partial_update(struct ahash_request **preq, return ret; } -static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, - unsigned int tcount, bool use_digest, - const int align_offset) +static int __test_hash(struct crypto_ahash *tfm, + const struct hash_testvec *template, unsigned int tcount, + bool use_digest, const int align_offset) { const char *algo = crypto_tfm_alg_driver_name(crypto_ahash_tfm(tfm)); size_t digest_size = crypto_ahash_digestsize(tfm); @@ -538,7 +539,8 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, return ret; } -static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, +static int test_hash(struct crypto_ahash *tfm, +const struct hash_testvec *template, unsigned int tcount, bool use_digest) { unsigned int alignmask; @@ -566,7 +568,7 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, } static int __test_aead(struct crypto_aead *tfm, int enc, - struct aead_testvec *template, unsigned int tcount, + const struct aead_testvec *template, unsigned int tcount, const bool diff_dst, const int align_offset) { const char *algo = crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm)); @@ -957,7 +959,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc, } static int 
test_aead(struct crypto_aead *tfm, int enc, -struct aead_testvec *template, unsigned int tcount) +const struct aead_testvec *template, unsigned int tcount) { unsigned int alignmask; int ret; @@ -990,7 +992,8 @@ static int test_aead(struct crypto_aead *tfm, int enc, } static int test_cipher(struct crypto_cipher *tfm, int enc, - struct cipher_testvec *template, unsigned int tcount) + const struct cipher_testvec *template, + unsigned int tcount) { const char *algo = crypto_tfm_alg_driver_name(crypto_cipher_tfm(tfm)); unsigned int i, j, k; @@ -1068,7 +1071,8 @@ static int test_cipher(struct crypto_cipher *tfm
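To illustrate what the testmgr.h half of the diff looks like in practice, here is a hypothetical constified test vector (example_md5_tv_template is a made-up name; the field names follow struct hash_testvec, and the digest is the well-known MD5 of "abc" from RFC 1321). With both the array and the struct members const, the data ends up in .rodata:

static const struct hash_testvec example_md5_tv_template[] = {
	{
		.plaintext	= "abc",
		.psize		= 3,
		.digest		= "\x90\x01\x50\x98\x3c\xd2\x4f\xb0"
				  "\xd6\x96\x3f\x7d\x28\xe1\x7f\x72",
	},
};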
Re: [PATCH v4 0/8] crypto: aes - retire table based generic AES
On Mon, Jul 24, 2017 at 07:59:43AM +0100, Ard Biesheuvel wrote: > On 18 July 2017 at 13:06, Ard Biesheuvel wrote: > > The generic AES driver uses 16 lookup tables of 1 KB each, and has > > encryption and decryption routines that are fully unrolled. Given how > > the dependencies between this code and other drivers are declared in > > Kconfig files, this code is always pulled into the core kernel, even > > if it is usually superseded at runtime by accelerated drivers that > > exist for many architectures. > > > > This leaves us with 25 KB of dead code in the kernel, which is negligible > > in typical environments, but which is actually a big deal for the IoT > > domain, where every kilobyte counts. > > > > Also, the scalar, table based AES routines that exist for ARM, arm64, i586 > > and x86_64 share the lookup tables with AES generic, and may be invoked > > occasionally when the time-invariant AES-NI or other special instruction > > drivers are called in interrupt context, at which time the SIMD register > > file cannot be used. Pulling 16 KB of code and 9 KB of instructions into > > the L1s (and evicting what was already there) when a softirq happens to > > be handled in the context of an interrupt taken from kernel mode (which > > means no SIMD on x86) is also something that we may like to avoid, by > > falling back to a much smaller and moderately less performant driver. > > (Note that arm64 will be updated shortly to supply fallbacks for all > > SIMD based AES implementations, which will be based on the core routines) > > > > For the reasons above, this series refactors the way the various AES > > implementations are wired up, to allow the generic version in > > crypto/aes_generic.c to be omitted from the build entirely. > > > > Patch #1 removes some bogus 'select CRYPTO_AES' statement. > > > > Patch #2 factors out aes-generic's lookup tables, which are shared with > > arch-specific implementations in arch/x86, arch/arm and arch/arm64. > > > > Patch #3 replaces the table based aes-generic.o with a new aes.o based on > > the fixed time cipher, and uses it to fulfil dependencies on CRYPTO_AES. > > > > Patch #4 switches the fallback in the AES-NI code to the new, generic > > encrypt > > and decrypt routines so it no longer depends on the x86 scalar code or > > [transitively] on AES-generic. > > > > Patch #5 tweaks the ARM table based code to only use 2 KB + 256 bytes worth > > of lookup tables instead of 4 KB. > > > > Patch #6 does the same for arm64 > > > > Patch #7 removes the local copy of the AES sboxes from the arm64 NEON > > driver, > > and switches to the ones exposed by the new AES core module instead. > > > > Patch #8 updates the Kconfig help text to be more descriptive of what they > > actually control, rather than duplicating AES's wikipedia entry a number of > > times. > > > > v4: - remove aes-generic altogether instead of allow a preference to be set > > Actually, after benchmarking the x86_64 asm AES code, I am not so sure > we should remove AES_GENERIC at all, since it turns out to be faster. > Interestingly, I found a remark by Eric in the git log stating the > same, so if we want to cut down on AES variants, we should probably > start by deleting the x86 code instead. > > So please disregard this for now: I will rework the other stuff I have > so it no longer depends on this, and repost, because it is much more > important for me that that makes it into v4.14. 
This can wait for > v4.15, as far as I am concerned, and I will benchmark a bit more to > see if we can get rid of the i586 code as well. > Yes I did notice that aes-generic was actually faster. Probably the x86_64-asm implementation should be removed, but it may be worthwhile to try a few different gcc versions to see how well they compile aes-generic. I expect that x86_64-asm used to be faster but gcc has gotten smarter. Also x86_64-asm is only really useful on older CPUs, so ideally it should be benchmarked on an older CPU. Eric
[PATCH v2 1/7] fscrypt: add v2 encryption context and policy
From: Eric Biggers Currently, the fscrypt_context (i.e. the encryption xattr) does not contain a cryptographically secure identifier for the master key's payload. Therefore it's not possible to verify that the correct key was supplied, which is problematic in multi-user scenarios. To make this possible, define a new fscrypt_context version (v2) which includes a key_hash field, and allow userspace to opt-in to it when setting an encryption policy by setting fscrypt_policy.version to 2. For now just zero the new field; a later patch will start setting it for real. Even though we aren't changing the layout of struct fscrypt_policy (i.e. the struct used by the ioctls), the new context version still has to be "opt-in" because old kernels will not recognize it, and the keyring key will now need to be available when setting an encryption policy, which is an API change. We'll also be taking the opportunity to make another API change (dropping support for the filesystem-specific key prefixes). Previously, the version numbers were 0 in the fscrypt_policy and 1 in the fscrypt_context. Rather than incrementing them to 1 and 2, make them both 2 to be consistent with each other. It's not required that these numbers match, but it should make things less confusing. An alternative to adding a key_hash field would have been to reuse master_key_descriptor. However, master_key_descriptor is only 8 bytes, which is too short to be a cryptographically secure hash. Thus, master_key_descriptor would have needed to be lengthened to 16+ bytes, which would have required defining a fscrypt_policy_v2 structure and adding a FS_IOC_GET_ENCRYPTION_POLICY_V2 ioctl. It also would have required userspace to start using a specific hash algorithm to create the key descriptors, which would have made the API harder to use. Perhaps it should have been done that way originally, but at this point it seems better to keep the API simpler. Acked-by: Michael Halcrow Signed-off-by: Eric Biggers --- fs/crypto/fscrypt_private.h| 84 ++ fs/crypto/keyinfo.c| 14 +++ fs/crypto/policy.c | 73 ++-- include/linux/fscrypt_common.h | 2 +- include/uapi/linux/fs.h| 6 +++ 5 files changed, 135 insertions(+), 44 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index a1d5021c31ef..8329fb905ac6 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -24,40 +24,92 @@ #define FS_AES_256_CTS_KEY_SIZE32 #define FS_AES_256_XTS_KEY_SIZE64 -#define FS_KEY_DERIVATION_NONCE_SIZE 16 +#define FS_KEY_DERIVATION_NONCE_SIZE 16 +#define FSCRYPT_KEY_HASH_SIZE 16 /** - * Encryption context for inode + * fscrypt_context - the encryption context for an inode * - * Protector format: - * 1 byte: Protector format (1 = this version) - * 1 byte: File contents encryption mode - * 1 byte: File names encryption mode - * 1 byte: Flags - * 8 bytes: Master Key descriptor - * 16 bytes: Encryption Key derivation nonce + * Filesystems usually store this in an extended attribute. It identifies the + * encryption algorithm and key with which the file is encrypted. + * + * Since this is stored on-disk, be careful not to reorder fields or add any + * implicit padding bytes! */ struct fscrypt_context { - u8 format; + /* v1+ */ + + /* Version of this structure */ + u8 version; + + /* Encryption mode for the contents of regular files */ u8 contents_encryption_mode; + + /* Encryption mode for filenames in directories and symlink targets */ u8 filenames_encryption_mode; + + /* Options that affect how encryption is done (e.g. 
padding amount) */ u8 flags; + + /* Descriptor for this file's master key in the keyring */ u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; + + /* +* A unique value used in combination with the master key to derive the +* file's actual encryption key +*/ u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE]; -} __packed; -#define FS_ENCRYPTION_CONTEXT_FORMAT_V11 + /* v2+ */ + + /* Cryptographically secure hash of the master key */ + u8 key_hash[FSCRYPT_KEY_HASH_SIZE]; +}; + +#define FSCRYPT_CONTEXT_V1 1 +#define FSCRYPT_CONTEXT_V1_SIZEoffsetof(struct fscrypt_context, key_hash) + +#define FSCRYPT_CONTEXT_V2 2 +#define FSCRYPT_CONTEXT_V2_SIZEsizeof(struct fscrypt_context) + +static inline int fscrypt_context_size(const struct fscrypt_context *ctx) +{ + switch (ctx->version) { + case FSCRYPT_CONTEXT_V1: + return FSCRYPT_CONTEXT_V1_SIZE; + case FSCRYPT_CONTEXT_V2: + return FSCRYPT_CONTEXT_V2_SIZE; + } + return 0; +} + +static inline bool +fscrypt_valid_context_fo
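Since every field in the context is a byte or a byte array, there is no implicit padding, and the sizes work out to 28 bytes for a v1 context and 44 bytes for a v2 context. A throwaway userspace check of that arithmetic (the constants simply mirror the ones in the patch) might look like:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define FS_KEY_DESCRIPTOR_SIZE		8
#define FS_KEY_DERIVATION_NONCE_SIZE	16
#define FSCRYPT_KEY_HASH_SIZE		16

struct fscrypt_context {
	uint8_t version;
	uint8_t contents_encryption_mode;
	uint8_t filenames_encryption_mode;
	uint8_t flags;
	uint8_t master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
	uint8_t nonce[FS_KEY_DERIVATION_NONCE_SIZE];
	uint8_t key_hash[FSCRYPT_KEY_HASH_SIZE];
};

int main(void)
{
	printf("v1 size = %zu\n", offsetof(struct fscrypt_context, key_hash));
	printf("v2 size = %zu\n", sizeof(struct fscrypt_context));
	return 0;
}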
[PATCH v2 0/7] fscrypt: key verification and KDF improvement
From: Eric Biggers This patch series solves two major problems which filesystem-level encryption has currently. First, the user-supplied master keys are not verified, which means a malicious user can provide the wrong key for another user's file and cause a DOS attack or worse. This flaw has been criticized in the past [1]. Second, the KDF (Key Derivation Function) used to derive per-file keys is ad-hoc and nonstandard. While it meets the primary security requirement, it's inflexible and is missing some useful properties such as non-reversibility, which is important under some threat models. This weakness was noted by Unterluggauer and Mangard (2016) [2] who also demonstrated an EM attack against the current AES-based KDF. These problems are solved together by introducing a new encryption policy version where the KDF is changed to HKDF-SHA512, i.e. RFC-5869 [3] with SHA-512 as the underlying hash function. HKDF is used to derive the per-file keys as well as to generate a "key hash" which is stored on-disk to allow key verification. The HMAC transform for each master key is pre-keyed and cached, which in practice makes the new KDF about as fast or even faster than the old one which did not use the crypto API efficiently. Please give special consideration to the choice and usage of crypto algorithms and any other on-disk format and API changes, since we will be locked into these once merged. As a sanity check, I have verified the new on-disk format on ext4 by dumping the encryption xattr and contents of an encrypted regular file using debugfs, then reproducing the key_hash field and the encrypted contents in userspace using the RFC-6234 example code and OpenSSL for HKDF-SHA512 and AES-256-XTS respectively --- given the master key and unencrypted file contents. All these changes are independent of filesystem and encryption mode, i.e. the "v2" encryption policies can be used on any fscrypt-capable filesystem (ext4, f2fs, or ubifs currently) and with any of the supported encryption modes. Changes since v1: - Validate encryption modes and flags earlier in fscrypt_ioctl_set_policy(), since the modes are used in fscrypt_compute_key_hash(). - Various cleanups - Base on v4.13-rc2 References: [1] https://blog.quarkslab.com/a-glimpse-of-ext4-filesystem-level-encryption.html [2] Unterluggauer and Mangard (2016). "Exploiting the Physical Disparity: Side-Channel Attacks on Memory Encryption". https://eprint.iacr.org/2016/473.pdf [3] RFC 5869. "HMAC-based Extract-and-Expand Key Derivation Function (HKDF)". https://tools.ietf.org/html/rfc5869 Eric Biggers (7): fscrypt: add v2 encryption context and policy fscrypt: rename ->ci_master_key to ->ci_master_key_descriptor fscrypt: use HKDF-SHA512 to derive the per-inode encryption keys fscrypt: validate modes and flags earlier when setting policy fscrypt: verify that the correct master key was supplied fscrypt: cache the HMAC transform for each master key fscrypt: for v2 policies, support "fscrypt:" key prefix only fs/crypto/Kconfig | 2 + fs/crypto/fscrypt_private.h| 113 +-- fs/crypto/keyinfo.c| 668 ++--- fs/crypto/policy.c | 138 ++--- fs/super.c | 4 + include/linux/fs.h | 5 + include/linux/fscrypt_common.h | 2 +- include/uapi/linux/fs.h| 6 + 8 files changed, 779 insertions(+), 159 deletions(-) -- 2.14.0.rc0.400.g1c36432dff-goog
[PATCH v2 5/7] fscrypt: verify that the correct master key was supplied
From: Eric Biggers Currently, while a fscrypt master key is required to have a certain description in the keyring, its payload is never verified to be correct. While sufficient for well-behaved userspace, this is insecure in a multi-user system where a user has been given only read-only access to an encrypted file or directory. Specifically, if an encrypted file or directory does not yet have its key cached by the kernel, the first user who accesses it can provide an arbitrary key in their own keyring, which the kernel will then associate with the inode and use for read(), write(), readdir(), etc. by other users as well. Consequently, it's trivial for a user with *read-only* access to an encrypted file or directory to make it appear as garbage to everyone. Creative users might be able to accomplish more sophisticated attacks by careful choice of the key, e.g. choosing a key causes certain bytes of file contents to have certain values or that causes filenames containing the '/' character to appear. Solve the problem for v2 encryption policies by storing a "hash" of the master encryption key in the encryption xattr and verifying it before accepting the user-provided key. We generate the "hash" using HKDF-SHA512 by passing a distinct application-specific info string. This produces a value which is cryptographically isolated and can be stored in the clear without leaking any information about the master key or any other derived keys (in a computational sense). Reusing HKDF is better than doing e.g. SHA-512(master_key) because it avoids passing the same key into different cryptographic primitives. We make the hash field 16 bytes long, as this should provide sufficient collision and preimage resistance while not wasting too much space for the encryption xattr. Acked-by: Michael Halcrow Signed-off-by: Eric Biggers --- fs/crypto/fscrypt_private.h | 4 fs/crypto/keyinfo.c | 46 + fs/crypto/policy.c | 55 - 3 files changed, 95 insertions(+), 10 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index b5881cb26e8f..f9469a71cc29 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -94,6 +94,7 @@ fscrypt_valid_context_format(const struct fscrypt_context *ctx, int size) */ struct fscrypt_master_key { struct crypto_shash *mk_hmac; + u8 mk_hash[FSCRYPT_KEY_HASH_SIZE]; }; /* @@ -157,6 +158,9 @@ extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags); /* keyinfo.c */ +extern int fscrypt_compute_key_hash(const struct inode *inode, + const struct fscrypt_policy *policy, + u8 hash[FSCRYPT_KEY_HASH_SIZE]); extern void __exit fscrypt_essiv_cleanup(void); #endif /* _FSCRYPT_PRIVATE_H */ diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 3164e1c74196..a9514078cf9d 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -39,8 +39,11 @@ static struct crypto_shash *essiv_hash_tfm; * * Keys derived with different info strings are cryptographically isolated from * each other --- knowledge of one derived key doesn't reveal any others. + * (This property is particularly important for the derived key used as the + * "key hash", as that is stored in the clear.) 
*/ #define HKDF_CONTEXT_PER_FILE_KEY 1 +#define HKDF_CONTEXT_KEY_HASH 2 /* * HKDF consists of two steps: @@ -211,6 +214,12 @@ alloc_master_key(const struct fscrypt_key *payload) err = crypto_shash_setkey(k->mk_hmac, prk, sizeof(prk)); if (err) goto fail; + + /* Calculate the "key hash" */ + err = hkdf_expand(k->mk_hmac, HKDF_CONTEXT_KEY_HASH, NULL, 0, + k->mk_hash, FSCRYPT_KEY_HASH_SIZE); + if (err) + goto fail; out: memzero_explicit(prk, sizeof(prk)); return k; @@ -536,6 +545,31 @@ void __exit fscrypt_essiv_cleanup(void) crypto_free_shash(essiv_hash_tfm); } +int fscrypt_compute_key_hash(const struct inode *inode, +const struct fscrypt_policy *policy, +u8 hash[FSCRYPT_KEY_HASH_SIZE]) +{ + struct fscrypt_master_key *k; + unsigned int min_keysize; + + /* +* Require that the master key be long enough for both the +* contents and filenames encryption modes. +*/ + min_keysize = + max(available_modes[policy->contents_encryption_mode].keysize, + available_modes[policy->filenames_encryption_mode].keysize); + + k = load_master_key_from_keyring(inode, policy->master_key_descriptor, +min_keysize); + if (I
[PATCH v2 7/7] fscrypt: for v2 policies, support "fscrypt:" key prefix only
From: Eric Biggers Since v2 encryption policies are opt-in, take the opportunity to also drop support for the legacy filesystem-specific key description prefixes "ext4:", "f2fs:", and "ubifs:", instead requiring the generic prefix "fscrypt:". The generic prefix is preferred since it works for all filesystems. Also there is a performance benefit from not having to search the keyrings twice. The old prefixes remain supported for v1 encryption policies. Reviewed-by: Michael Halcrow Signed-off-by: Eric Biggers --- fs/crypto/fscrypt_private.h | 3 +-- fs/crypto/keyinfo.c | 16 fs/crypto/policy.c | 2 +- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 4221c5b23882..c9de15d4b5b0 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -169,8 +169,7 @@ extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags); /* keyinfo.c */ -extern int fscrypt_compute_key_hash(const struct inode *inode, - const struct fscrypt_policy *policy, +extern int fscrypt_compute_key_hash(const struct fscrypt_policy *policy, u8 hash[FSCRYPT_KEY_HASH_SIZE]); extern void __exit fscrypt_essiv_cleanup(void); diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 2fca72826768..81b08d4a7efe 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -384,8 +384,7 @@ find_and_lock_keyring_key(const char *prefix, } static struct fscrypt_master_key * -load_master_key_from_keyring(const struct inode *inode, -const u8 descriptor[FS_KEY_DESCRIPTOR_SIZE], +load_master_key_from_keyring(const u8 descriptor[FS_KEY_DESCRIPTOR_SIZE], unsigned int min_keysize) { struct key *keyring_key; @@ -394,11 +393,6 @@ load_master_key_from_keyring(const struct inode *inode, keyring_key = find_and_lock_keyring_key(FS_KEY_DESC_PREFIX, descriptor, min_keysize, &payload); - if (keyring_key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) { - keyring_key = find_and_lock_keyring_key( - inode->i_sb->s_cop->key_prefix, - descriptor, min_keysize, &payload); - } if (IS_ERR(keyring_key)) return ERR_CAST(keyring_key); @@ -440,8 +434,7 @@ find_or_create_master_key(const struct inode *inode, /* * The needed master key isn't in memory yet. Load it from the keyring. 
*/ - master_key = load_master_key_from_keyring(inode, - ctx->master_key_descriptor, + master_key = load_master_key_from_keyring(ctx->master_key_descriptor, min_keysize); if (IS_ERR(master_key)) return master_key; @@ -675,8 +668,7 @@ void __exit fscrypt_essiv_cleanup(void) crypto_free_shash(essiv_hash_tfm); } -int fscrypt_compute_key_hash(const struct inode *inode, -const struct fscrypt_policy *policy, +int fscrypt_compute_key_hash(const struct fscrypt_policy *policy, u8 hash[FSCRYPT_KEY_HASH_SIZE]) { struct fscrypt_master_key *k; @@ -690,7 +682,7 @@ int fscrypt_compute_key_hash(const struct inode *inode, max(available_modes[policy->contents_encryption_mode].keysize, available_modes[policy->filenames_encryption_mode].keysize); - k = load_master_key_from_keyring(inode, policy->master_key_descriptor, + k = load_master_key_from_keyring(policy->master_key_descriptor, min_keysize); if (IS_ERR(k)) return PTR_ERR(k); diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index ece55350cee8..fa5599005a72 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -119,7 +119,7 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg) pr_warn_once("%s (pid %d) is setting less secure v0 encryption policy; recommend upgrading to v2.\n", current->comm, current->pid); } else { - ret = fscrypt_compute_key_hash(inode, &policy, key_hash); + ret = fscrypt_compute_key_hash(&policy, key_hash); if (ret) return ret; } -- 2.14.0.rc0.400.g1c36432dff-goog
[PATCH v2 6/7] fscrypt: cache the HMAC transform for each master key
From: Eric Biggers Now that we have a key_hash field which securely identifies a master key payload, introduce a cache of the HMAC transforms for the master keys currently in use for inodes using v2+ encryption policies. The entries in this cache are called 'struct fscrypt_master_key' and are identified by key_hash. The cache is per-superblock. (It could be global, but making it per-superblock should reduce the lock contention a bit, and we may need to keep track of keys on a per-superblock basis for other reasons later, e.g. to implement an ioctl for evicting keys.) This results in a large efficiency gain: we now only have to allocate and key an "hmac(sha512)" transformation, execute HKDF-Extract, and compute key_hash once per master key rather than once per inode. Note that this optimization can't easily be applied to the original AES-based KDF because that uses a different AES key for each KDF execution. In practice, this difference makes the HKDF per-inode encryption key derivation performance comparable to or even faster than the old KDF, which typically spends more time allocating an "ecb(aes)" transformation from the crypto API than doing actual crypto work. Note that it would have been possible to make the mapping be from raw_key => fscrypt_master_key (where raw_key denotes the actual bytes of the master key) rather than from key_hash => fscrypt_master_key. However, an advantage of doing lookups by key_hash is that it replaces the keyring lookup in most cases, which opens up the future possibilities of not even having the master key in memory following an initial provisioning step (if the HMAC-SHA512 implementation is hardware-backed), or of introducing an ioctl to provision a key to the filesystem directly, avoiding keyrings and their visibility problems entirely. Also, because key_hash is public information while raw_key is secret information, it would have been very difficult to use raw_key as a map key in a way that would prevent timing attacks while still being scalable to a large number of entries. Reviewed-by: Michael Halcrow Signed-off-by: Eric Biggers --- fs/crypto/fscrypt_private.h | 11 fs/crypto/keyinfo.c | 134 +++- fs/crypto/policy.c | 5 +- fs/super.c | 4 ++ include/linux/fs.h | 5 ++ 5 files changed, 152 insertions(+), 7 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index f9469a71cc29..4221c5b23882 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -91,10 +91,21 @@ fscrypt_valid_context_format(const struct fscrypt_context *ctx, int size) /* * fscrypt_master_key - an in-use master key + * + * This is referenced from each in-core inode that has been "unlocked" using a + * particular master key. It's primarily used to cache the HMAC transform so + * that the per-inode encryption keys can be derived efficiently with HKDF. It + * is securely erased once all inodes referencing it have been evicted. + * + * If the same master key is used on different filesystems (unusual, but + * possible), we'll create one of these structs for each filesystem. 
*/ struct fscrypt_master_key { struct crypto_shash *mk_hmac; u8 mk_hash[FSCRYPT_KEY_HASH_SIZE]; + refcount_t mk_refcount; + struct rb_node mk_node; + struct super_block *mk_sb; }; /* diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index a9514078cf9d..2fca72826768 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -176,6 +176,14 @@ static void put_master_key(struct fscrypt_master_key *k) if (!k) return; + if (refcount_read(&k->mk_refcount) != 0) { /* in ->s_master_keys? */ + if (!refcount_dec_and_lock(&k->mk_refcount, + &k->mk_sb->s_master_keys_lock)) + return; + rb_erase(&k->mk_node, &k->mk_sb->s_master_keys); + spin_unlock(&k->mk_sb->s_master_keys_lock); + } + crypto_free_shash(k->mk_hmac); kzfree(k); } @@ -230,6 +238,87 @@ alloc_master_key(const struct fscrypt_key *payload) goto out; } +/* + * ->s_master_keys is a map of master keys currently in use by in-core inodes on + * a given filesystem, identified by key_hash which is a cryptographically + * secure identifier for an actual key payload. + * + * Note that master_key_descriptor cannot be used to identify the keys because + * master_key_descriptor only identifies the "location" of a key in the keyring, + * not the actual key payload --- i.e., buggy or malicious userspace may provide + * different keys with the same master_key_descriptor. + */ + +/* + * Search ->s_master_keys for the f
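The map described above is a plain rbtree keyed by mk_hash. Since the lookup half of keyinfo.c is cut off in this hunk, here is a hedged sketch of the search pattern (find_master_key is a made-up name; the caller is assumed to hold ->s_master_keys_lock):

static struct fscrypt_master_key *
find_master_key(struct super_block *sb, const u8 hash[FSCRYPT_KEY_HASH_SIZE])
{
	struct rb_node *node = sb->s_master_keys.rb_node;

	while (node) {
		struct fscrypt_master_key *k =
			rb_entry(node, struct fscrypt_master_key, mk_node);
		int cmp = memcmp(hash, k->mk_hash, FSCRYPT_KEY_HASH_SIZE);

		if (cmp < 0)
			node = node->rb_left;
		else if (cmp > 0)
			node = node->rb_right;
		else
			return k;
	}
	return NULL;
}

Insertion is the usual mirror image with rb_link_node() and rb_insert_color(), taking an extra reference on an existing entry when the same hash is already present.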
[PATCH v2 3/7] fscrypt: use HKDF-SHA512 to derive the per-inode encryption keys
From: Eric Biggers By design, the keys which userspace provides in the keyring are not used to encrypt data directly. Instead, a KDF (Key Derivation Function) is used to derive a unique encryption key for each inode, given a "master" key and a nonce. The current KDF encrypts the master key with AES-128-ECB using the nonce as the AES key. This KDF is ad-hoc and is not specified in any standard. While it does generate unique derived keys with sufficient entropy, it has several disadvantages: - It's reversible: an attacker who compromises a derived key, e.g. using a side channel attack, can "decrypt" it to get back to the master key. - It's not very extensible because it cannot easily be used to derive other key material that may be needed and it ties the length of the derived key closely to the length of the master key. - It doesn't evenly distribute the entropy from the master key. For example, the first 16 bytes of each derived key depend only on the first 16 bytes of the master key. - It uses a public value as an AES key, which is unusual. Ciphers are rarely evaluated under a threat model where the keys are public and the messages are secret. Solve all these problems for v2 encryption policies by changing the KDF to HKDF with SHA-512 as the underlying hash function. To derive each inode's encryption key, HKDF is executed with the master key as the input key material, a fixed salt, and the per-inode nonce prefixed with a context byte as the application-specific information string. Unlike the current KDF, HKDF has been formally published and standardized [1][2], is nonreversible, can be used to derive any number and length of secret and/or non-secret keys, and evenly distributes the entropy from the master key (excepting limits inherent to not using a random salt). Note that this introduces a dependency on the security and implementation of SHA-512, whereas before we were using only AES for both key derivation and encryption. However, by using HMAC rather than the hash function directly, HKDF is designed to remain secure even if various classes of attacks, e.g. collision attacks, are found against the underlying unkeyed hash function. Even HMAC-MD5 is still considered secure in practice, despite MD5 itself having been heavily compromised. We *could* avoid introducing a hash function by instantiating HKDF-Expand with CMAC-AES256 as the pseudorandom function rather than HMAC-SHA512. This would work; however, the HKDF specification doesn't explicitly allow a non-HMAC pseudorandom function, so it would be less standard. It would also require skipping HKDF-Extract and changing the API to accept only 32-byte master keys (since otherwise HKDF-Extract using CMAC-AES would produce a pseudorandom key only 16 bytes long which is only enough for AES-128, not AES-256). HKDF-SHA512 can require more "crypto work" per key derivation when compared to the current KDF. However, later in this series, we'll start caching the HMAC transform for each master key, which will actually make the real-world performance about the same or even significantly better than the AES-based KDF as currently implemented. Also, each KDF can actually be executed on the order of 1 million times per second, so KDF performance probably isn't actually the bottleneck in practice anyway. References: [1] Krawczyk (2010). "Cryptographic Extraction and Key Derivation: The HKDF Scheme". https://eprint.iacr.org/2010/264.pdf [2] RFC 5869. "HMAC-based Extract-and-Expand Key Derivation Function (HKDF)". 
https://tools.ietf.org/html/rfc5869 Signed-off-by: Eric Biggers --- fs/crypto/Kconfig | 2 + fs/crypto/fscrypt_private.h | 13 ++ fs/crypto/keyinfo.c | 484 +++- 3 files changed, 403 insertions(+), 96 deletions(-) diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig index 02b7d91c9231..bbd4e38b293c 100644 --- a/fs/crypto/Kconfig +++ b/fs/crypto/Kconfig @@ -8,6 +8,8 @@ config FS_ENCRYPTION select CRYPTO_CTS select CRYPTO_CTR select CRYPTO_SHA256 + select CRYPTO_SHA512 + select CRYPTO_HMAC select KEYS help Enable encryption of files and directories. This diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 4b0205accae9..b5881cb26e8f 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -89,6 +89,13 @@ fscrypt_valid_context_format(const struct fscrypt_context *ctx, int size) return size >= 1 && size == fscrypt_context_size(ctx); } +/* + * fscrypt_master_key - an in-use master key + */ +struct fscrypt_master_key { + struct crypto_shash *mk_hmac; +}; + /* * fscrypt_info - the "encryption key" for an inode * @@ -102,6 +109,12 @@ struct fscrypt_info { struct crypto_skcipher *ci_ctfm; stru
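Because the hunk above is truncated before the interesting code, here is a compact userspace sketch of the HKDF-SHA512 construction itself (RFC 5869), written against OpenSSL's HMAC API purely for illustration; hkdf_extract and hkdf_expand here are stand-ins, and the actual salt and application-specific info strings (a context byte plus the per-file nonce) are the ones defined in the patch:

#include <string.h>
#include <openssl/evp.h>
#include <openssl/hmac.h>

#define HASHLEN 64	/* SHA-512 digest size */

/* HKDF-Extract: PRK = HMAC-SHA512(salt, input keying material) */
static void hkdf_extract(const unsigned char *salt, size_t salt_len,
			 const unsigned char *ikm, size_t ikm_len,
			 unsigned char prk[HASHLEN])
{
	unsigned int len = HASHLEN;

	HMAC(EVP_sha512(), salt, (int)salt_len, ikm, ikm_len, prk, &len);
}

/* HKDF-Expand: OKM = first okm_len bytes of T(1) || T(2) || ...,
 * where T(i) = HMAC-SHA512(PRK, T(i-1) || info || i). */
static void hkdf_expand(const unsigned char prk[HASHLEN],
			const unsigned char *info, size_t info_len,
			unsigned char *okm, size_t okm_len)
{
	unsigned char t[HASHLEN];
	size_t tlen = 0, done = 0;
	unsigned char counter = 1;

	while (done < okm_len) {
		HMAC_CTX *ctx = HMAC_CTX_new();
		unsigned int len = HASHLEN;
		size_t n;

		HMAC_Init_ex(ctx, prk, HASHLEN, EVP_sha512(), NULL);
		HMAC_Update(ctx, t, tlen);	/* T(i-1); empty when i == 1 */
		HMAC_Update(ctx, info, info_len);
		HMAC_Update(ctx, &counter, 1);
		HMAC_Final(ctx, t, &len);
		HMAC_CTX_free(ctx);

		n = okm_len - done;
		if (n > HASHLEN)
			n = HASHLEN;
		memcpy(okm + done, t, n);
		done += n;
		tlen = HASHLEN;
		counter++;
	}
}

In the kernel patch the Extract step runs once per master key (the resulting PRK keys an "hmac(sha512)" transform, which a later patch in the series caches per master key), and the Expand step then runs per inode with the nonce folded into the info string.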
[PATCH v2 4/7] fscrypt: validate modes and flags earlier when setting policy
From: Eric Biggers For FS_IOC_SET_ENCRYPTION_POLICY, currently the encryption modes and flags are only validated when a new encryption policy is being set, not when an existing policy is being compared to the one specified. However, we're going to start needing to compute the key_hash in both cases, and for this it's helpful to validate that the master key has the minimum length required by the specified encryption modes. Therefore, move the modes and flags validation earlier in the ioctl, next to where we validate the policy version. Signed-off-by: Eric Biggers --- fs/crypto/policy.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index fe525da9e79c..d1e58798da3c 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -60,13 +60,6 @@ static int create_encryption_context_from_policy(struct inode *inode, { struct fscrypt_context ctx; - if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, -policy->filenames_encryption_mode)) - return -EINVAL; - - if (policy->flags & ~FS_POLICY_FLAGS_VALID) - return -EINVAL; - ctx.version = context_version_for_policy(policy); ctx.contents_encryption_mode = policy->contents_encryption_mode; ctx.filenames_encryption_mode = policy->filenames_encryption_mode; @@ -100,6 +93,13 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg) policy.version != FS_POLICY_VERSION_HKDF) return -EINVAL; + if (!fscrypt_valid_enc_modes(policy.contents_encryption_mode, +policy.filenames_encryption_mode)) + return -EINVAL; + + if (policy.flags & ~FS_POLICY_FLAGS_VALID) + return -EINVAL; + ret = mnt_want_write_file(filp); if (ret) return ret; -- 2.14.0.rc0.400.g1c36432dff-goog
[PATCH v2 2/7] fscrypt: rename ->ci_master_key to ->ci_master_key_descriptor
From: Eric Biggers In struct fscrypt_info, ->ci_master_key is the master key descriptor, not the master key itself. In preparation for introducing a struct fscrypt_master_key and making ->ci_master_key point to it, rename the existing ->ci_master_key to ->ci_master_key_descriptor. Acked-by: Michael Halcrow Signed-off-by: Eric Biggers --- fs/crypto/fscrypt_private.h | 2 +- fs/crypto/keyinfo.c | 4 ++-- fs/crypto/policy.c | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 8329fb905ac6..4b0205accae9 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -110,7 +110,7 @@ struct fscrypt_info { u8 ci_data_mode; u8 ci_filename_mode; u8 ci_flags; - u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE]; + u8 ci_master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; }; typedef enum { diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 7e664a11340a..5591fd24e4b2 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -293,8 +293,8 @@ int fscrypt_get_encryption_info(struct inode *inode) crypt_info->ci_data_mode = ctx.contents_encryption_mode; crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; crypt_info->ci_flags = ctx.flags; - memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, - sizeof(crypt_info->ci_master_key)); + memcpy(crypt_info->ci_master_key_descriptor, ctx.master_key_descriptor, + FS_KEY_DESCRIPTOR_SIZE); res = determine_cipher_type(crypt_info, inode, &cipher_str, &keysize); if (res) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index d6beb48f08fc..fe525da9e79c 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -226,7 +226,8 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) child_ci = child->i_crypt_info; if (parent_ci && child_ci) { - return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key, + return memcmp(parent_ci->ci_master_key_descriptor, + child_ci->ci_master_key_descriptor, FS_KEY_DESCRIPTOR_SIZE) == 0 && (parent_ci->ci_context_version == child_ci->ci_context_version) && @@ -284,7 +285,7 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child, ctx.contents_encryption_mode = ci->ci_data_mode; ctx.filenames_encryption_mode = ci->ci_filename_mode; ctx.flags = ci->ci_flags; - memcpy(ctx.master_key_descriptor, ci->ci_master_key, + memcpy(ctx.master_key_descriptor, ci->ci_master_key_descriptor, FS_KEY_DESCRIPTOR_SIZE); get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE); if (ctx.version != FSCRYPT_CONTEXT_V1) -- 2.14.0.rc0.400.g1c36432dff-goog
Re: [PATCH v5 2/5] lib: Add zstd modules
On Wed, Aug 09, 2017 at 07:35:53PM -0700, Nick Terrell wrote: > > It can compress at speeds approaching lz4, and quality approaching lzma. Well, for a very loose definition of "approaching", and certainly not at the same time. I doubt there's a use case for using the highest compression levels in kernel mode --- especially the ones using zstd_opt.h. > > The code was ported from the upstream zstd source repository. What version? > `linux/zstd.h` header was modified to match linux kernel style. > The cross-platform and allocation code was stripped out. Instead zstd > requires the caller to pass a preallocated workspace. The source files > were clang-formatted [1] to match the Linux Kernel style as much as > possible. It would be easier to compare to the upstream version if it was not all reformatted. There is a chance that bugs were introduced by Linux-specific changes, and it would be nice if they could be easily reviewed. (Also I don't know what clang-format settings you used, but there are still a lot of differences from the Linux coding style.) > > I benchmarked zstd compression as a special character device. I ran zstd > and zlib compression at several levels, as well as performing no > compression, which measure the time spent copying the data to kernel space. > Data is passed to the compresser 4096 B at a time. The benchmark file is > located in the upstream zstd source repository under > `contrib/linux-kernel/zstd_compress_test.c` [2]. > > I ran the benchmarks on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. > The VM is running on a MacBook Pro with a 3.1 GHz Intel Core i7 processor, > 16 GB of RAM, and a SSD. I benchmarked using `silesia.tar` [3], which is > 211,988,480 B large. Run the following commands for the benchmark: > > sudo modprobe zstd_compress_test > sudo mknod zstd_compress_test c 245 0 > sudo cp silesia.tar zstd_compress_test > > The time is reported by the time of the userland `cp`. > The MB/s is computed with > > 1,536,217,008 B / time(buffer size, hash) > > which includes the time to copy from userland. > The Adjusted MB/s is computed with > > 1,536,217,088 B / (time(buffer size, hash) - time(buffer size, none)). > > The memory reported is the amount of memory the compressor requests. > > | Method | Size (B) | Time (s) | Ratio | MB/s| Adj MB/s | Mem (MB) | > |--|--|--|---|-|--|--| > | none | 11988480 |0.100 | 1 | 2119.88 |- |- | > | zstd -1 | 73645762 |1.044 | 2.878 | 203.05 | 224.56 | 1.23 | > | zstd -3 | 66988878 |1.761 | 3.165 | 120.38 | 127.63 | 2.47 | > | zstd -5 | 65001259 |2.563 | 3.261 | 82.71 |86.07 | 2.86 | > | zstd -10 | 60165346 | 13.242 | 3.523 | 16.01 |16.13 |13.22 | > | zstd -15 | 58009756 | 47.601 | 3.654 |4.45 | 4.46 |21.61 | > | zstd -19 | 54014593 | 102.835 | 3.925 |2.06 | 2.06 |60.15 | > | zlib -1 | 77260026 |2.895 | 2.744 | 73.23 |75.85 | 0.27 | > | zlib -3 | 72972206 |4.116 | 2.905 | 51.50 |52.79 | 0.27 | > | zlib -6 | 68190360 |9.633 | 3.109 | 22.01 |22.24 | 0.27 | > | zlib -9 | 67613382 | 22.554 | 3.135 |9.40 | 9.44 | 0.27 | > Theses benchmarks are misleading because they compress the whole file as a single stream without resetting the dictionary, which isn't how data will typically be compressed in kernel mode. With filesystem compression the data has to be divided into small chunks that can each be decompressed independently. That eliminates one of the primary advantages of Zstandard (support for large dictionary sizes). Eric
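To spell out the difference being criticized: a filesystem must restart the compressor for every block so that each block stays independently decompressible, roughly as in the sketch below (userspace, upstream libzstd's one-shot API; compress_blockwise is a made-up helper). The benchmark above instead pushes the whole ~200 MB silesia.tar through a single stream, which lets matches reach far back into the window and flatters the larger-memory settings in a way kernel users generally won't see.

#include <stddef.h>
#include <zstd.h>

/*
 * Compress src in independent chunks of block_size bytes, as a
 * filesystem would.  Each ZSTD_compress() call starts with an empty
 * history, so no match can refer back into an earlier block.
 * Returns the total compressed size, or 0 on error.
 */
static size_t compress_blockwise(void *dst, size_t dst_cap,
				 const void *src, size_t src_size,
				 size_t block_size, int level)
{
	size_t in = 0, out = 0;

	while (in < src_size) {
		size_t n = src_size - in;
		size_t ret;

		if (n > block_size)
			n = block_size;
		ret = ZSTD_compress((char *)dst + out, dst_cap - out,
				    (const char *)src + in, n, level);
		if (ZSTD_isError(ret))
			return 0;
		out += ret;
		in += n;
	}
	return out;
}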
Re: [PATCH v5 2/5] lib: Add zstd modules
On Thu, Aug 10, 2017 at 07:32:18AM -0400, Austin S. Hemmelgarn wrote: > On 2017-08-10 04:30, Eric Biggers wrote: > >On Wed, Aug 09, 2017 at 07:35:53PM -0700, Nick Terrell wrote: > >> > >>It can compress at speeds approaching lz4, and quality approaching lzma. > > > >Well, for a very loose definition of "approaching", and certainly not at the > >same time. I doubt there's a use case for using the highest compression > >levels > >in kernel mode --- especially the ones using zstd_opt.h. > Large data-sets with WORM access patterns and infrequent writes > immediately come to mind as a use case for the highest compression > level. > > As a more specific example, the company I work for has a very large > amount of documentation, and we keep all old versions. This is all > stored on a file server which is currently using BTRFS. Once a > document is written, it's almost never rewritten, so write > performance only matters for the first write. However, they're read > back pretty frequently, so we need good read performance. As of > right now, the system is set to use LZO compression by default, and > then when a new document is added, the previous version of that > document gets re-compressed using zlib compression, which actually > results in pretty significant space savings most of the time. I > would absolutely love to use zstd compression with this system with > the highest compression level, because most people don't care how > long it takes to write the file out, but they do care how long it > takes to read a file (even if it's an older version). This may be a reasonable use case, but note this cannot just be the regular "zstd" compression setting, since filesystem compression by default must provide reasonable performance for many different access patterns. See the patch in this series which actually adds zstd compression to btrfs; it only uses level 1. I do not see a patch which adds a higher compression mode. It would need to be a special setting like "zstdhc" that users could opt-in to on specific directories. It also would need to be compared to simply compressing in userspace. In many cases compressing in userspace is probably the better solution for the use case in question because it works on any filesystem, allows using any compression algorithm, and if random access is not needed it is possible to compress each file as a single stream (like a .xz file), which produces a much better compression ratio than the block-by-block compression that filesystems have to use. Note also that LZ4HC is in the kernel source tree currently but no one is using it vs. the regular LZ4. I think it is the kind of thing that sounded useful originally, but at the end of the day no one really wants to use it in kernel mode. I'd certainly be interested in actual patches, though. Eric
Re: [PATCH v5 2/5] lib: Add zstd modules
On Thu, Aug 10, 2017 at 10:57:01AM -0400, Austin S. Hemmelgarn wrote: > Also didn't think to mention this, but I could see the max level > being very popular for use with SquashFS root filesystems used in > LiveCD's. Currently, they have to decide between read performance > and image size, while zstd would provide both. The high compression levels of Zstandard are indeed a great fit for SquashFS, but SquashFS images are created in userspace by squashfs-tools. The kernel only needs to be able to decompress them. (Also, while Zstandard provides very good tradeoffs and will probably become the preferred algorithm for SquashFS, it's misleading to imply that users won't have to make decisions anymore. It does not compress as well as XZ or decompress as fast as LZ4, except maybe in very carefully crafted benchmarks.) Eric
Re: [PATCH v5 2/5] lib: Add zstd modules
On Thu, Aug 10, 2017 at 01:41:21PM -0400, Chris Mason wrote:
> On 08/10/2017 04:30 AM, Eric Biggers wrote:
> >On Wed, Aug 09, 2017 at 07:35:53PM -0700, Nick Terrell wrote:
> >
> >>The memory reported is the amount of memory the compressor requests.
> >>
> >>| Method   | Size (B)  | Time (s) | Ratio | MB/s    | Adj MB/s | Mem (MB) |
> >>|----------|-----------|----------|-------|---------|----------|----------|
> >>| none     | 211988480 |    0.100 |     1 | 2119.88 |        - |        - |
> >>| zstd -1  |  73645762 |    1.044 | 2.878 |  203.05 |   224.56 |     1.23 |
> >>| zstd -3  |  66988878 |    1.761 | 3.165 |  120.38 |   127.63 |     2.47 |
> >>| zstd -5  |  65001259 |    2.563 | 3.261 |   82.71 |    86.07 |     2.86 |
> >>| zstd -10 |  60165346 |   13.242 | 3.523 |   16.01 |    16.13 |    13.22 |
> >>| zstd -15 |  58009756 |   47.601 | 3.654 |    4.45 |     4.46 |    21.61 |
> >>| zstd -19 |  54014593 |  102.835 | 3.925 |    2.06 |     2.06 |    60.15 |
> >>| zlib -1  |  77260026 |    2.895 | 2.744 |   73.23 |    75.85 |     0.27 |
> >>| zlib -3  |  72972206 |    4.116 | 2.905 |   51.50 |    52.79 |     0.27 |
> >>| zlib -6  |  68190360 |    9.633 | 3.109 |   22.01 |    22.24 |     0.27 |
> >>| zlib -9  |  67613382 |   22.554 | 3.135 |    9.40 |     9.44 |     0.27 |
> >>
> >
> >These benchmarks are misleading because they compress the whole file as a
> >single stream without resetting the dictionary, which isn't how data will
> >typically be compressed in kernel mode. With filesystem compression the data
> >has to be divided into small chunks that can each be decompressed
> >independently.
> >That eliminates one of the primary advantages of Zstandard (support for large
> >dictionary sizes).
>
> I did btrfs benchmarks of kernel trees and other normal data sets as
> well. The numbers were in line with what Nick is posting here.
> zstd is a big win over both lzo and zlib from a btrfs point of view.
>
> It's true Nick's patches only support a single compression level in
> btrfs, but that's because btrfs doesn't have a way to pass in the
> compression ratio. It could easily be a mount option, it was just
> outside the scope of Nick's initial work.
>

I am not surprised --- Zstandard is closer to the state of the art, both
format-wise and implementation-wise, than the other choices in BTRFS.

My point is that benchmarks need to account for how much data is compressed
at a time. This is a common mistake when comparing different compression
algorithms; the algorithm name and compression level do not tell the whole
story. The dictionary size is extremely significant.

No one is going to compress or decompress a 200 MB file as a single stream
in kernel mode, so it does not make sense to justify adding Zstandard *to
the kernel* based on such a benchmark. It is going to be divided into
chunks. How big are the chunks in BTRFS? I thought that it compressed only
one page (4 KiB) at a time, but I hope that has been, or is being, improved;
32 KiB - 128 KiB should be a better amount. (And if the amount of data
compressed at a time happens to be different between the different
algorithms, note that BTRFS benchmarks are likely to be measuring that as
much as the algorithms themselves.)

Eric
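A quick way to see how much the chunk size alone matters is to sweep it in userspace with ordinary libzstd. The sketch below is only an illustration against synthetic data (the buffer contents, sizes, and level 1 are arbitrary), not a measurement of btrfs or of the kernel port, but it shows the ratio climbing as the independently compressed chunks grow from 4 KiB toward 128 KiB and beyond.

/* Illustration: effective compression ratio vs. size of independently
 * compressed chunks, using userspace libzstd (link with -lzstd). */
#include <stdio.h>
#include <stdlib.h>
#include <zstd.h>

static size_t chunked_size(const char *src, size_t len, size_t chunk, int level)
{
	char *dst = malloc(ZSTD_compressBound(chunk));
	size_t total = 0;

	for (size_t off = 0; off < len; off += chunk) {
		size_t n = len - off < chunk ? len - off : chunk;
		size_t c = ZSTD_compress(dst, ZSTD_compressBound(chunk),
					 src + off, n, level);
		if (ZSTD_isError(c))
			abort();
		total += c;
	}
	free(dst);
	return total;
}

int main(void)
{
	size_t len = 8 << 20;
	size_t chunks[] = { 4096, 32768, 131072, len };
	char *src = malloc(len);

	for (size_t i = 0; i < len; i++)	/* dummy compressible data */
		src[i] = (i * 31 / 1000) % 64;

	for (int i = 0; i < 4; i++)
		printf("chunk %8zu: ratio %.3f\n", chunks[i],
		       (double)len / chunked_size(src, len, chunks[i], 1));
	free(src);
	return 0;
}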
Re: [PATCH 00/12] x86/crypto: Fix RBP usage in several crypto .S files
Hi Josh, On Tue, Aug 29, 2017 at 01:05:33PM -0500, Josh Poimboeuf wrote: > Many of the x86 crypto functions use RBP as a temporary register. This > breaks frame pointer convention, and breaks stack traces when unwinding > from an interrupt in the crypto code. > > Convert most* of them to leave RBP alone. > > These pass the crypto boot tests for me. Any further testing would be > appreciated! > > [*] There are still a few crypto files left that need fixing, but the > fixes weren't trivial and nobody reported unwinder warnings about > them yet, so I'm skipping them for now. > > Josh Poimboeuf (12): > x86/crypto: Fix RBP usage in blowfish-x86_64-asm_64.S > x86/crypto: Fix RBP usage in camellia-x86_64-asm_64.S > x86/crypto: Fix RBP usage in cast5-avx-x86_64-asm_64.S > x86/crypto: Fix RBP usage in cast6-avx-x86_64-asm_64.S > x86/crypto: Fix RBP usage in des3_ede-asm_64.S > x86/crypto: Fix RBP usage in sha1_avx2_x86_64_asm.S > x86/crypto: Fix RBP usage in sha1_ssse3_asm.S > x86/crypto: Fix RBP usage in sha256-avx-asm.S > x86/crypto: Fix RBP usage in sha256-avx2-asm.S > x86/crypto: Fix RBP usage in sha256-ssse3-asm.S > x86/crypto: Fix RBP usage in sha512-avx2-asm.S > x86/crypto: Fix RBP usage in twofish-avx-x86_64-asm_64.S > Thanks for fixing these! I don't have time to review these in detail, but I ran the crypto self-tests on the affected algorithms, and they all pass. I also benchmarked them before and after; the only noticeable performance difference was that sha256-avx2 and sha512-avx2 became a few percent slower. I don't suppose there is a way around that? Otherwise it's probably not a big deal. For reference, this was the list of affected algorithms I used: shash: sha1-ssse3, sha1-avx, sha1-avx2, sha256-ssse3, sha256-avx, sha256-avx2, sha512-ssse3, sha512-avx, sha512-avx2 cipher: blowfish-asm, camellia-asm, des3_ede-asm skcipher: ecb-blowfish-asm, cbc-blowfish-asm, ctr-blowfish-asm, ecb-cast5-avx, cbc-cast5-avx, ctr-cast5-avx, ecb-cast6-avx, cbc-cast6-avx, ctr-cast6-avx, lrw-cast6-avx, xts-cast6-avx, ecb-camellia-asm, cbc-camellia-asm, ctr-camellia-asm, lrw-camellia-asm, xts-camellia-asm, ecb-des3_ede-asm, cbc-des3_ede-asm, ctr-des3_ede-asm, ecb-twofish-avx, cbc-twofish-avx, ctr-twofish-avx, lrw-twofish-avx, xts-twofish-avx Eric
Re: [PATCH 00/12] x86/crypto: Fix RBP usage in several crypto .S files
On Thu, Sep 07, 2017 at 09:15:34AM +0200, Ingo Molnar wrote: > > * Eric Biggers wrote: > > > Thanks for fixing these! I don't have time to review these in detail, but > > I ran > > the crypto self-tests on the affected algorithms, and they all pass. I also > > benchmarked them before and after; the only noticable performance > > difference was > > that sha256-avx2 and sha512-avx2 became a few percent slower. I don't > > suppose > > there is a way around that? Otherwise it's probably not a big deal. > > Which CPU model did you use for the test? > > Thanks, > > Ingo This was on Haswell, "Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz". Eric
Re: [PATCH 00/12] x86/crypto: Fix RBP usage in several crypto .S files
On Thu, Sep 07, 2017 at 11:26:47PM +0200, Ingo Molnar wrote: > > * Eric Biggers wrote: > > > On Thu, Sep 07, 2017 at 09:15:34AM +0200, Ingo Molnar wrote: > > > > > > * Eric Biggers wrote: > > > > > > > Thanks for fixing these! I don't have time to review these in detail, > > > > but I ran > > > > the crypto self-tests on the affected algorithms, and they all pass. I > > > > also > > > > benchmarked them before and after; the only noticeable performance > > > > difference was > > > > that sha256-avx2 and sha512-avx2 became a few percent slower. I don't > > > > suppose > > > > there is a way around that? Otherwise it's probably not a big deal. > > > > > > Which CPU model did you use for the test? > > > > > > Thanks, > > > > > > Ingo > > > > This was on Haswell, "Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz". > > Any chance to test this with the latest microarchitecture - any Skylake > derivative > Intel CPU you have access to? > > Thanks, > > Ingo

Tested with Skylake, "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz". The results were as follows, and they seemed a bit worse than on Haswell:

sha256-avx2 became 3.5% slower
sha512-avx2 became 7.5% slower

Note: it's tricky to benchmark this, especially with just a few percent difference, so don't read too much into the exact numbers.

Eric
Re: [PATCH 00/12] x86/crypto: Fix RBP usage in several crypto .S files
Hi Josh, On Wed, Sep 13, 2017 at 05:33:03PM -0500, Josh Poimboeuf wrote: > And here's v2 of the sha512-avx2 patch. It should hopefully gain back > most of the performance lost by v1. > > From: Josh Poimboeuf > Subject: [PATCH] x86/crypto: Fix RBP usage in sha512-avx2-asm.S > > Using RBP as a temporary register breaks frame pointer convention and > breaks stack traces when unwinding from an interrupt in the crypto code. > > Mix things up a little bit to get rid of the RBP usage, without > destroying performance. Use RDI instead of RBP for the TBL pointer. > That will clobber CTX, so save CTX on the stack and use RDI as CTX > before it gets clobbered, and R12 as CTX after it gets clobbered. > > Also remove the unused y4 variable. > I tested the v2 patches for both sha256-avx2 and sha512-avx2 on Skylake. They both pass the crypto self-tests, and there was no noticeable performance difference compared to the unpatched versions. Thanks! Eric
Re: [PATCH 00/12] x86/crypto: Fix RBP usage in several crypto .S files
On Fri, Sep 15, 2017 at 07:34:31AM +0200, Ingo Molnar wrote: > > * Eric Biggers wrote: > > > Hi Josh, > > > > On Wed, Sep 13, 2017 at 05:33:03PM -0500, Josh Poimboeuf wrote: > > > And here's v2 of the sha512-avx2 patch. It should hopefully gain back > > > most of the performance lost by v1. > > > > > > From: Josh Poimboeuf > > > Subject: [PATCH] x86/crypto: Fix RBP usage in sha512-avx2-asm.S > > > > > > Using RBP as a temporary register breaks frame pointer convention and > > > breaks stack traces when unwinding from an interrupt in the crypto code. > > > > > > Mix things up a little bit to get rid of the RBP usage, without > > > destroying performance. Use RDI instead of RBP for the TBL pointer. > > > That will clobber CTX, so save CTX on the stack and use RDI as CTX > > > before it gets clobbered, and R12 as CTX after it gets clobbered. > > > > > > Also remove the unused y4 variable. > > > > > > > I tested the v2 patches for both sha256-avx2 and sha512-avx2 on Skylake. > > They > > both pass the crypto self-tests, and there was no noticable performance > > difference compared to the unpatched versions. Thanks! > > Cool, thanks for review and the testing! Can we add your Tested-by + Acked-by > tags > to the patches? > Yes, that's fine for all the patches in the series. Will these patches go in through the crypto tree or through the x86 tree? Eric
Re: [GIT PULL] KEYS: Fixes and crypto fixes
On Thu, Sep 28, 2017 at 09:14:58AM +1000, James Morris wrote: > On Wed, 27 Sep 2017, David Howells wrote: > > > (2) Fixing big_key to use safe crypto from Jason A. Donenfeld. > > > > I'm concerned about the lack of crypto review mentioned by Jason -- I > wonder if we can get this rewrite any more review from crypto folk. > > Also, are there any tests for this code? If not, it would be good to make > some. > There is a test for the big_key key type in the keyutils test suite. I also manually tested Jason's change. And as far as I can tell there isn't actually a whole lot to test besides adding a big_key larger than BIG_KEY_FILE_THRESHOLD bytes, reading it back, and verifying that the data is unchanged --- since that covers the code that was changed. An earlier version of the patch produced a warning with CONFIG_DEBUG_SG=y since it put the aead_request on the stack, but that's been fixed. It would be great if someone else would comment on the crypto too, but for what it's worth I'm satisfied with the crypto changes. GCM is a much better choice than ECB as long as we don't repeat (key, IV) pairs --- which we don't. And in any case ECB mode makes no sense in this context; you'd need a *very* good reason to actually choose to encrypt something with ECB mode. Unfortunately it tends to be a favorite of people who don't understand encryption modes... Plus, getting all the randomness at boot time didn't make sense because that's when entropy is the most scarce. Eric
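For reference, the round-trip check described above is easy to script outside the keyutils test suite as well. This is a minimal sketch using libkeyutils (link with -lkeyutils), assuming CONFIG_BIG_KEYS=y; the 512 KiB payload size is an arbitrary value chosen to be comfortably above the threshold at which big_key switches to encrypted, file-backed storage.

/* Minimal sketch: add a big_key large enough to be file-backed, read
 * it back, and verify the payload is unchanged.  Link with -lkeyutils.
 * The 512 KiB size is an arbitrary choice above the threshold. */
#include <keyutils.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	size_t len = 512 * 1024;
	unsigned char *payload = malloc(len);
	unsigned char *readback = malloc(len);
	key_serial_t key;
	long n;

	for (size_t i = 0; i < len; i++)
		payload[i] = rand();

	key = add_key("big_key", "test:big_key", payload, len,
		      KEY_SPEC_SESSION_KEYRING);
	if (key < 0) {
		perror("add_key");
		return 1;
	}

	n = keyctl_read(key, (char *)readback, len);
	if (n != (long)len || memcmp(payload, readback, len) != 0) {
		fprintf(stderr, "big_key round-trip FAILED\n");
		return 1;
	}
	printf("big_key round-trip OK (%zu bytes)\n", len);
	return 0;
}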
[PATCH 3/4] crypto: qat - fix double free of ctx->p
From: Eric Biggers When setting the secret with the "qat-dh" Diffie-Hellman implementation, if allocating 'g' failed, then 'p' was freed twice: once immediately, and once later when the crypto_kpp tfm was destroyed. Fix it by using qat_dh_clear_ctx() in the error paths, as that sets the pointers to NULL. Fixes: c9839143ebbf ("crypto: qat - Add DH support") Cc: # v4.8+ Signed-off-by: Eric Biggers --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 15 --- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 6f5dd68449c6..7655fdb499de 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -462,11 +462,8 @@ static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh *params) } ctx->g = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_g, GFP_KERNEL); - if (!ctx->g) { - dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p); - ctx->p = NULL; + if (!ctx->g) return -ENOMEM; - } memcpy(ctx->g + (ctx->p_size - params->g_size), params->g, params->g_size); @@ -507,18 +504,22 @@ static int qat_dh_set_secret(struct crypto_kpp *tfm, const void *buf, ret = qat_dh_set_params(ctx, ¶ms); if (ret < 0) - return ret; + goto err_clear_ctx; ctx->xa = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_xa, GFP_KERNEL); if (!ctx->xa) { - qat_dh_clear_ctx(dev, ctx); - return -ENOMEM; + ret = -ENOMEM; + goto err_clear_ctx; } memcpy(ctx->xa + (ctx->p_size - params.key_size), params.key, params.key_size); return 0; + +err_clear_ctx: + qat_dh_clear_ctx(dev, ctx); + return ret; } static unsigned int qat_dh_max_size(struct crypto_kpp *tfm) -- 2.15.0.403.gc27cc4dac6-goog
[PATCH 4/4] crypto: dh - don't permit 'key' or 'g' size longer than 'p'
From: Eric Biggers The "qat-dh" DH implementation assumes that 'key' and 'g' can be copied into a buffer with size 'p_size'. However it was never checked that that was actually the case, which allowed users to cause a buffer underflow via KEYCTL_DH_COMPUTE. Fix this by updating crypto_dh_decode_key() to verify this precondition for all DH implementations. Fixes: c9839143ebbf ("crypto: qat - Add DH support") Cc: # v4.8+ Signed-off-by: Eric Biggers --- crypto/dh_helper.c | 8 1 file changed, 8 insertions(+) diff --git a/crypto/dh_helper.c b/crypto/dh_helper.c index 708ae20d2d3c..7f00c771fe8d 100644 --- a/crypto/dh_helper.c +++ b/crypto/dh_helper.c @@ -83,6 +83,14 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params) if (secret.len != crypto_dh_key_len(params)) return -EINVAL; + /* +* Don't permit the buffer for 'key' or 'g' to be larger than 'p', since +* some drivers assume otherwise. +*/ + if (params->key_size > params->p_size || + params->g_size > params->p_size) + return -EINVAL; + /* Don't allocate memory. Set pointers to data within * the given buffer */ -- 2.15.0.403.gc27cc4dac6-goog
[PATCH 2/4] crypto: dh - don't permit 'p' to be 0
From: Eric Biggers If 'p' is 0 for the software Diffie-Hellman implementation, then dh_max_size() returns 0. In the case of KEYCTL_DH_COMPUTE, this causes ZERO_SIZE_POINTER to be passed to sg_init_one(), which with CONFIG_DEBUG_SG=y triggers the 'BUG_ON(!virt_addr_valid(buf));' in sg_set_buf(). Fix this by making crypto_dh_decode_key() reject 0 for 'p'. p=0 makes no sense for any DH implementation because 'p' is supposed to be a prime number. Moreover, 'mod 0' is not mathematically defined. Bug report: kernel BUG at ./include/linux/scatterlist.h:140! invalid opcode: [#1] SMP KASAN CPU: 0 PID: 27112 Comm: syz-executor2 Not tainted 4.14.0-rc7-00010-gf5dbb5d0ce32-dirty #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.3-20171021_125229-anatol 04/01/2014 task: 88006caac0c0 task.stack: 88006c7c8000 RIP: 0010:sg_set_buf include/linux/scatterlist.h:140 [inline] RIP: 0010:sg_init_one+0x1b3/0x240 lib/scatterlist.c:156 RSP: 0018:88006c7cfb08 EFLAGS: 00010216 RAX: 0001 RBX: 88006c7cfe30 RCX: 64ee RDX: 81cf64c3 RSI: c9d72000 RDI: 92e937e0 RBP: 88006c7cfb30 R08: ed000d8f9fab R09: 88006c7cfd30 R10: 0005 R11: ed000d8f9faa R12: 88006c7cfd30 R13: R14: 0010 R15: 88006c7cfc50 FS: 7fce190fa700() GS:88003ea0() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 7fffc6b33db8 CR3: 3cf64000 CR4: 06f0 Call Trace: __keyctl_dh_compute+0xa95/0x19b0 security/keys/dh.c:360 keyctl_dh_compute+0xac/0x100 security/keys/dh.c:434 SYSC_keyctl security/keys/keyctl.c:1745 [inline] SyS_keyctl+0x72/0x2c0 security/keys/keyctl.c:1641 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x4585c9 RSP: 002b:7fce190f9bd8 EFLAGS: 0216 ORIG_RAX: 00fa RAX: ffda RBX: 00738020 RCX: 004585c9 RDX: 2000d000 RSI: 2ff4 RDI: 0017 RBP: 0046 R08: 20008000 R09: R10: R11: 0216 R12: 7fff6e610cde R13: 7fff6e610cdf R14: 7fce190fa700 R15: Code: 03 0f b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 04 84 d2 75 33 5b 45 89 6c 24 14 41 5c 41 5d 41 5e 41 5f 5d c3 e8 fd 8f 68 ff <0f> 0b e8 f6 8f 68 ff 0f 0b e8 ef 8f 68 ff 0f 0b e8 e8 8f 68 ff 20 RIP: sg_set_buf include/linux/scatterlist.h:140 [inline] RSP: 88006c7cfb08 RIP: sg_init_one+0x1b3/0x240 lib/scatterlist.c:156 RSP: 88006c7cfb08 Fixes: 802c7f1c84e4 ("crypto: dh - Add DH software implementation") Cc: # v4.8+ Signed-off-by: Eric Biggers --- crypto/dh_helper.c | 8 1 file changed, 8 insertions(+) diff --git a/crypto/dh_helper.c b/crypto/dh_helper.c index 8ba8a3f82620..708ae20d2d3c 100644 --- a/crypto/dh_helper.c +++ b/crypto/dh_helper.c @@ -90,6 +90,14 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params) params->p = (void *)(ptr + params->key_size); params->g = (void *)(ptr + params->key_size + params->p_size); + /* +* Don't permit 'p' to be 0. It's not a prime number, and it's subject +* to corner cases such as 'mod 0' being undefined or +* crypto_kpp_maxsize() returning 0. +*/ + if (memchr_inv(params->p, 0, params->p_size) == NULL) + return -EINVAL; + return 0; } EXPORT_SYMBOL_GPL(crypto_dh_decode_key); -- 2.15.0.403.gc27cc4dac6-goog
[PATCH 1/4] crypto: dh - fix double free of ctx->p
From: Eric Biggers When setting the secret with the software Diffie-Hellman implementation, if allocating 'g' failed (e.g. if it was longer than MAX_EXTERN_MPI_BITS), then 'p' was freed twice: once immediately, and once later when the crypto_kpp tfm was destroyed. Fix it by using dh_free_ctx() in the error paths, as that sets the pointers to NULL. KASAN report: MPI: mpi too large (32760 bits) == BUG: KASAN: use-after-free in mpi_free+0x131/0x170 Read of size 4 at addr 88006c7cdf90 by task reproduce_doubl/367 CPU: 1 PID: 367 Comm: reproduce_doubl Not tainted 4.14.0-rc7-00040-g05298abde6fe #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0xb3/0x10b ? mpi_free+0x131/0x170 print_address_description+0x79/0x2a0 ? mpi_free+0x131/0x170 kasan_report+0x236/0x340 ? akcipher_register_instance+0x90/0x90 __asan_report_load4_noabort+0x14/0x20 mpi_free+0x131/0x170 ? akcipher_register_instance+0x90/0x90 dh_exit_tfm+0x3d/0x140 crypto_kpp_exit_tfm+0x52/0x70 crypto_destroy_tfm+0xb3/0x250 __keyctl_dh_compute+0x640/0xe90 ? kasan_slab_free+0x12f/0x180 ? dh_data_from_key+0x240/0x240 ? key_create_or_update+0x1ee/0xb20 ? key_instantiate_and_link+0x440/0x440 ? lock_contended+0xee0/0xee0 ? kfree+0xcf/0x210 ? SyS_add_key+0x268/0x340 keyctl_dh_compute+0xb3/0xf1 ? __keyctl_dh_compute+0xe90/0xe90 ? SyS_add_key+0x26d/0x340 ? entry_SYSCALL_64_fastpath+0x5/0xbe ? trace_hardirqs_on_caller+0x3f4/0x560 SyS_keyctl+0x72/0x2c0 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x43ccf9 RSP: 002b:7ffeeec96158 EFLAGS: 0246 ORIG_RAX: 00fa RAX: ffda RBX: 0248b9b9 RCX: 0043ccf9 RDX: 7ffeeec96170 RSI: 7ffeeec96160 RDI: 0017 RBP: 0046 R08: R09: 0248b9b9143dc936 R10: 1000 R11: 0246 R12: R13: 00409670 R14: 00409700 R15: Allocated by task 367: save_stack_trace+0x16/0x20 kasan_kmalloc+0xeb/0x180 kmem_cache_alloc_trace+0x114/0x300 mpi_alloc+0x4b/0x230 mpi_read_raw_data+0xbe/0x360 dh_set_secret+0x1dc/0x460 __keyctl_dh_compute+0x623/0xe90 keyctl_dh_compute+0xb3/0xf1 SyS_keyctl+0x72/0x2c0 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed by task 367: save_stack_trace+0x16/0x20 kasan_slab_free+0xab/0x180 kfree+0xb5/0x210 mpi_free+0xcb/0x170 dh_set_secret+0x2d7/0x460 __keyctl_dh_compute+0x623/0xe90 keyctl_dh_compute+0xb3/0xf1 SyS_keyctl+0x72/0x2c0 entry_SYSCALL_64_fastpath+0x1f/0xbe Fixes: 802c7f1c84e4 ("crypto: dh - Add DH software implementation") Cc: # v4.8+ Signed-off-by: Eric Biggers --- crypto/dh.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/crypto/dh.c b/crypto/dh.c index b1032a5c1bfa..b488f1782ced 100644 --- a/crypto/dh.c +++ b/crypto/dh.c @@ -71,10 +71,8 @@ static int dh_set_params(struct dh_ctx *ctx, struct dh *params) return -EINVAL; ctx->g = mpi_read_raw_data(params->g, params->g_size); - if (!ctx->g) { - mpi_free(ctx->p); + if (!ctx->g) return -EINVAL; - } return 0; } @@ -89,18 +87,20 @@ static int dh_set_secret(struct crypto_kpp *tfm, const void *buf, dh_free_ctx(ctx); if (crypto_dh_decode_key(buf, len, ¶ms) < 0) - return -EINVAL; + goto err_free_ctx; if (dh_set_params(ctx, ¶ms) < 0) - return -EINVAL; + goto err_free_ctx; ctx->xa = mpi_read_raw_data(params.key, params.key_size); - if (!ctx->xa) { - dh_clear_params(ctx); - return -EINVAL; - } + if (!ctx->xa) + goto err_free_ctx; return 0; + +err_free_ctx: + dh_free_ctx(ctx); + return -EINVAL; } static int dh_compute_value(struct kpp_request *req) -- 2.15.0.403.gc27cc4dac6-goog
[PATCH 0/4] crypto: dh - input validation fixes
From: Eric Biggers This series fixes several corner cases in the Diffie-Hellman key exchange implementations: - With CONFIG_DEBUG_SG=y and the software DH implementation, setting 'p' to 0 caused a BUG_ON(). - Both the software and QAT DH implementations had a double-free bug in the case where 'g' could not be allocated. - With the QAT DH implementation, setting 'g' or 'key' larger than 'p' caused a buffer underflow. Note that in kernels configured with CONFIG_KEY_DH_OPERATIONS=y, these bugs are reachable by unprivileged users via KEYCTL_DH_COMPUTE. Eric Biggers (4): crypto: dh - fix double free of ctx->p crypto: dh - don't permit 'p' to be 0 crypto: qat - fix double free of ctx->p crypto: dh - don't permit 'key' or 'g' size longer than 'p' crypto/dh.c | 18 +- crypto/dh_helper.c| 16 drivers/crypto/qat/qat_common/qat_asym_algs.c | 15 --- 3 files changed, 33 insertions(+), 16 deletions(-) -- 2.15.0.403.gc27cc4dac6-goog
Re: invalid opcode: 0000 [#1] SMP [aesni_intel]
On Mon, Oct 23, 2017 at 07:01:32PM +0300, SviMik wrote: > Hi! > > Got the following kernel panic: > > invalid opcode: [#1] SMP > CPU: 0 PID: 1449 Comm: openvpn Not tainted 4.8.13-1.el6.elrepo.x86_64 #1 > cut > Call Trace: > > [] ? enqueue_entity+0x45e/0x6f0 > [] ? aesni_gcm_enc_avx+0x95/0xd0 [aesni_intel] > [] helper_rfc4106_encrypt+0x167/0x2f0 [aesni_intel] > [] rfc4106_encrypt+0x5b/0x90 [aesni_intel] > cut > > The detailed bug report with full oops dump can be found here: > https://bugzilla.kernel.org/show_bug.cgi?id=197363 > > Could not trigger this bug intentionally, but it happened three times > already (all three dumps are available). Well, the program counter was at the 'vmovdqu' instruction, which is the first AVX instruction in the function. Is it possible you're running the kernel on a broken hypervisor that claims AVX instructions are supported but really they aren't? Note that AVX is different from AES-NI; the code being executed has both types of instructions. (And as a side note, 4.8 is not being maintained as a long term support kernel, so you really should switch to a different version.) Eric
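One quick sanity check of what the guest actually advertises is a tiny userspace program built on the compiler's CPU feature builtins. This is only a rough illustration (it assumes GCC or clang and queries CPUID from userspace, which is not exactly the same thing as the kernel's cpufeature handling), but a guest where AVX is missing or masked would show up here immediately.

/* Rough userspace check of advertised CPU features.  Assumes GCC or
 * clang; __builtin_cpu_supports() consults CPUID at runtime. */
#include <stdio.h>

int main(void)
{
	__builtin_cpu_init();
	printf("avx  : %s\n", __builtin_cpu_supports("avx")  ? "yes" : "no");
	printf("avx2 : %s\n", __builtin_cpu_supports("avx2") ? "yes" : "no");
	return 0;
}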
Re: [PATCH 1/4] crypto: dh - fix double free of ctx->p
Hi Tudor, On Thu, Nov 02, 2017 at 12:55:56PM +0200, Tudor Ambarus wrote: > Hi, Eric, > > On 11/02/2017 12:25 AM, Eric Biggers wrote: > >When setting the secret with the software Diffie-Hellman implementation, > >if allocating 'g' failed (e.g. if it was longer than > >MAX_EXTERN_MPI_BITS), then 'p' was freed twice: once immediately, and > >once later when the crypto_kpp tfm was destroyed. Fix it by using > >dh_free_ctx() in the error paths, as that sets the pointers to NULL. > > Other solution would be to have just an one-line patch that sets p to > NULL after freeing it. The benefit of just setting p to NULL and not > using dh_free_ctx() is that you'll spare some cpu cycles. If g fails, > g and a will already be NULL, so freeing them and set them to NULL is > redundant. > > However, if you decide to always use dh_free_ctx(), you'll have to get > rid of dh_clear_params(), because it will be used just in dh_free_ctx(). > You can copy dh_clear_params() body to dh_free_ctx(). > This is on an error path, so a few cycles don't matter. I would much rather have the simpler code, with less chance to introduce exploitable bugs. Yes, I should inline dh_clear_params() into dh_free_ctx(). Eric
Re: [PATCH 2/4] crypto: dh - don't permit 'p' to be 0
On Thu, Nov 02, 2017 at 01:40:51PM +0200, Tudor Ambarus wrote: > Hi, Eric, > > On 11/02/2017 12:25 AM, Eric Biggers wrote: > >If 'p' is 0 for the software Diffie-Hellman implementation, then > >dh_max_size() returns 0. > > dh_set_secret() returns -EINVAL if p_len < 1536, see > dh_check_params_length(). What am I missing? > > Cheers, > ta You pass a buffer containing 0's, not a buffer of length 0. The buffer is interpreted as an arbitrary precision integer, so any length buffer filled with 0's has the mathematical value 0. Eric
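For anyone who wants to poke at this path, the values reach the kernel as the payloads of "user"-type keys, so a reproducer needs only a few libkeyutils calls. This is a sketch, assuming CONFIG_KEY_DH_OPERATIONS=y and arbitrary buffer sizes; the all-zero prime is exactly the "buffer of 0's" case discussed above, and on an unpatched kernel with CONFIG_DEBUG_SG=y it is what triggers the crash.

/* Sketch of how KEYCTL_DH_COMPUTE is reached from userspace: the
 * private key, prime, and base are payloads of "user" keys.  An
 * all-zero prime buffer exercises the p = 0 case.  Link with
 * -lkeyutils; requires CONFIG_KEY_DH_OPERATIONS=y. */
#include <keyutils.h>
#include <stdio.h>
#include <string.h>

static key_serial_t add_user_key(const char *desc, const void *data,
				 size_t len)
{
	return add_key("user", desc, data, len, KEY_SPEC_SESSION_KEYRING);
}

int main(void)
{
	unsigned char prime[256], base[256], priv[256], out[256];
	key_serial_t kpriv, kprime, kbase;
	long ret;

	memset(prime, 0, sizeof(prime));	/* "p" is all zero bytes */
	memset(base, 2, sizeof(base));
	memset(priv, 3, sizeof(priv));

	kpriv = add_user_key("dh:priv", priv, sizeof(priv));
	kprime = add_user_key("dh:prime", prime, sizeof(prime));
	kbase = add_user_key("dh:base", base, sizeof(base));

	ret = keyctl_dh_compute(kpriv, kprime, kbase, (char *)out, sizeof(out));
	printf("keyctl_dh_compute returned %ld\n", ret);
	return 0;
}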
Re: [PATCH 3/4] crypto: qat - fix double free of ctx->p
On Wed, Nov 01, 2017 at 03:25:16PM -0700, Eric Biggers wrote: > From: Eric Biggers > > When setting the secret with the "qat-dh" Diffie-Hellman implementation, > if allocating 'g' failed, then 'p' was freed twice: once immediately, > and once later when the crypto_kpp tfm was destroyed. Fix it by using > qat_dh_clear_ctx() in the error paths, as that sets the pointers to > NULL. > > Fixes: c9839143ebbf ("crypto: qat - Add DH support") > Cc: # v4.8+ > Signed-off-by: Eric Biggers > --- > drivers/crypto/qat/qat_common/qat_asym_algs.c | 15 --- > 1 file changed, 8 insertions(+), 7 deletions(-) > > diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c > b/drivers/crypto/qat/qat_common/qat_asym_algs.c > index 6f5dd68449c6..7655fdb499de 100644 > --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c > +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c > @@ -462,11 +462,8 @@ static int qat_dh_set_params(struct qat_dh_ctx *ctx, > struct dh *params) > } > > ctx->g = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_g, GFP_KERNEL); > - if (!ctx->g) { > - dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p); > - ctx->p = NULL; > + if (!ctx->g) Sorry, I misread this code (and I didn't have the hardware to test this driver); there is actually no bug here because it sets ctx->p to NULL. I think we should still do this patch to simplify the code, but I'll update the description to reflect that it's not actually fixing anything. Eric
[PATCH v2 5/5] crypto: dh - Remove pointless checks for NULL 'p' and 'g'
From: Eric Biggers Neither 'p' nor 'g' can be NULL, as they were unpacked using crypto_dh_decode_key(). And it makes no sense for them to be optional. So remove the NULL checks that were copy-and-pasted into both modules. Signed-off-by: Eric Biggers --- crypto/dh.c | 3 --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/crypto/dh.c b/crypto/dh.c index aadaf36fb56f..5659fe7f446d 100644 --- a/crypto/dh.c +++ b/crypto/dh.c @@ -53,9 +53,6 @@ static int dh_check_params_length(unsigned int p_len) static int dh_set_params(struct dh_ctx *ctx, struct dh *params) { - if (unlikely(!params->p || !params->g)) - return -EINVAL; - if (dh_check_params_length(params->p_size << 3)) return -EINVAL; diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 7655fdb499de..13c52d6bf630 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -443,9 +443,6 @@ static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh *params) struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); - if (unlikely(!params->p || !params->g)) - return -EINVAL; - if (qat_dh_check_params_length(params->p_size << 3)) return -EINVAL; -- 2.15.0
[PATCH v2 0/5] crypto: dh - input validation fixes
This series fixes several corner cases in the Diffie-Hellman key exchange implementations: 1. With the software DH implementation, using a large buffer for 'g' caused a double free. 2. With CONFIG_DEBUG_SG=y and the software DH implementation, setting 'p' to 0 caused a BUG_ON(). 3. With the QAT DH implementation, setting 'key' or 'g' larger than 'p' caused a buffer underflow. Note that in kernels configured with CONFIG_KEY_DH_OPERATIONS=y, these bugs are reachable by unprivileged users via KEYCTL_DH_COMPUTE. Patches 4 and 5 are cleanup only. Eric Biggers (5): crypto: dh - Fix double free of ctx->p crypto: dh - Don't permit 'p' to be 0 crypto: dh - Don't permit 'key' or 'g' size longer than 'p' crypto: qat - Clean up error handling in qat_dh_set_secret() crypto: dh - Remove pointless checks for NULL 'p' and 'g' crypto/dh.c | 36 ++- crypto/dh_helper.c| 16 drivers/crypto/qat/qat_common/qat_asym_algs.c | 18 ++ 3 files changed, 37 insertions(+), 33 deletions(-) -- 2.15.0
[PATCH v2 4/5] crypto: qat - Clean up error handling in qat_dh_set_secret()
From: Eric Biggers Update the error handling in qat_dh_set_secret() to mirror dh_set_secret(). The new version is less error-prone because freeing memory and setting the pointers to NULL is now only done in one place. Signed-off-by: Eric Biggers --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 15 --- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 6f5dd68449c6..7655fdb499de 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -462,11 +462,8 @@ static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh *params) } ctx->g = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_g, GFP_KERNEL); - if (!ctx->g) { - dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p); - ctx->p = NULL; + if (!ctx->g) return -ENOMEM; - } memcpy(ctx->g + (ctx->p_size - params->g_size), params->g, params->g_size); @@ -507,18 +504,22 @@ static int qat_dh_set_secret(struct crypto_kpp *tfm, const void *buf, ret = qat_dh_set_params(ctx, &params); if (ret < 0) - return ret; + goto err_clear_ctx; ctx->xa = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_xa, GFP_KERNEL); if (!ctx->xa) { - qat_dh_clear_ctx(dev, ctx); - return -ENOMEM; + ret = -ENOMEM; + goto err_clear_ctx; } memcpy(ctx->xa + (ctx->p_size - params.key_size), params.key, params.key_size); return 0; + +err_clear_ctx: + qat_dh_clear_ctx(dev, ctx); + return ret; } static unsigned int qat_dh_max_size(struct crypto_kpp *tfm) -- 2.15.0
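The shape this moves to is the usual centralized-unwind idiom. As a generic illustration (plain userspace C, not the driver code itself): every failure path funnels through one routine that frees everything and then zeroes the context, so nothing can be freed twice by a later cleanup.

/* Generic sketch of the error-handling shape adopted above: one
 * cleanup routine frees all resources and zeroes the context, and
 * every failure path jumps to it. */
#include <stdlib.h>
#include <string.h>

struct example_ctx {
	void *p;
	void *g;
	void *xa;
};

static void example_clear_ctx(struct example_ctx *ctx)
{
	free(ctx->p);
	free(ctx->g);
	free(ctx->xa);
	memset(ctx, 0, sizeof(*ctx));	/* a later clear sees only NULLs */
}

static int example_set_secret(struct example_ctx *ctx, size_t n)
{
	example_clear_ctx(ctx);

	ctx->p = malloc(n);
	if (!ctx->p)
		goto err_clear_ctx;
	ctx->g = malloc(n);
	if (!ctx->g)
		goto err_clear_ctx;
	ctx->xa = malloc(n);
	if (!ctx->xa)
		goto err_clear_ctx;
	return 0;

err_clear_ctx:
	example_clear_ctx(ctx);
	return -1;
}

int main(void)
{
	struct example_ctx ctx = { 0 };

	return example_set_secret(&ctx, 16);
}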
[PATCH v2 3/5] crypto: dh - Don't permit 'key' or 'g' size longer than 'p'
From: Eric Biggers The "qat-dh" DH implementation assumes that 'key' and 'g' can be copied into a buffer with size 'p_size'. However it was never checked that that was actually the case, which most likely allowed users to cause a buffer underflow via KEYCTL_DH_COMPUTE. Fix this by updating crypto_dh_decode_key() to verify this precondition for all DH implementations. Fixes: c9839143ebbf ("crypto: qat - Add DH support") Cc: # v4.8+ Signed-off-by: Eric Biggers --- crypto/dh_helper.c | 8 1 file changed, 8 insertions(+) diff --git a/crypto/dh_helper.c b/crypto/dh_helper.c index 708ae20d2d3c..7f00c771fe8d 100644 --- a/crypto/dh_helper.c +++ b/crypto/dh_helper.c @@ -83,6 +83,14 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params) if (secret.len != crypto_dh_key_len(params)) return -EINVAL; + /* +* Don't permit the buffer for 'key' or 'g' to be larger than 'p', since +* some drivers assume otherwise. +*/ + if (params->key_size > params->p_size || + params->g_size > params->p_size) + return -EINVAL; + /* Don't allocate memory. Set pointers to data within * the given buffer */ -- 2.15.0
[PATCH v2 2/5] crypto: dh - Don't permit 'p' to be 0
From: Eric Biggers If 'p' is 0 for the software Diffie-Hellman implementation, then dh_max_size() returns 0. In the case of KEYCTL_DH_COMPUTE, this causes ZERO_SIZE_PTR to be passed to sg_init_one(), which with CONFIG_DEBUG_SG=y triggers the 'BUG_ON(!virt_addr_valid(buf));' in sg_set_buf(). Fix this by making crypto_dh_decode_key() reject 0 for 'p'. p=0 makes no sense for any DH implementation because 'p' is supposed to be a prime number. Moreover, 'mod 0' is not mathematically defined. Bug report: kernel BUG at ./include/linux/scatterlist.h:140! invalid opcode: [#1] SMP KASAN CPU: 0 PID: 27112 Comm: syz-executor2 Not tainted 4.14.0-rc7-00010-gf5dbb5d0ce32-dirty #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.3-20171021_125229-anatol 04/01/2014 task: 88006caac0c0 task.stack: 88006c7c8000 RIP: 0010:sg_set_buf include/linux/scatterlist.h:140 [inline] RIP: 0010:sg_init_one+0x1b3/0x240 lib/scatterlist.c:156 RSP: 0018:88006c7cfb08 EFLAGS: 00010216 RAX: 0001 RBX: 88006c7cfe30 RCX: 64ee RDX: 81cf64c3 RSI: c9d72000 RDI: 92e937e0 RBP: 88006c7cfb30 R08: ed000d8f9fab R09: 88006c7cfd30 R10: 0005 R11: ed000d8f9faa R12: 88006c7cfd30 R13: R14: 0010 R15: 88006c7cfc50 FS: 7fce190fa700() GS:88003ea0() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 7fffc6b33db8 CR3: 3cf64000 CR4: 06f0 Call Trace: __keyctl_dh_compute+0xa95/0x19b0 security/keys/dh.c:360 keyctl_dh_compute+0xac/0x100 security/keys/dh.c:434 SYSC_keyctl security/keys/keyctl.c:1745 [inline] SyS_keyctl+0x72/0x2c0 security/keys/keyctl.c:1641 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x4585c9 RSP: 002b:7fce190f9bd8 EFLAGS: 0216 ORIG_RAX: 00fa RAX: ffda RBX: 00738020 RCX: 004585c9 RDX: 2000d000 RSI: 2ff4 RDI: 0017 RBP: 0046 R08: 20008000 R09: R10: R11: 0216 R12: 7fff6e610cde R13: 7fff6e610cdf R14: 7fce190fa700 R15: Code: 03 0f b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 04 84 d2 75 33 5b 45 89 6c 24 14 41 5c 41 5d 41 5e 41 5f 5d c3 e8 fd 8f 68 ff <0f> 0b e8 f6 8f 68 ff 0f 0b e8 ef 8f 68 ff 0f 0b e8 e8 8f 68 ff 20 RIP: sg_set_buf include/linux/scatterlist.h:140 [inline] RSP: 88006c7cfb08 RIP: sg_init_one+0x1b3/0x240 lib/scatterlist.c:156 RSP: 88006c7cfb08 Fixes: 802c7f1c84e4 ("crypto: dh - Add DH software implementation") Cc: # v4.8+ Reviewed-by: Tudor Ambarus Signed-off-by: Eric Biggers --- crypto/dh_helper.c | 8 1 file changed, 8 insertions(+) diff --git a/crypto/dh_helper.c b/crypto/dh_helper.c index 8ba8a3f82620..708ae20d2d3c 100644 --- a/crypto/dh_helper.c +++ b/crypto/dh_helper.c @@ -90,6 +90,14 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params) params->p = (void *)(ptr + params->key_size); params->g = (void *)(ptr + params->key_size + params->p_size); + /* +* Don't permit 'p' to be 0. It's not a prime number, and it's subject +* to corner cases such as 'mod 0' being undefined or +* crypto_kpp_maxsize() returning 0. +*/ + if (memchr_inv(params->p, 0, params->p_size) == NULL) + return -EINVAL; + return 0; } EXPORT_SYMBOL_GPL(crypto_dh_decode_key); -- 2.15.0
[PATCH v2 1/5] crypto: dh - Fix double free of ctx->p
From: Eric Biggers When setting the secret with the software Diffie-Hellman implementation, if allocating 'g' failed (e.g. if it was longer than MAX_EXTERN_MPI_BITS), then 'p' was freed twice: once immediately, and once later when the crypto_kpp tfm was destroyed. Fix it by using dh_free_ctx() (renamed to dh_clear_ctx()) in the error paths, as that correctly sets the pointers to NULL. KASAN report: MPI: mpi too large (32760 bits) == BUG: KASAN: use-after-free in mpi_free+0x131/0x170 Read of size 4 at addr 88006c7cdf90 by task reproduce_doubl/367 CPU: 1 PID: 367 Comm: reproduce_doubl Not tainted 4.14.0-rc7-00040-g05298abde6fe #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0xb3/0x10b ? mpi_free+0x131/0x170 print_address_description+0x79/0x2a0 ? mpi_free+0x131/0x170 kasan_report+0x236/0x340 ? akcipher_register_instance+0x90/0x90 __asan_report_load4_noabort+0x14/0x20 mpi_free+0x131/0x170 ? akcipher_register_instance+0x90/0x90 dh_exit_tfm+0x3d/0x140 crypto_kpp_exit_tfm+0x52/0x70 crypto_destroy_tfm+0xb3/0x250 __keyctl_dh_compute+0x640/0xe90 ? kasan_slab_free+0x12f/0x180 ? dh_data_from_key+0x240/0x240 ? key_create_or_update+0x1ee/0xb20 ? key_instantiate_and_link+0x440/0x440 ? lock_contended+0xee0/0xee0 ? kfree+0xcf/0x210 ? SyS_add_key+0x268/0x340 keyctl_dh_compute+0xb3/0xf1 ? __keyctl_dh_compute+0xe90/0xe90 ? SyS_add_key+0x26d/0x340 ? entry_SYSCALL_64_fastpath+0x5/0xbe ? trace_hardirqs_on_caller+0x3f4/0x560 SyS_keyctl+0x72/0x2c0 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x43ccf9 RSP: 002b:7ffeeec96158 EFLAGS: 0246 ORIG_RAX: 00fa RAX: ffda RBX: 0248b9b9 RCX: 0043ccf9 RDX: 7ffeeec96170 RSI: 7ffeeec96160 RDI: 0017 RBP: 0046 R08: R09: 0248b9b9143dc936 R10: 1000 R11: 0246 R12: R13: 00409670 R14: 00409700 R15: Allocated by task 367: save_stack_trace+0x16/0x20 kasan_kmalloc+0xeb/0x180 kmem_cache_alloc_trace+0x114/0x300 mpi_alloc+0x4b/0x230 mpi_read_raw_data+0xbe/0x360 dh_set_secret+0x1dc/0x460 __keyctl_dh_compute+0x623/0xe90 keyctl_dh_compute+0xb3/0xf1 SyS_keyctl+0x72/0x2c0 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed by task 367: save_stack_trace+0x16/0x20 kasan_slab_free+0xab/0x180 kfree+0xb5/0x210 mpi_free+0xcb/0x170 dh_set_secret+0x2d7/0x460 __keyctl_dh_compute+0x623/0xe90 keyctl_dh_compute+0xb3/0xf1 SyS_keyctl+0x72/0x2c0 entry_SYSCALL_64_fastpath+0x1f/0xbe Fixes: 802c7f1c84e4 ("crypto: dh - Add DH software implementation") Cc: # v4.8+ Signed-off-by: Eric Biggers --- crypto/dh.c | 33 + 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/crypto/dh.c b/crypto/dh.c index b1032a5c1bfa..aadaf36fb56f 100644 --- a/crypto/dh.c +++ b/crypto/dh.c @@ -21,19 +21,12 @@ struct dh_ctx { MPI xa; }; -static inline void dh_clear_params(struct dh_ctx *ctx) +static void dh_clear_ctx(struct dh_ctx *ctx) { mpi_free(ctx->p); mpi_free(ctx->g); - ctx->p = NULL; - ctx->g = NULL; -} - -static void dh_free_ctx(struct dh_ctx *ctx) -{ - dh_clear_params(ctx); mpi_free(ctx->xa); - ctx->xa = NULL; + memset(ctx, 0, sizeof(*ctx)); } /* @@ -71,10 +64,8 @@ static int dh_set_params(struct dh_ctx *ctx, struct dh *params) return -EINVAL; ctx->g = mpi_read_raw_data(params->g, params->g_size); - if (!ctx->g) { - mpi_free(ctx->p); + if (!ctx->g) return -EINVAL; - } return 0; } @@ -86,21 +77,23 @@ static int dh_set_secret(struct crypto_kpp *tfm, const void *buf, struct dh params; /* Free the old MPI key if any */ - dh_free_ctx(ctx); + dh_clear_ctx(ctx); if (crypto_dh_decode_key(buf, len, ¶ms) < 0) - return -EINVAL; + goto err_clear_ctx; if (dh_set_params(ctx, ¶ms) < 0) - 
return -EINVAL; + goto err_clear_ctx; ctx->xa = mpi_read_raw_data(params.key, params.key_size); - if (!ctx->xa) { - dh_clear_params(ctx); - return -EINVAL; - } + if (!ctx->xa) + goto err_clear_ctx; return 0; + +err_clear_ctx: + dh_clear_ctx(ctx); + return -EINVAL; } static int dh_compute_value(struct kpp_request *req) @@ -158,7 +151,7 @@ static void dh_exit_tfm(struct crypto_kpp *tfm) { struct dh_ctx *ctx = dh_
Re: general protection fault in asn1_ber_decoder
On Mon, Nov 06, 2017 at 10:36:00AM -0800, syzbot wrote: > kasan: GPF could be caused by NULL-ptr deref or user memory access > general protection fault: [#1] SMP KASAN > Dumping ftrace buffer: >(ftrace buffer empty) > Modules linked in: > CPU: 3 PID: 2984 Comm: syzkaller229187 Not tainted > 4.14.0-rc7-next-20171103+ #10 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 > task: 88003ab66000 task.stack: 880039d58000 > RIP: 0010:asn1_ber_decoder+0x41e/0x1af0 lib/asn1_decoder.c:233 > RSP: 0018:880039d5f8d0 EFLAGS: 00010246 > RAX: RBX: RCX: > RDX: RSI: 88006981bf00 RDI: 853f1920 > RBP: 880039d5fb88 R08: 0060 R09: > R10: R11: R12: > R13: R14: 880039d5fb60 R15: dc00 > FS: 010ac880() GS:88006df0() knlGS: > CS: 0010 DS: ES: CR0: 80050033 > CR2: 20f49ffb CR3: 6aeb8000 CR4: 06e0 > DR0: DR1: DR2: > DR3: DR6: fffe0ff0 DR7: 0400 > Call Trace: > pkcs7_parse_message+0x2b3/0x730 crypto/asymmetric_keys/pkcs7_parser.c:139 > verify_pkcs7_signature+0x8d/0x290 certs/system_keyring.c:216 > pkcs7_preparse+0x7b/0xc0 crypto/asymmetric_keys/pkcs7_key_type.c:63 > key_create_or_update+0x533/0x1040 security/keys/key.c:855 > SYSC_add_key security/keys/keyctl.c:122 [inline] > SyS_add_key+0x18a/0x340 security/keys/keyctl.c:62 > entry_SYSCALL_64_fastpath+0x1f/0xbe > RIP: 0033:0x434f39 > RSP: 002b:7fff51fda138 EFLAGS: 0286 ORIG_RAX: 00f8 > RAX: ffda RBX: 004002b0 RCX: 00434f39 > RDX: 2000 RSI: 20f49ffb RDI: 20f4a000 > RBP: 0086 R08: R09: > R10: R11: 0286 R12: > R13: 004018b0 R14: 00401940 R15: > Code: 19 ff 48 8d 43 01 49 89 86 80 fe ff ff 48 89 85 a8 fd ff ff 48 > 8b 85 c0 fd ff ff 48 01 d8 48 89 c2 48 89 c1 48 c1 ea 03 83 e1 07 > <42> 0f b6 14 3a 38 ca 7f 08 84 d2 0f 85 cd 0f 00 00 0f b6 00 88 > RIP: asn1_ber_decoder+0x41e/0x1af0 lib/asn1_decoder.c:233 RSP: > 880039d5f8d0 I'll be sending a fix for this. The bug is an integer underflow in the expression 'if (unlikely(dp >= datalen - 1))', which causes a NULL pointer dereference if you try to add a "pkcs7_test" key with an empty payload (requires CONFIG_PKCS7_TEST_KEY=y). Eric
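The underflow is easy to demonstrate in isolation. The snippet below is not the kernel code itself, just a standalone illustration with the same unsigned types: with an empty payload the existing check wraps around and never fires, while checking the remaining length instead stays well defined (given the decoder's invariant that dp never exceeds datalen).

/* Standalone illustration of the unsigned-underflow pitfall in a
 * bounds check like the one in asn1_ber_decoder().  With datalen == 0,
 * "datalen - 1" wraps to SIZE_MAX, so the broken check never fires. */
#include <stdio.h>
#include <stddef.h>

static int overrun_broken(size_t dp, size_t datalen)
{
	return dp >= datalen - 1;	/* wraps when datalen == 0 */
}

static int overrun_fixed(size_t dp, size_t datalen)
{
	return datalen - dp < 2;	/* safe: dp <= datalen always holds */
}

int main(void)
{
	printf("broken check flags overrun: %d (should be 1)\n",
	       overrun_broken(0, 0));
	printf("fixed check flags overrun:  %d\n", overrun_fixed(0, 0));
	return 0;
}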
Re: general protection fault in asn1_ber_decoder
On Mon, Nov 06, 2017 at 10:05:45PM +, David Howells wrote: > diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c > index fef5d2e114be..048de2c20ae9 100644 > --- a/lib/asn1_decoder.c > +++ b/lib/asn1_decoder.c > @@ -201,6 +201,13 @@ int asn1_ber_decoder(const struct asn1_decoder *decoder, > if (datalen > 65535) > return -EMSGSIZE; > > + /* We don't currently support 0-length messages - the underrun checks > + * will fail if datalen is 0 because we check against datalen - 1 with > + * unsigned arithmetic. > + */ > + if (datalen == 0) > + return -EBADMSG; > + Hi David, you just beat me to it, but I don't think this is the best way to fix the problem. The length check just needs to be rewritten to not overflow. Also it seems there is another broken length check later in the function. How about this: commit 8bbe85872c660c891eb978a037f590198319e3b2 Author: Eric Biggers Date: Mon Nov 6 10:06:32 2017 -0800 KEYS: fix NULL pointer dereference during ASN.1 parsing syzkaller reported a NULL pointer dereference in asn1_ber_decoder(). It can be reproduced by the following command, assuming CONFIG_PKCS7_TEST_KEY=y: keyctl add pkcs7_test desc '' @s The bug is that if the data buffer is empty, an integer underflow occurs in the following check: if (unlikely(dp >= datalen - 1)) goto data_overrun_error; This results in the NULL data pointer being dereferenced. Fix it by checking for 'datalen - dp < 2' instead. Also fix the similar check for 'dp >= datalen - n' later in the same function. That one possibly could result in a buffer overread. The NULL pointer dereference was reproducible using the "pkcs7_test" key type but not the "asymmetric" key type because the "asymmetric" key type checks for a 0-length payload before calling into the ASN.1 decoder but the "pkcs7_test" key type does not. 
The bug report was: BUG: unable to handle kernel NULL pointer dereference at (null) IP: asn1_ber_decoder+0x17f/0xe60 lib/asn1_decoder.c:233 PGD 7b708067 P4D 7b708067 PUD 7b6ee067 PMD 0 Oops: [#1] SMP Modules linked in: CPU: 0 PID: 522 Comm: syz-executor1 Not tainted 4.14.0-rc8 #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.3-20171021_125229-anatol 04/01/2014 task: 9b6b3798c040 task.stack: 9b6b3797 RIP: 0010:asn1_ber_decoder+0x17f/0xe60 lib/asn1_decoder.c:233 RSP: 0018:9b6b37973c78 EFLAGS: 00010216 RAX: RBX: RCX: 021c RDX: 814a04ed RSI: b1524066e000 RDI: 910759e0 RBP: 9b6b37973d60 R08: 0001 R09: 9b6b3caa4180 R10: R11: R12: 0002 R13: R14: R15: FS: 7f10ed1f2700() GS:9b6b3ea0() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: CR3: 7b6f3000 CR4: 06f0 Call Trace: pkcs7_parse_message+0xee/0x240 crypto/asymmetric_keys/pkcs7_parser.c:139 verify_pkcs7_signature+0x33/0x180 certs/system_keyring.c:216 pkcs7_preparse+0x41/0x70 crypto/asymmetric_keys/pkcs7_key_type.c:63 key_create_or_update+0x180/0x530 security/keys/key.c:855 SYSC_add_key security/keys/keyctl.c:122 [inline] SyS_add_key+0xbf/0x250 security/keys/keyctl.c:62 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x4585c9 RSP: 002b:7f10ed1f1bd8 EFLAGS: 0216 ORIG_RAX: 00f8 RAX: ffda RBX: 7f10ed1f2700 RCX: 004585c9 RDX: 2000 RSI: 20008ffb RDI: 20008000 RBP: R08: R09: R10: R11: 0216 R12: 7fff1b2260ae R13: 7fff1b2260af R14: 7f10ed1f2700 R15: Code: dd ca ff 48 8b 45 88 48 83 e8 01 4c 39 f0 0f 86 a8 07 00 00 e8 53 dd ca ff 49 8d 46 01 48 89 85 58 ff ff ff 48 8b 85 60 ff ff ff <42> 0f b6 0c 30 89 c8 88 8d 75 ff ff ff 83 e0 1f 89 8d 28 ff ff RIP: asn1_ber_decoder+0x17f/0xe60 lib/asn1_decoder.c:233 RSP: 9b6b37973c78 CR2: 0000 Fixes: 42d5ec27f873 ("X.509: Add an ASN.1 decoder") Reported-by: syzbot Cc: # v3.7+ Signed-off-by: Eric Biggers diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index fef5d2e114be..1ef0cec38d78 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -228,7 +228,7 @@ int asn1_