Align ARM's hardware instruction based AES implementation with the other
versions that keep the key schedule in native endianness. This will allow
us to merge the various implementations going forward.

Signed-off-by: Ard Biesheuvel <ard.biesheu...@linaro.org>
---
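Note for reviewers (not part of the commit): below is a minimal, standalone
C sketch of why the input key is now read with get_unaligned_le32() instead
of memcpy() once the schedule is kept in native endianness. The
demo_get_unaligned_le32() helper is a local stand-in for the kernel's
get_unaligned_le32(), and the key bytes are just example values.

/*
 * Standalone demo (not kernel code): why a native-endian key schedule
 * wants an explicit little-endian load of the input key words.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* stand-in for the kernel's get_unaligned_le32() helper */
static uint32_t demo_get_unaligned_le32(const uint8_t *p)
{
        return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
               ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

int main(void)
{
        const uint8_t in_key[4] = { 0x2b, 0x7e, 0x15, 0x16 }; /* example bytes */
        uint32_t via_memcpy, via_le32;

        memcpy(&via_memcpy, in_key, sizeof(via_memcpy));  /* old approach */
        via_le32 = demo_get_unaligned_le32(in_key);       /* new approach */

        /*
         * On a little-endian CPU both values are 0x16157e2b.  On a
         * big-endian CPU memcpy() yields 0x2b7e1516 instead, which is
         * what forced the CONFIG_CPU_BIG_ENDIAN special case (rol32()
         * and rcon[i] << 24) in the old key expansion.  With the
         * explicit LE load the C code sees the same word values either
         * way, and the assembly switches from vld1.8 to vld1.32 so the
         * NEON registers match those native-endian u32 round keys.
         */
        printf("memcpy: 0x%08x  le32: 0x%08x\n",
               (unsigned)via_memcpy, (unsigned)via_le32);
        return 0;
}
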
 arch/arm/crypto/aes-ce-core.S | 20 ++++++++++----------
 arch/arm/crypto/aes-ce-glue.c |  9 +++------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
index bc53bcaa772e..3692b8735ef7 100644
--- a/arch/arm/crypto/aes-ce-core.S
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -91,19 +91,19 @@
 
        .macro          do_block, dround, fround
        cmp             r3, #12                 @ which key size?
-       vld1.8          {q10-q11}, [ip]!
+       vld1.32         {q10-q11}, [ip]!
        \dround         q8, q9
-       vld1.8          {q12-q13}, [ip]!
+       vld1.32         {q12-q13}, [ip]!
        \dround         q10, q11
-       vld1.8          {q10-q11}, [ip]!
+       vld1.32         {q10-q11}, [ip]!
        \dround         q12, q13
-       vld1.8          {q12-q13}, [ip]!
+       vld1.32         {q12-q13}, [ip]!
        \dround         q10, q11
        blo             0f                      @ AES-128: 10 rounds
-       vld1.8          {q10-q11}, [ip]!
+       vld1.32         {q10-q11}, [ip]!
        \dround         q12, q13
        beq             1f                      @ AES-192: 12 rounds
-       vld1.8          {q12-q13}, [ip]
+       vld1.32         {q12-q13}, [ip]
        \dround         q10, q11
 0:     \fround         q12, q13, q14
        bx              lr
@@ -152,8 +152,8 @@ ENDPROC(aes_decrypt_3x)
 
        .macro          prepare_key, rk, rounds
        add             ip, \rk, \rounds, lsl #4
-       vld1.8          {q8-q9}, [\rk]          @ load first 2 round keys
-       vld1.8          {q14}, [ip]             @ load last round key
+       vld1.32         {q8-q9}, [\rk]          @ load first 2 round keys
+       vld1.32         {q14}, [ip]             @ load last round key
        .endm
 
        /*
@@ -508,8 +508,8 @@ ENDPROC(ce_aes_sub)
         *                                        operation on round key *src
         */
 ENTRY(ce_aes_invert)
-       vld1.8          {q0}, [r1]
+       vld1.32         {q0}, [r1]
        aesimc.8        q0, q0
-       vst1.8          {q0}, [r0]
+       vst1.32         {q0}, [r0]
        bx              lr
 ENDPROC(ce_aes_invert)
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index 04ba66903674..e6da3e30018b 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -10,6 +10,7 @@
 
 #include <asm/hwcap.h>
 #include <asm/neon.h>
+#include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
@@ -80,21 +81,17 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
            key_len != AES_KEYSIZE_256)
                return -EINVAL;
 
-       memcpy(ctx->key_enc, in_key, key_len);
        ctx->key_length = key_len;
+       for (i = 0; i < kwords; i++)
+               ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
 
        kernel_neon_begin();
        for (i = 0; i < sizeof(rcon); i++) {
                u32 *rki = ctx->key_enc + (i * kwords);
                u32 *rko = rki + kwords;
 
-#ifndef CONFIG_CPU_BIG_ENDIAN
                rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8);
                rko[0] = rko[0] ^ rki[0] ^ rcon[i];
-#else
-               rko[0] = rol32(ce_aes_sub(rki[kwords - 1]), 8);
-               rko[0] = rko[0] ^ rki[0] ^ (rcon[i] << 24);
-#endif
                rko[1] = rko[0] ^ rki[1];
                rko[2] = rko[1] ^ rki[2];
                rko[3] = rko[2] ^ rki[3];
-- 
2.20.1
