On Mon, Oct 07, 2019 at 06:45:48PM +0200, Ard Biesheuvel wrote:
> diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S
> index 2140319b64a0..0970ae107590 100644
> --- a/arch/arm/crypto/chacha-scalar-core.S
> +++ b/arch/arm/crypto/chacha-scalar-core.S
> @@ -41,14 +41,6 @@
>       X14     .req    r12
>       X15     .req    r14
>  
> -.Lexpand_32byte_k:
> -     // "expand 32-byte k"
> -     .word   0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
> -
> -#ifdef __thumb2__
> -#  define adrl adr
> -#endif
> -
>  .macro __rev         out, in,  t0, t1, t2
>  .if __LINUX_ARM_ARCH__ >= 6
>       rev             \out, \in
> @@ -391,61 +383,65 @@
>  .endm        // _chacha
>  
>  /*
> - * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
> - *                const u32 iv[4]);
> + * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
> + *                u32 *state, int nrounds);
>   */
> -ENTRY(chacha20_arm)
> +ENTRY(chacha_doarm)
>       cmp             r2, #0                  // len == 0?
>       reteq           lr
>  
> +     ldr             ip, [sp]
> +     cmp             ip, #12
> +
>       push            {r0-r2,r4-r11,lr}
>  
>       // Push state x0-x15 onto stack.
>       // Also store an extra copy of x10-x11 just before the state.
>  
> -     ldr             r4, [sp, #48]           // iv
> -     mov             r0, sp
> -     sub             sp, #80
> -
> -     // iv: x12-x15
> -     ldm             r4, {X12,X13,X14,X15}
> -     stmdb           r0!, {X12,X13,X14,X15}
> +     add             X12, r3, #48
> +     ldm             X12, {X12,X13,X14,X15}
> +     push            {X12,X13,X14,X15}
> +     sub             sp, sp, #64
>  
> -     // key: x4-x11
> -     __ldrd          X8_X10, X9_X11, r3, 24
> +     __ldrd          X8_X10, X9_X11, r3, 40
>       __strd          X8_X10, X9_X11, sp, 8
> -     stmdb           r0!, {X8_X10, X9_X11}
> -     ldm             r3, {X4-X9_X11}
> -     stmdb           r0!, {X4-X9_X11}
> -
> -     // constants: x0-x3
> -     adrl            X3, .Lexpand_32byte_k
> -     ldm             X3, {X0-X3}
> +     __strd          X8_X10, X9_X11, sp, 56
> +     ldm             r3, {X0-X9_X11}
>       __strd          X0, X1, sp, 16
>       __strd          X2, X3, sp, 24
> +     __strd          X4, X5, sp, 32
> +     __strd          X6, X7, sp, 40
> +     __strd          X8_X10, X9_X11, sp, 48
>  
> +     beq             1f
>       _chacha         20
>  
> -     add             sp, #76
> +0:   add             sp, #76
>       pop             {r4-r11, pc}
> -ENDPROC(chacha20_arm)
> +
> +1:   _chacha         12
> +     b               0b
> +ENDPROC(chacha_doarm)
>  
>  /*
> - * void hchacha20_arm(const u32 state[16], u32 out[8]);
> + * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
>   */
> -ENTRY(hchacha20_arm)
> +ENTRY(hchacha_block_arm)
>       push            {r1,r4-r11,lr}
>  
> +     cmp             r2, #12                 // ChaCha12 ?
> +
>       mov             r14, r0
>       ldmia           r14!, {r0-r11}          // load x0-x11
>       push            {r10-r11}               // store x10-x11 to stack
>       ldm             r14, {r10-r12,r14}      // load x12-x15
>       sub             sp, #8
>  
> +     beq             1f
>       _chacha_permute 20
>  
>       // Skip over (unused0-unused1, x10-x11)
> -     add             sp, #16
> +0:   add             sp, #16
>  
>       // Fix up rotations of x12-x15
>       ror             X12, X12, #drot
> @@ -458,4 +454,7 @@ ENTRY(hchacha20_arm)
>       stm             r4, {X0,X1,X2,X3,X12,X13,X14,X15}
>  
>       pop             {r4-r11,pc}
> -ENDPROC(hchacha20_arm)
> +
> +1:   _chacha_permute 12
> +     b               0b
> +ENDPROC(hchacha_block_arm)
> -- 

FYI, I also have a version of this code that supports both the 12- and 20-round
variants sitting around here:
https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git/commit/?h=chacha-arm-scalar&id=fc51d8012742f591da3204b327a865f6109d472a
I'll take a closer look at this later, but you might want to take a quick look
at what I did, just in case I happened to do anything in a better way.

- Eric

Reply via email to