These wire Andy Polyakov's implementations up to the kernel. We make a
few small changes to the assembly:

- Entries and exits use the proper kernel convention macros (ENTRY and
  ENDPROC).
- CPU feature checking is done in C by the glue code, so that has been
  removed from the assembly.
- The function names have been renamed to fit kernel conventions.
- Labels have been renamed to fit kernel conventions.
- The NEON code can jump to the scalar code when it makes sense to do
  so, for example when the input is short and the state has not yet
  been converted to base 2^26.

Signed-off-by: Jason A. Donenfeld <ja...@zx2c4.com>
Cc: Samuel Neves <sne...@dei.uc.pt>
Cc: Andy Lutomirski <l...@kernel.org>
Cc: Greg KH <gre...@linuxfoundation.org>
Cc: Jean-Philippe Aumasson <jeanphilippe.aumas...@gmail.com>
Cc: Russell King <li...@armlinux.org.uk>
Cc: linux-arm-ker...@lists.infradead.org
---
 lib/zinc/Makefile                             |   2 +
 lib/zinc/poly1305/poly1305-arm-glue.h         | 119 ++++++++++++++
 ...ly1305-arm-cryptogams.S => poly1305-arm.S} | 147 ++++++------------
 ...05-arm64-cryptogams.S => poly1305-arm64.S} | 103 ++++--------
 lib/zinc/poly1305/poly1305.c                  |   2 +
 5 files changed, 198 insertions(+), 175 deletions(-)
 create mode 100644 lib/zinc/poly1305/poly1305-arm-glue.h
 rename lib/zinc/poly1305/{poly1305-arm-cryptogams.S => poly1305-arm.S} (91%)
 rename lib/zinc/poly1305/{poly1305-arm64-cryptogams.S => poly1305-arm64.S} (90%)

diff --git a/lib/zinc/Makefile b/lib/zinc/Makefile
index a8943d960b6a..c09fd3de60f9 100644
--- a/lib/zinc/Makefile
+++ b/lib/zinc/Makefile
@@ -12,4 +12,6 @@ obj-$(CONFIG_ZINC_CHACHA20) += zinc_chacha20.o
 
 zinc_poly1305-y := poly1305/poly1305.o
 zinc_poly1305-$(CONFIG_ZINC_ARCH_X86_64) += poly1305/poly1305-x86_64.o
+zinc_poly1305-$(CONFIG_ZINC_ARCH_ARM) += poly1305/poly1305-arm.o
+zinc_poly1305-$(CONFIG_ZINC_ARCH_ARM64) += poly1305/poly1305-arm64.o
 obj-$(CONFIG_ZINC_POLY1305) += zinc_poly1305.o
diff --git a/lib/zinc/poly1305/poly1305-arm-glue.h b/lib/zinc/poly1305/poly1305-arm-glue.h
new file mode 100644
index 000000000000..ddeb58a2b547
--- /dev/null
+++ b/lib/zinc/poly1305/poly1305-arm-glue.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <ja...@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+
+/* Scalar entry points, implemented in poly1305-arm.S and poly1305-arm64.S. */
+asmlinkage void poly1305_init_arm(void *ctx, const u8 key[16]);
+asmlinkage void poly1305_blocks_arm(void *ctx, const u8 *inp, const size_t len,
+                                   const u32 padbit);
+asmlinkage void poly1305_emit_arm(void *ctx, u8 mac[16], const u32 nonce[4]);
+#if defined(CONFIG_KERNEL_MODE_NEON)
+/* NEON entry points, assembled only when CONFIG_KERNEL_MODE_NEON is set. */
+asmlinkage void poly1305_blocks_neon(void *ctx, const u8 *inp, const size_t len,
+                                    const u32 padbit);
+asmlinkage void poly1305_emit_neon(void *ctx, u8 mac[16], const u32 nonce[4]);
+#endif
+
+/* True when the CPU reports NEON (ARM) or ASIMD (ARM64); set at init time. */
+static bool poly1305_use_neon __ro_after_init;
+
+/* Cache whether elf_hwcap reports NEON (ARM) or ASIMD (ARM64). */
+static void __init poly1305_fpu_init(void)
+{
+#if defined(CONFIG_ARM64)
+       poly1305_use_neon = elf_hwcap & HWCAP_ASIMD;
+#elif defined(CONFIG_ARM)
+       poly1305_use_neon = elf_hwcap & HWCAP_NEON;
+#endif
+}
+
+/*
+ * C view of the state that the assembly keeps in the opaque ctx. The field
+ * offsets must match the ones hard-coded in the assembly: ARM64 loads
+ * is_base2_26 from offset 24 and stores the key at offset 32, while ARM loads
+ * the key from offset 20 and is_base2_26 from offset 36. h[] overlays the
+ * hash as 32-bit limbs (the base 2^26 form); h0/h1/h2 overlay the same bytes
+ * in the form the scalar code consumes.
+ */
+#if defined(CONFIG_ARM64)
+struct poly1305_arch_internal {
+       union {
+               u32 h[5];
+               struct {
+                       u64 h0, h1, h2;
+               };
+       };
+       u32 is_base2_26;
+       u64 r[2];
+};
+#elif defined(CONFIG_ARM)
+struct poly1305_arch_internal {
+       union {
+               u32 h[5];
+               struct {
+                       u64 h0, h1;
+                       u32 h2;
+               } __packed;
+       };
+       u32 r[4];
+       u32 is_base2_26;
+};
+#endif
+
+#if defined(CONFIG_KERNEL_MODE_NEON)
+/*
+ * Convert the hash in ctx from base 2^26 limbs to the h0/h1/h2 form used by
+ * the scalar routines, so that they can continue a computation that the NEON
+ * code started. Does nothing unless the state is flagged as base 2^26.
+ */
+static void convert_to_base2_64(void *ctx)
+{
+       struct poly1305_arch_internal *state = ctx;
+       u32 cy;
+
+       if (!state->is_base2_26)
+               return;
+
+       /* Propagate carries so that limbs 0..3 hold at most 26 bits each. */
+       cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
+       cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
+       cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
+       cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
+       /* Repack the limbs into contiguous 64-bit words. */
+       state->h0 = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
+       state->h1 = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
+       state->h2 = state->h[4] >> 24;
+       /* Branch-free unsigned comparison: 1 if a < b, else 0. */
+#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
+       /* Fold everything at or above bit 130 back in, multiplied by 5. */
+       cy = (state->h2 >> 2) + (state->h2 & ~3ULL);
+       state->h2 &= 3;
+       state->h0 += cy;
+       state->h1 += (cy = ULT(state->h0, cy));
+       state->h2 += ULT(state->h1, cy);
+#undef ULT
+#if defined(CONFIG_ARM) && defined(CONFIG_CPU_BIG_ENDIAN)
+       /*
+        * The 32-bit scalar code reads the hash as 32-bit words, least
+        * significant word first, but a native big-endian u64 store puts
+        * the most significant word first. Swap the halves to compensate.
+        */
+       state->h0 = (state->h0 << 32) | (state->h0 >> 32);
+       state->h1 = (state->h1 << 32) | (state->h1 >> 32);
+#endif
+       state->is_base2_26 = 0;
+}
+#endif
+
+/* Initialize ctx from key with the assembly routine. Always returns true. */
+static inline bool poly1305_init_arch(void *ctx,
+                                     const u8 key[POLY1305_KEY_SIZE])
+{
+       poly1305_init_arm(ctx, key);
+       return true;
+}
+
+/*
+ * Feed len bytes at inp into the hash state. The NEON routine is used when
+ * the CPU has NEON and simd_use() succeeds; otherwise the state is converted
+ * back (if needed) and the scalar routine is used. Always returns true.
+ */
+static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
+                                       const size_t len, const u32 padbit,
+                                       simd_context_t *simd_context)
+{
+#if defined(CONFIG_KERNEL_MODE_NEON)
+       if (poly1305_use_neon && simd_use(simd_context)) {
+               poly1305_blocks_neon(ctx, inp, len, padbit);
+               return true;
+       }
+       convert_to_base2_64(ctx);
+#endif
+
+       poly1305_blocks_arm(ctx, inp, len, padbit);
+       return true;
+}
+
+/*
+ * Write the final tag to mac, choosing between the NEON and scalar routines
+ * the same way poly1305_blocks_arch() does. Always returns true.
+ */
+static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
+                                     const u32 nonce[4],
+                                     simd_context_t *simd_context)
+{
+#if defined(CONFIG_KERNEL_MODE_NEON)
+       if (poly1305_use_neon && simd_use(simd_context)) {
+               poly1305_emit_neon(ctx, mac, nonce);
+               return true;
+       }
+       convert_to_base2_64(ctx);
+#endif
+
+       poly1305_emit_arm(ctx, mac, nonce);
+       return true;
+}
diff --git a/lib/zinc/poly1305/poly1305-arm-cryptogams.S b/lib/zinc/poly1305/poly1305-arm.S
similarity index 91%
rename from lib/zinc/poly1305/poly1305-arm-cryptogams.S
rename to lib/zinc/poly1305/poly1305-arm.S
index 884b465030e4..4a0e9d451119 100644
--- a/lib/zinc/poly1305/poly1305-arm-cryptogams.S
+++ b/lib/zinc/poly1305/poly1305-arm.S
@@ -1,9 +1,12 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <ja...@zx2c4.com>. All Rights Reserved.
  * Copyright (C) 2006-2017 CRYPTOGAMS by <ap...@openssl.org>. All Rights Reserved.
+ *
+ * This is based in part on Andy Polyakov's implementation from CRYPTOGAMS.
  */
 
-#include "arm_arch.h"
+#include <linux/linkage.h>
 
 .text
 #if defined(__thumb2__)
@@ -13,13 +16,8 @@
 .code  32
 #endif
 
-.globl poly1305_emit
-.globl poly1305_blocks
-.globl poly1305_init
-.type  poly1305_init,%function
 .align 5
-poly1305_init:
-.Lpoly1305_init:
+ENTRY(poly1305_init_arm)
        stmdb   sp!,{r4-r11}
 
        eor     r3,r3,r3
@@ -38,10 +36,6 @@ poly1305_init:
        moveq   r0,#0
        beq     .Lno_key
 
-#if    __ARM_MAX_ARCH__>=7
-       adr     r11,.Lpoly1305_init
-       ldr     r12,.LOPENSSL_armcap
-#endif
        ldrb    r4,[r1,#0]
        mov     r10,#0x0fffffff
        ldrb    r5,[r1,#1]
@@ -56,12 +50,6 @@ poly1305_init:
        ldrb    r7,[r1,#6]
        and     r4,r4,r10
 
-#if    __ARM_MAX_ARCH__>=7
-       ldr     r12,[r11,r12]           @ OPENSSL_armcap_P
-# ifdef        __APPLE__
-       ldr     r12,[r12]
-# endif
-#endif
        ldrb    r8,[r1,#7]
        orr     r5,r5,r6,lsl#8
        ldrb    r6,[r1,#8]
@@ -71,35 +59,6 @@ poly1305_init:
        ldrb    r8,[r1,#10]
        and     r5,r5,r3
 
-#if    __ARM_MAX_ARCH__>=7
-       tst     r12,#ARMV7_NEON         @ check for NEON
-# ifdef        __APPLE__
-       adr     r9,poly1305_blocks_neon
-       adr     r11,poly1305_blocks
-#  ifdef __thumb2__
-       it      ne
-#  endif
-       movne   r11,r9
-       adr     r12,poly1305_emit
-       adr     r10,poly1305_emit_neon
-#  ifdef __thumb2__
-       it      ne
-#  endif
-       movne   r12,r10
-# else
-#  ifdef __thumb2__
-       itete   eq
-#  endif
-       addeq   r12,r11,#(poly1305_emit-.Lpoly1305_init)
-       addne   r12,r11,#(poly1305_emit_neon-.Lpoly1305_init)
-       addeq   r11,r11,#(poly1305_blocks-.Lpoly1305_init)
-       addne   r11,r11,#(poly1305_blocks_neon-.Lpoly1305_init)
-# endif
-# ifdef        __thumb2__
-       orr     r12,r12,#1      @ thumb-ify address
-       orr     r11,r11,#1
-# endif
-#endif
        ldrb    r9,[r1,#11]
        orr     r6,r6,r7,lsl#8
        ldrb    r7,[r1,#12]
@@ -118,26 +77,20 @@ poly1305_init:
        str     r6,[r0,#8]
        and     r7,r7,r3
        str     r7,[r0,#12]
-#if    __ARM_MAX_ARCH__>=7
-       stmia   r2,{r11,r12}            @ fill functions table
-       mov     r0,#1
-#else
-       mov     r0,#0
-#endif
 .Lno_key:
        ldmia   sp!,{r4-r11}
-#if    __ARM_ARCH__>=5
+#if __LINUX_ARM_ARCH__ >= 5
        bx      lr                              @ bx    lr
 #else
        tst     lr,#1
        moveq   pc,lr                   @ be binary compatible with V4, yet
        .word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
 #endif
-.size  poly1305_init,.-poly1305_init
-.type  poly1305_blocks,%function
+ENDPROC(poly1305_init_arm)
+
 .align 5
-poly1305_blocks:
-.Lpoly1305_blocks:
+ENTRY(poly1305_blocks_arm)
+.Lpoly1305_blocks_arm:
        stmdb   sp!,{r3-r11,lr}
 
        ands    r2,r2,#-16
@@ -158,11 +111,11 @@ poly1305_blocks:
        b       .Loop
 
 .Loop:
-#if __ARM_ARCH__<7
+#if __LINUX_ARM_ARCH__ < 7
        ldrb    r0,[lr],#16             @ load input
-# ifdef        __thumb2__
+#ifdef __thumb2__
        it      hi
-# endif
+#endif
        addhi   r8,r8,#1                @ 1<<128
        ldrb    r1,[lr,#-15]
        ldrb    r2,[lr,#-14]
@@ -201,19 +154,19 @@ poly1305_blocks:
        orr     r3,r2,r3,lsl#24
 #else
        ldr     r0,[lr],#16             @ load input
-# ifdef        __thumb2__
+#ifdef __thumb2__
        it      hi
-# endif
+#endif
        addhi   r8,r8,#1                @ padbit
        ldr     r1,[lr,#-12]
        ldr     r2,[lr,#-8]
        ldr     r3,[lr,#-4]
-# ifdef        __ARMEB__
+#ifdef __ARMEB__
        rev     r0,r0
        rev     r1,r1
        rev     r2,r2
        rev     r3,r3
-# endif
+#endif
        adds    r4,r4,r0                @ accumulate input
        str     lr,[sp,#8]              @ offload input pointer
        adcs    r5,r5,r1
@@ -283,7 +236,7 @@ poly1305_blocks:
        stmia   r0,{r4-r8}              @ store the result
 
 .Lno_data:
-#if    __ARM_ARCH__>=5
+#if __LINUX_ARM_ARCH__ >= 5
        ldmia   sp!,{r3-r11,pc}
 #else
        ldmia   sp!,{r3-r11,lr}
@@ -291,13 +244,12 @@ poly1305_blocks:
        moveq   pc,lr                   @ be binary compatible with V4, yet
        .word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
 #endif
-.size  poly1305_blocks,.-poly1305_blocks
-.type  poly1305_emit,%function
+ENDPROC(poly1305_blocks_arm)
+
 .align 5
-poly1305_emit:
+ENTRY(poly1305_emit_arm)
        stmdb   sp!,{r4-r11}
 .Lpoly1305_emit_enter:
-
        ldmia   r0,{r3-r7}
        adds    r8,r3,#5                @ compare to modulus
        adcs    r9,r4,#0
@@ -332,13 +284,13 @@ poly1305_emit:
        adcs    r5,r5,r10
        adc     r6,r6,r11
 
-#if __ARM_ARCH__>=7
-# ifdef __ARMEB__
+#if __LINUX_ARM_ARCH__ >= 7
+#ifdef __ARMEB__
        rev     r3,r3
        rev     r4,r4
        rev     r5,r5
        rev     r6,r6
-# endif
+#endif
        str     r3,[r1,#0]
        str     r4,[r1,#4]
        str     r5,[r1,#8]
@@ -377,20 +329,22 @@ poly1305_emit:
        strb    r6,[r1,#15]
 #endif
        ldmia   sp!,{r4-r11}
-#if    __ARM_ARCH__>=5
+#if __LINUX_ARM_ARCH__ >= 5
        bx      lr                              @ bx    lr
 #else
        tst     lr,#1
        moveq   pc,lr                   @ be binary compatible with V4, yet
        .word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
 #endif
-.size  poly1305_emit,.-poly1305_emit
-#if    __ARM_MAX_ARCH__>=7
+ENDPROC(poly1305_emit_arm)
+
+
+#ifdef CONFIG_KERNEL_MODE_NEON
 .fpu   neon
 
-.type  poly1305_init_neon,%function
 .align 5
-poly1305_init_neon:
+ENTRY(poly1305_init_neon)
+.Lpoly1305_init_neon:
        ldr     r4,[r0,#20]             @ load key base 2^32
        ldr     r5,[r0,#24]
        ldr     r6,[r0,#28]
@@ -600,11 +554,10 @@ poly1305_init_neon:
        vst1.32         {d8[1]},[r7]
 
        bx      lr                              @ bx    lr
-.size  poly1305_init_neon,.-poly1305_init_neon
+ENDPROC(poly1305_init_neon)
 
-.type  poly1305_blocks_neon,%function
 .align 5
-poly1305_blocks_neon:
+ENTRY(poly1305_blocks_neon)
        ldr     ip,[r0,#36]             @ is_base2_26
        ands    r2,r2,#-16
        beq     .Lno_data_neon
@@ -612,7 +565,7 @@ poly1305_blocks_neon:
        cmp     r2,#64
        bhs     .Lenter_neon
        tst     ip,ip                   @ is_base2_26?
-       beq     .Lpoly1305_blocks
+       beq     .Lpoly1305_blocks_arm
 
 .Lenter_neon:
        stmdb   sp!,{r4-r7}
@@ -622,7 +575,7 @@ poly1305_blocks_neon:
        bne     .Lbase2_26_neon
 
        stmdb   sp!,{r1-r3,lr}
-       bl      poly1305_init_neon
+       bl      .Lpoly1305_init_neon
 
        ldr     r4,[r0,#0]              @ load hash value base 2^32
        ldr     r5,[r0,#4]
@@ -686,12 +639,12 @@ poly1305_blocks_neon:
        sub             r2,r2,#16
        add             r4,r1,#32
 
-# ifdef        __ARMEB__
+#ifdef __ARMEB__
        vrev32.8        q10,q10
        vrev32.8        q13,q13
        vrev32.8        q11,q11
        vrev32.8        q12,q12
-# endif
+#endif
        vsri.u32        d28,d26,#8      @ base 2^32 -> base 2^26
        vshl.u32        d26,d26,#18
 
@@ -735,12 +688,12 @@ poly1305_blocks_neon:
        addhi           r7,r0,#(48+1*9*4)
        addhi           r6,r0,#(48+3*9*4)
 
-# ifdef        __ARMEB__
+#ifdef __ARMEB__
        vrev32.8        q10,q10
        vrev32.8        q13,q13
        vrev32.8        q11,q11
        vrev32.8        q12,q12
-# endif
+#endif
        vsri.u32        q14,q13,#8              @ base 2^32 -> base 2^26
        vshl.u32        q13,q13,#18
 
@@ -866,12 +819,12 @@ poly1305_blocks_neon:
 
        vld4.32         {d20,d22,d24,d26},[r1]  @ inp[0:1]
        add             r1,r1,#64
-# ifdef        __ARMEB__
+#ifdef __ARMEB__
        vrev32.8        q10,q10
        vrev32.8        q11,q11
        vrev32.8        q12,q12
        vrev32.8        q13,q13
-# endif
+#endif
 
        @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
        @ lazy reduction interleaved with base 2^32 -> base 2^26 of
@@ -1086,11 +1039,10 @@ poly1305_blocks_neon:
        ldmia   sp!,{r4-r7}
 .Lno_data_neon:
        bx      lr                                      @ bx    lr
-.size  poly1305_blocks_neon,.-poly1305_blocks_neon
+ENDPROC(poly1305_blocks_neon)
 
-.type  poly1305_emit_neon,%function
 .align 5
-poly1305_emit_neon:
+ENTRY(poly1305_emit_neon)
        ldr     ip,[r0,#36]             @ is_base2_26
 
        stmdb   sp!,{r4-r11}
@@ -1144,12 +1096,12 @@ poly1305_emit_neon:
        adcs    r5,r5,r10
        adc     r6,r6,r11
 
-# ifdef __ARMEB__
+#ifdef __ARMEB__
        rev     r3,r3
        rev     r4,r4
        rev     r5,r5
        rev     r6,r6
-# endif
+#endif
        str     r3,[r1,#0]              @ store the result
        str     r4,[r1,#4]
        str     r5,[r1,#8]
@@ -1157,16 +1109,9 @@ poly1305_emit_neon:
 
        ldmia   sp!,{r4-r11}
        bx      lr                              @ bx    lr
-.size  poly1305_emit_neon,.-poly1305_emit_neon
+ENDPROC(poly1305_emit_neon)
 
 .align 5
 .Lzeros:
 .long  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-.LOPENSSL_armcap:
-.word  OPENSSL_armcap_P-.Lpoly1305_init
-#endif
-.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by <ap...@openssl.org>"
-.align 2
-#if    __ARM_MAX_ARCH__>=7
-.comm   OPENSSL_armcap_P,4,4
 #endif
diff --git a/lib/zinc/poly1305/poly1305-arm64-cryptogams.S b/lib/zinc/poly1305/poly1305-arm64.S
similarity index 90%
rename from lib/zinc/poly1305/poly1305-arm64-cryptogams.S
rename to lib/zinc/poly1305/poly1305-arm64.S
index 0ecb50a83ec0..84a654479cac 100644
--- a/lib/zinc/poly1305/poly1305-arm64-cryptogams.S
+++ b/lib/zinc/poly1305/poly1305-arm64.S
@@ -1,21 +1,16 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <ja...@zx2c4.com>. All Rights Reserved.
  * Copyright (C) 2006-2017 CRYPTOGAMS by <ap...@openssl.org>. All Rights Reserved.
+ *
+ * This is based in part on Andy Polyakov's implementation from CRYPTOGAMS.
  */
 
-#include "arm_arch.h"
-
+#include <linux/linkage.h>
 .text
 
-// forward "declarations" are required for Apple
-
-.globl poly1305_blocks
-.globl poly1305_emit
-
-.globl poly1305_init
-.type  poly1305_init,%function
 .align 5
-poly1305_init:
+ENTRY(poly1305_init_arm)
        cmp     x1,xzr
        stp     xzr,xzr,[x0]            // zero hash value
        stp     xzr,xzr,[x0,#16]        // [along with is_base2_26]
@@ -23,17 +18,9 @@ poly1305_init:
        csel    x0,xzr,x0,eq
        b.eq    .Lno_key
 
-#ifdef __ILP32__
-       ldrsw   x11,.LOPENSSL_armcap_P
-#else
-       ldr     x11,.LOPENSSL_armcap_P
-#endif
-       adr     x10,.LOPENSSL_armcap_P
-
        ldp     x7,x8,[x1]              // load key
        mov     x9,#0xfffffffc0fffffff
        movk    x9,#0x0fff,lsl#48
-       ldr     w17,[x10,x11]
 #ifdef __ARMEB__
        rev     x7,x7                   // flip bytes
        rev     x8,x8
@@ -43,30 +30,12 @@ poly1305_init:
        and     x8,x8,x9                // &=0ffffffc0ffffffc
        stp     x7,x8,[x0,#32]  // save key value
 
-       tst     w17,#ARMV7_NEON
-
-       adr     x12,poly1305_blocks
-       adr     x7,poly1305_blocks_neon
-       adr     x13,poly1305_emit
-       adr     x8,poly1305_emit_neon
-
-       csel    x12,x12,x7,eq
-       csel    x13,x13,x8,eq
-
-#ifdef __ILP32__
-       stp     w12,w13,[x2]
-#else
-       stp     x12,x13,[x2]
-#endif
-
-       mov     x0,#1
 .Lno_key:
        ret
-.size  poly1305_init,.-poly1305_init
+ENDPROC(poly1305_init_arm)
 
-.type  poly1305_blocks,%function
 .align 5
-poly1305_blocks:
+ENTRY(poly1305_blocks_arm)
        ands    x2,x2,#-16
        b.eq    .Lno_data
 
@@ -126,11 +95,10 @@ poly1305_blocks:
 
 .Lno_data:
        ret
-.size  poly1305_blocks,.-poly1305_blocks
+ENDPROC(poly1305_blocks_arm)
 
-.type  poly1305_emit,%function
 .align 5
-poly1305_emit:
+ENTRY(poly1305_emit_arm)
        ldp     x4,x5,[x0]              // load hash base 2^64
        ldr     x6,[x0,#16]
        ldp     x10,x11,[x2]    // load nonce
@@ -157,10 +125,10 @@ poly1305_emit:
        stp     x4,x5,[x1]              // write result
 
        ret
-.size  poly1305_emit,.-poly1305_emit
-.type  poly1305_mult,%function
+ENDPROC(poly1305_emit_arm)
+
 .align 5
-poly1305_mult:
+__poly1305_mult:
        mul     x12,x4,x7               // h0*r0
        umulh   x13,x4,x7
 
@@ -193,11 +161,8 @@ poly1305_mult:
        adc     x6,x6,xzr
 
        ret
-.size  poly1305_mult,.-poly1305_mult
 
-.type  poly1305_splat,%function
-.align 5
-poly1305_splat:
+__poly1305_splat:
        and     x12,x4,#0x03ffffff      // base 2^64 -> base 2^26
        ubfx    x13,x4,#26,#26
        extr    x14,x5,x4,#52
@@ -220,15 +185,14 @@ poly1305_splat:
        str     w15,[x0,#16*8]  // s4
 
        ret
-.size  poly1305_splat,.-poly1305_splat
 
-.type  poly1305_blocks_neon,%function
+#ifdef CONFIG_KERNEL_MODE_NEON
 .align 5
-poly1305_blocks_neon:
+ENTRY(poly1305_blocks_neon)
        ldr     x17,[x0,#24]
        cmp     x2,#128
        b.hs    .Lblocks_neon
-       cbz     x17,poly1305_blocks
+       cbz     x17,poly1305_blocks_arm
 
 .Lblocks_neon:
        stp     x29,x30,[sp,#-80]!
@@ -276,7 +240,7 @@ poly1305_blocks_neon:
        adcs    x5,x5,x13
        adc     x6,x6,x3
 
-       bl      poly1305_mult
+       bl      __poly1305_mult
        ldr     x30,[sp,#8]
 
        cbz     x3,.Lstore_base2_64_neon
@@ -322,7 +286,7 @@ poly1305_blocks_neon:
        adcs    x5,x5,x13
        adc     x6,x6,x3
 
-       bl      poly1305_mult
+       bl      __poly1305_mult
 
 .Linit_neon:
        and     x10,x4,#0x03ffffff      // base 2^64 -> base 2^26
@@ -349,19 +313,19 @@ poly1305_blocks_neon:
        mov     x5,x8
        mov     x6,xzr
        add     x0,x0,#48+12
-       bl      poly1305_splat
+       bl      __poly1305_splat
 
-       bl      poly1305_mult           // r^2
+       bl      __poly1305_mult         // r^2
        sub     x0,x0,#4
-       bl      poly1305_splat
+       bl      __poly1305_splat
 
-       bl      poly1305_mult           // r^3
+       bl      __poly1305_mult         // r^3
        sub     x0,x0,#4
-       bl      poly1305_splat
+       bl      __poly1305_splat
 
-       bl      poly1305_mult           // r^4
+       bl      __poly1305_mult         // r^4
        sub     x0,x0,#4
-       bl      poly1305_splat
+       bl      __poly1305_splat
        ldr     x30,[sp,#8]
 
        add     x16,x1,#32
@@ -801,13 +765,12 @@ poly1305_blocks_neon:
 .Lno_data_neon:
        ldr     x29,[sp],#80
        ret
-.size  poly1305_blocks_neon,.-poly1305_blocks_neon
+ENDPROC(poly1305_blocks_neon)
 
-.type  poly1305_emit_neon,%function
 .align 5
-poly1305_emit_neon:
+ENTRY(poly1305_emit_neon)
        ldr     x17,[x0,#24]
-       cbz     x17,poly1305_emit
+       cbz     x17,poly1305_emit_arm
 
        ldp     w10,w11,[x0]            // load hash value base 2^26
        ldp     w12,w13,[x0,#8]
@@ -853,17 +816,9 @@ poly1305_emit_neon:
        stp     x4,x5,[x1]              // write result
 
        ret
-.size  poly1305_emit_neon,.-poly1305_emit_neon
+ENDPROC(poly1305_emit_neon)
 
 .align 5
 .Lzeros:
 .long  0,0,0,0,0,0,0,0
-.LOPENSSL_armcap_P:
-#ifdef __ILP32__
-.long  OPENSSL_armcap_P-.
-#else
-.quad  OPENSSL_armcap_P-.
 #endif
-.byte  80,111,108,121,49,51,48,53,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align 2
-.align 2
diff --git a/lib/zinc/poly1305/poly1305.c b/lib/zinc/poly1305/poly1305.c
index 2ae1b3cb66cd..647aa3354d38 100644
--- a/lib/zinc/poly1305/poly1305.c
+++ b/lib/zinc/poly1305/poly1305.c
@@ -17,6 +17,8 @@
 
 #if defined(CONFIG_ZINC_ARCH_X86_64)
 #include "poly1305-x86_64-glue.h"
+#elif defined(CONFIG_ZINC_ARCH_ARM) || defined(CONFIG_ZINC_ARCH_ARM64)
+#include "poly1305-arm-glue.h"
 #else
 static inline bool poly1305_init_arch(void *ctx,
                                      const u8 key[POLY1305_KEY_SIZE])
-- 
2.19.0

Reply via email to