Replace the load of the tweak vector from a literal in memory with a
simple instruction sequence that composes the tweak vector directly in
registers.

Signed-off-by: Ard Biesheuvel <ard.biesheu...@linaro.org>
---
 arch/arm64/crypto/aes-neonbs-core.S | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)
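
A note for context: the new movi/uzp1 sequence builds the same 128-bit
constant that the removed .Lxts_mul_x literal provided. The user-space C
sketch below (illustrative only, not kernel code) models the lane
operations and should print the two quadwords of the old ".quad 1, 0x87"
literal. Composing the value in registers also avoids an endian-sensitive
128-bit load, which is presumably why both the CPU_LE and CPU_BE flavours
of the literal can go.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* movi v30.2s, #0x1  - two 32-bit lanes of 1, upper 64 bits cleared */
	uint32_t v30[4] = { 0x1, 0x1, 0, 0 };
	/* movi v25.2s, #0x87 - two 32-bit lanes of 0x87, upper 64 bits cleared */
	uint32_t v25[4] = { 0x87, 0x87, 0, 0 };
	uint32_t res[4];
	uint64_t q[2];

	/* uzp1 v30.4s, v30.4s, v25.4s - even-numbered lanes of both sources */
	res[0] = v30[0];
	res[1] = v30[2];
	res[2] = v25[0];
	res[3] = v25[2];

	memcpy(q, res, sizeof(q));
	/* expect { 0x1, 0x87 }, matching the removed literal */
	printf("q30 = { %#llx, %#llx }\n",
	       (unsigned long long)q[0], (unsigned long long)q[1]);
	return 0;
}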

diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index cf10ff8878a3..65982039fa36 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -730,11 +730,6 @@ ENDPROC(aesbs_cbc_decrypt)
        eor             \out\().16b, \out\().16b, \tmp\().16b
        .endm
 
-       .align          4
-.Lxts_mul_x:
-CPU_LE(        .quad           1, 0x87         )
-CPU_BE(        .quad           0x87, 1         )
-
        /*
         * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                   int blocks, u8 iv[])
@@ -806,7 +801,9 @@ ENDPROC(__xts_crypt8)
        mov             x23, x4
        mov             x24, x5
 
-0:     ldr             q30, .Lxts_mul_x
+0:     movi            v30.2s, #0x1
+       movi            v25.2s, #0x87
+       uzp1            v30.4s, v30.4s, v25.4s
        ld1             {v25.16b}, [x24]
 
 99:    adr             x7, \do8
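
For reference, the vector built above is the per-lane mask consumed by the
tweak update macro whose closing eor/.endm appear in the first hunk. A
rough scalar C model of that update (standard XTS tweak doubling in
GF(2^128); variable names are illustrative only):

#include <stdint.h>

/*
 * t[0] is the low 64 bits of the tweak, t[1] the high 64 bits.
 * Doubling shifts the whole 128-bit value left by one; if the top bit
 * falls off, the result is reduced with the polynomial 0x87.
 */
static void xts_next_tweak(uint64_t t[2])
{
	uint64_t lo_top = (uint64_t)((int64_t)t[0] >> 63); /* sshr-style all-ones mask */
	uint64_t hi_top = (uint64_t)((int64_t)t[1] >> 63);

	t[0] = (t[0] << 1) ^ (hi_top & 0x87); /* fold in 0x87 on overflow */
	t[1] = (t[1] << 1) ^ (lo_top & 0x1);  /* carry the low lane's top bit */
}

The {1, 0x87} lane order corresponds to the per-lane masking and half-swap
that precede the eor visible in the first hunk: the 0x87 is XORed into the
low lane and the carry bit into the high lane.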
-- 
2.17.1
