From: Vineet Gupta <vgu...@kernel.org>

Upcoming ARCv3 lacks ZOL support, so provide alternative uaccess
implementations based on 64-bit memory operations.
Signed-off-by: Vineet Gupta <vgu...@kernel.org>
---
 arch/arc/include/asm/asm-macro-ll64-emul.h |  28 ++++
 arch/arc/include/asm/asm-macro-ll64.h      |  20 +++
 arch/arc/include/asm/assembler.h           |  12 ++
 arch/arc/include/asm/uaccess.h             |  12 ++
 arch/arc/lib/Makefile                      |   2 +
 arch/arc/lib/uaccess.S                     | 144 +++++++++++++++++++++
 6 files changed, 218 insertions(+)
 create mode 100644 arch/arc/include/asm/asm-macro-ll64-emul.h
 create mode 100644 arch/arc/include/asm/asm-macro-ll64.h
 create mode 100644 arch/arc/lib/uaccess.S

diff --git a/arch/arc/include/asm/asm-macro-ll64-emul.h b/arch/arc/include/asm/asm-macro-ll64-emul.h
new file mode 100644
index 000000000000..886320cc74ad
--- /dev/null
+++ b/arch/arc/include/asm/asm-macro-ll64-emul.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * Abstraction for 64-bit load/store:
+ *  - Emulate 64-bit access with two 32-bit load/stores.
+ *  - In the non-emulated case, output register pair r<N>:r<N+1>
+ *    so macro takes only 1 output arg and determines the 2nd.
+ */
+
+.macro ST64.ab d, s, incr
+	st.ab	\d, [\s, \incr / 2]
+	.ifeqs	"\d", "r4"
+	st.ab	r5, [\s, \incr / 2]
+	.endif
+	.ifeqs	"\d", "r6"
+	st.ab	r7, [\s, \incr / 2]
+	.endif
+.endm
+
+.macro LD64.ab d, s, incr
+	ld.ab	\d, [\s, \incr / 2]
+	.ifeqs	"\d", "r4"
+	ld.ab	r5, [\s, \incr / 2]
+	.endif
+	.ifeqs	"\d", "r6"
+	ld.ab	r7, [\s, \incr / 2]
+	.endif
+.endm
diff --git a/arch/arc/include/asm/asm-macro-ll64.h b/arch/arc/include/asm/asm-macro-ll64.h
new file mode 100644
index 000000000000..89e05c923a26
--- /dev/null
+++ b/arch/arc/include/asm/asm-macro-ll64.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * Abstraction for 64-bit load/store:
+ *  - Single instruction to double load/store
+ *  - output register pair r<N>:r<N+1> but only
+ *    first register needs to be specified
+ */
+
+.irp	xx,,.ab
+.macro ST64\xx d, s, off=0
+	std\xx	\d, [\s, \off]
+.endm
+.endr
+
+.irp	xx,,.ab
+.macro LD64\xx d, s, off=0
+	ldd\xx	\d, [\s, \off]
+.endm
+.endr
diff --git a/arch/arc/include/asm/assembler.h b/arch/arc/include/asm/assembler.h
index 426488ef27d4..1d69390c22ba 100644
--- a/arch/arc/include/asm/assembler.h
+++ b/arch/arc/include/asm/assembler.h
@@ -5,6 +5,12 @@
 
 #ifdef __ASSEMBLY__
 
+#ifdef CONFIG_ARC_HAS_LL64
+#include <asm/asm-macro-ll64.h>
+#else
+#include <asm/asm-macro-ll64-emul.h>
+#endif
+
 #ifdef CONFIG_ARC_LACKS_ZOL
 #include <asm/asm-macro-dbnz.h>
 #else
@@ -13,6 +19,12 @@
 
 #else	/* !__ASSEMBLY__ */
 
+#ifdef CONFIG_ARC_HAS_LL64
+asm(".include \"asm/asm-macro-ll64.h\"\n");
+#else
+asm(".include \"asm/asm-macro-ll64-emul.h\"\n");
+#endif
+
 /*
  * ARCv2 cores have both LPcc and DBNZ instructions (starting 3.5a release).
  * But in this context, LP present implies DBNZ not available (ARCompact ISA)
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index 9b009e64e79c..f5b97d977c1b 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -163,6 +163,7 @@
 	: "+r" (ret)				\
 	: "r" (src), "r" (dst), "ir" (-EFAULT))
 
+#ifndef CONFIG_ARC_LACKS_ZOL
 static inline unsigned long
 raw_copy_from_user(void *to, const void __user *from, unsigned long n)
 {
@@ -660,6 +661,17 @@ static inline unsigned long __clear_user(void __user *to, unsigned long n)
 #define INLINE_COPY_TO_USER
 #define INLINE_COPY_FROM_USER
 
+#else
+
+extern unsigned long raw_copy_from_user(void *to, const void __user *from,
+					unsigned long n);
+extern unsigned long raw_copy_to_user(void __user *to, const void *from,
+					unsigned long n);
+
+extern unsigned long __clear_user(void __user *to, unsigned long n);
+
+#endif
+
 #define __clear_user __clear_user
 
 #include <asm/segment.h>
diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile
index 30158ae69fd4..87d18f5013dc 100644
--- a/arch/arc/lib/Makefile
+++ b/arch/arc/lib/Makefile
@@ -13,3 +13,5 @@ lib-$(CONFIG_ISA_ARCV2) +=memcpy-archs-unaligned.o
 else
 lib-$(CONFIG_ISA_ARCV2) +=memcpy-archs.o
 endif
+
+lib-$(CONFIG_ARC_LACKS_ZOL) += uaccess.o
diff --git a/arch/arc/lib/uaccess.S b/arch/arc/lib/uaccess.S
new file mode 100644
index 000000000000..5093160a72d3
--- /dev/null
+++ b/arch/arc/lib/uaccess.S
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * uaccess for ARCv3: avoids ZOL, uses 64-bit memory ops
+ * ASSUMES unaligned access
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+#ifndef CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS
+#error "Unaligned access support needed"
+#endif
+
+; Input
+;  - r0: dest, kernel
+;  - r1: src, user
+;  - r2: sz
+; Output
+;  - r0: Num bytes left to copy, 0 on success
+
+ENTRY_CFI(raw_copy_from_user)
+
+	add	r8, r0, r2
+
+	lsr.f	r3, r2, 4
+	bz	.L1dobytes
+
+	; chunks of 16 bytes
+10:	LD64.ab	r4, r1, 8
+11:	LD64.ab	r6, r1, 8
+	ST64.ab	r4, r0, 8
+	ST64.ab	r6, r0, 8
+	DBNZR	r3, 10b
+
+.L1dobytes:
+	; last 1-15 bytes
+	and.f	r3, r2, 0xf
+	bz	.L1done
+
+12:	ldb.ab	r4, [r1, 1]
+	stb.ab	r4, [r0, 1]
+	DBNZR	r3, 12b
+
+.L1done:
+	; bytes not copied = orig_dst + sz - curr_dst
+	j.d	[blink]
+	sub	r0, r8, r0
+END_CFI(raw_copy_from_user)
+
+.section __ex_table, "a"
+	.word	10b, .L1done
+	.word	11b, .L1done
+	.word	12b, .L1done
+.previous
+
+; Input
+;  - r0: dest, user
+;  - r1: src, kernel
+;  - r2: sz
+; Output
+;  - r0: Num bytes left to copy, 0 on success
+
+ENTRY_CFI(raw_copy_to_user)
+
+	add	r8, r1, r2
+
+	lsr.f	r3, r2, 4
+	bz	.L2dobytes
+
+	; chunks of 16 bytes
+2:	LD64.ab	r4, r1, 8
+	LD64.ab	r6, r1, 8
+20:	ST64.ab	r4, r0, 8
+21:	ST64.ab	r6, r0, 8
+	DBNZR	r3, 2b
+
+.L2dobytes:
+	; last 1-15 bytes
+	and.f	r3, r2, 0xf
+	bz	.L2done
+
+2:	ldb.ab	r4, [r1, 1]
+22:	stb.ab	r4, [r0, 1]
+	DBNZR	r3, 2b
+
+.L2done:
+	; bytes not copied = orig_src + sz - curr_src
+	j.d	[blink]
+	sub	r0, r8, r1
+
+END_CFI(raw_copy_to_user)
+
+.section __ex_table, "a"
+	.word	20b, .L2done
+	.word	21b, .L2done
+	.word	22b, .L2done
+.previous
+
+ENTRY_CFI(__clear_user)
+	add	r8, r0, r1
+
+	mov	r4, 0
+	mov	r5, 0
+
+	lsr.f	r3, r1, 4
+	bz	.L3dobytes
+
+	; chunks of 16 bytes
+30:	ST64.ab	r4, r0, 8
+31:	ST64.ab	r4, r0, 8
+	DBNZR	r3, 30b
+
+.L3dobytes:
+	; last 1-15 bytes
+	and.f	r3, r1, 0xf
+	bz	.L3done
+
+32:	stb.ab	r4, [r0, 1]
+	DBNZR	r3, 32b
+
+.L3done:
+	; bytes not cleared = orig_dst + sz - curr_dst
+	j.d	[blink]
+	sub	r0, r8, r0
+
+END_CFI(__clear_user)
+
+; Note that the .fixup section is missing and that is not an omission
+;
+; .fixup is a level of indirection for user fault handling: it does some extra
+; work before jumping to a safe instruction (past the faulting LD/ST) in uaccess
+; code, e.g. setting up -EFAULT in the return register for the caller.
+; But if that is not needed (as above, where the number of bytes copied/not-copied
+; is already in return reg r0) and the fault handler only needs to resume at a
+; valid PC, that label can be placed directly in the __ex_table entry (instead
+; of in .fixup). do_page_fault() -> fixup_exception() uses it to set up
+; pt_regs->ret, which the CPU exception handler resumes to. This also makes the
+; handling more efficient by removing a level of indirection.
+
+.section __ex_table, "a"
+	.word	30b, .L3done
+	.word	31b, .L3done
+	.word	32b, .L3done
+.previous
-- 
2.25.1
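
[Editor's note, not part of the patch] For readers unfamiliar with the fault path
the comment above refers to: on a faulting user access, do_page_fault() calls
fixup_exception(), which looks the faulting PC up in __ex_table and patches
pt_regs->ret so the CPU resumes past the faulting LD/ST. The sketch below shows
roughly what that handler looks like, assuming the generic absolute-address
extable entry layout (the .word <insn>, <label> pairs emitted above) and the
generic search_exception_tables()/instruction_pointer() helpers; the real code
lives in arch/arc/mm/extable.c and may differ in detail.

#include <linux/extable.h>
#include <linux/ptrace.h>
#include <linux/uaccess.h>

/* Sketch of the ARC-style extable fixup, not the verbatim in-tree file */
int fixup_exception(struct pt_regs *regs)
{
	const struct exception_table_entry *fixup;

	/* each __ex_table entry pairs a faulting insn address with a resume PC */
	fixup = search_exception_tables(instruction_pointer(regs));
	if (fixup) {
		/*
		 * Resume directly at the label recorded in __ex_table
		 * (e.g. .L1done above); no .fixup trampoline is needed
		 * because r0 already holds the "bytes not copied" count.
		 */
		regs->ret = fixup->fixup;
		return 1;
	}

	return 0;
}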
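[Editor's note, not part of the patch] On the C side the out-of-line routines keep
the usual uaccess contract: the return value is the number of bytes left
un-copied, so 0 means success (the r0 value computed at .L1done/.L2done/.L3done).
A hypothetical caller (example_fetch and its buffers are made up purely for
illustration) checks it through the standard copy_from_user() wrapper:

#include <linux/errno.h>
#include <linux/uaccess.h>

static int example_fetch(void *kbuf, const void __user *ubuf, size_t len)
{
	/*
	 * copy_from_user() returns the number of bytes NOT copied;
	 * anything non-zero means the transfer faulted partway through.
	 */
	if (copy_from_user(kbuf, ubuf, len))
		return -EFAULT;

	return 0;
}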