From: Vineet Gupta <vgu...@kernel.org> Add checksum implementation based on double load/stores if ZOL is not supported.
Signed-off-by: Vineet Gupta <vgu...@kernel.org> --- arch/arc/include/asm/checksum.h | 58 ++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 5 deletions(-) diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h index 0b485800a392..435017be9900 100644 --- a/arch/arc/include/asm/checksum.h +++ b/arch/arc/include/asm/checksum.h @@ -29,10 +29,13 @@ static inline __sum16 csum_fold(__wsum s) s -= r; return s >> 16; } +#define csum_fold csum_fold +#ifndef CONFIG_ARC_LACKS_ZOL /* - * This is a version of ip_compute_csum() optimized for IP headers, - * which always checksum on 4 octet boundaries. + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + * @ihl comes from IP hdr and is number of 4-byte words */ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) @@ -62,6 +65,54 @@ ip_fast_csum(const void *iph, unsigned int ihl) return csum_fold(sum); } +#else + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. 
+ * @ihl comes from IP hdr and is number of 4-byte words + * - No loop entered for canonical 5 words + * - optimized for ARCv2 + * - LDD double load for fetching first 16 bytes + * - DBNZ instruction for looping (ZOL not used) + */ +static inline __sum16 +ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int tmp, sum; + u64 dw1, dw2; + + __asm__( +#ifdef CONFIG_ARC_HAS_LL64 + " ldd.ab %0, [%4, 8] \n" + " ldd.ab %1, [%4, 8] \n" +#else + " ld.ab %L0, [%4, 4] \n" + " ld.ab %H0, [%4, 4] \n" + " ld.ab %L1, [%4, 4] \n" + " ld.ab %H1, [%4, 4] \n" +#endif + " sub %5, %5, 4 \n" + " add.f %3, %L0, %H0 \n" + " adc.f %3, %3, %L1 \n" + " adc.f %3, %3, %H1 \n" + "1: ld.ab %2, [%4, 4] \n" + " adc.f %3, %3, %2 \n" + " DBNZR %5, 1b \n" + " add.cs %3, %3, 1 \n" + + : "=&r" (dw1), "=&r" (dw2), "=&r" (tmp), "=&r" (sum), + "+&r" (iph), "+&r"(ihl) + : + : "cc", "memory"); + + return csum_fold(sum); +} + +#endif + +#define ip_fast_csum ip_fast_csum + /* * TCP pseudo Header is 12 bytes: * SA [4], DA [4], zeroes [1], Proto[1], TCP Seg(hdr+data) Len [2] @@ -88,9 +139,6 @@ csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, return sum; } - -#define csum_fold csum_fold -#define ip_fast_csum ip_fast_csum #define csum_tcpudp_nofold csum_tcpudp_nofold #include <asm-generic/checksum.h> -- 2.25.1 _______________________________________________ linux-snps-arc mailing list linux-snps-arc@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-snps-arc