From: Vineet Gupta <vgu...@kernel.org>

Add an ip_fast_csum() implementation based on double loads
for configurations where ZOL is not supported.

Signed-off-by: Vineet Gupta <vgu...@kernel.org>
---
 arch/arc/include/asm/checksum.h | 58 ++++++++++++++++++++++++++++++---
 1 file changed, 53 insertions(+), 5 deletions(-)

diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h
index 0b485800a392..435017be9900 100644
--- a/arch/arc/include/asm/checksum.h
+++ b/arch/arc/include/asm/checksum.h
@@ -29,10 +29,13 @@ static inline __sum16 csum_fold(__wsum s)
        s -= r;
        return s >> 16;
 }
+#define csum_fold csum_fold
 
+#ifndef CONFIG_ARC_LACKS_ZOL
 /*
- *     This is a version of ip_compute_csum() optimized for IP headers,
- *     which always checksum on 4 octet boundaries.
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ * @ihl comes from IP hdr and is number of 4-byte words
  */
 static inline __sum16
 ip_fast_csum(const void *iph, unsigned int ihl)
@@ -62,6 +65,54 @@ ip_fast_csum(const void *iph, unsigned int ihl)
        return csum_fold(sum);
 }
 
+#else
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ * @ihl comes from IP hdr and is number of 4-byte words
+ *  - No loop entered for canonical 5 words
+ *  - optimized for ARCv2
+ *    - LDD double load (if CONFIG_ARC_HAS_LL64) for fetching first 16 bytes
+ *    - DBNZ instruction for looping (ZOL not used)
+ */
+static inline __sum16
+ip_fast_csum(const void *iph, unsigned int ihl) /* iph: start of IP header; ihl: header length in 4-byte words */
+{
+       unsigned int tmp, sum; /* tmp: word loaded inside the loop; sum: running 32-bit accumulator */
+       u64 dw1, dw2; /* hold the first four 32-bit words (16 bytes) of the header */
+
+       __asm__(
+#ifdef CONFIG_ARC_HAS_LL64
+       "       ldd.ab %0, [%4, 8]      \n" /* dw1 = bytes 0..7; .ab advances iph by 8 after the load */
+       "       ldd.ab %1, [%4, 8]      \n" /* dw2 = bytes 8..15 */
+#else /* !CONFIG_ARC_HAS_LL64: same 16 bytes fetched with four 32-bit loads */
+       "       ld.ab %L0, [%4, 4]      \n" /* low half of dw1 */
+       "       ld.ab %H0, [%4, 4]      \n" /* high half of dw1 */
+       "       ld.ab %L1, [%4, 4]      \n" /* low half of dw2 */
+       "       ld.ab %H1, [%4, 4]      \n" /* high half of dw2 */
+#endif
+       "       sub    %5, %5,  4       \n" /* 4 words consumed above; NOTE(review): appears to assume ihl >= 5 - confirm callers */
+       "       add.f  %3, %L0, %H0     \n" /* sum = word0 + word1, setting carry */
+       "       adc.f  %3, %3,  %L1     \n" /* add word2 plus carry-in */
+       "       adc.f  %3, %3,  %H1     \n" /* add word3 plus carry-in */
+       "1:     ld.ab  %2, [%4, 4]      \n" /* tmp = next header word, iph += 4 */
+       "       adc.f  %3, %3,  %2      \n" /* accumulate with carry */
+       "       DBNZR  %5, 1b           \n" /* presumably decrement ihl and loop while non-zero; NOTE(review): confirm DBNZR mnemonic/macro is available here */
+       "       add.cs %3, %3,  1       \n" /* fold the final carry-out back into sum */
+
+       : "=&r" (dw1), "=&r" (dw2), "=&r" (tmp), "=&r" (sum), /* early-clobber outputs: written before inputs are fully consumed */
+         "+&r" (iph), "+&r"(ihl) /* read-write: the asm advances iph and counts ihl down */
+       :
+       : "cc", "memory"); /* flags are modified; header memory is read through iph */
+
+       return csum_fold(sum); /* reduce the 32-bit sum to the __sum16 result */
+}
+
+#endif
+
+#define ip_fast_csum ip_fast_csum
+
 /*
  * TCP pseudo Header is 12 bytes:
  * SA [4], DA [4], zeroes [1], Proto[1], TCP Seg(hdr+data) Len [2]
@@ -88,9 +139,6 @@ csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
 
        return sum;
 }
-
-#define csum_fold csum_fold
-#define ip_fast_csum ip_fast_csum
 #define csum_tcpudp_nofold csum_tcpudp_nofold
 
 #include <asm-generic/checksum.h>
-- 
2.25.1


_______________________________________________
linux-snps-arc mailing list
linux-snps-arc@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-snps-arc

Reply via email to