in4_cksum(), used to compute packet checksums for the legacy internet
protocol, has been hand-optimized for speed on most elderly platforms,
with the most recent pieces of silicon using a portable C
implementation.

Most of these implementations, in a not-so-uncommon case, need to
checksum an extra (``overlay'') header, and invoke memset or bzero on
such an overlay, prior to initializing it and checksumming it.

However, except for one byte, the zeroed parts are useless since they
will not change the checksum, so that memset/bzero call can be removed,
and the sum can omit the first 8 bytes which will always be zero.

The following diff implements that idea. Plus on sparc64 you get one
useless assembly instruction removed, for free, isn't that awesome?

Affected platforms: alpha, amd64, arm64, hppa, landisk, luna88k, macppc,
octeon, powerpc64, riscv64, sparc64. No need to test on armv7 and i386.

Index: sys/arch/alpha/alpha/in_cksum.c
===================================================================
RCS file: /OpenBSD/src/sys/arch/alpha/alpha/in_cksum.c,v
retrieving revision 1.9
diff -u -p -r1.9 in_cksum.c
--- sys/arch/alpha/alpha/in_cksum.c     21 Aug 2014 14:24:08 -0000      1.9
+++ sys/arch/alpha/alpha/in_cksum.c     25 Jan 2022 20:21:06 -0000
@@ -212,14 +212,13 @@ in4_cksum(struct mbuf *m, u_int8_t nxt, 
                        panic("in4_cksum: bad mbuf chain");
 #endif
 
-               memset(&ipov, 0, sizeof(ipov));
-
-               ipov.ih_len = htons(len);
+               ipov.ih_x1[8] = 0;
                ipov.ih_pr = nxt;
+               ipov.ih_len = htons(len);
                ipov.ih_src = mtod(m, struct ip *)->ip_src;
                ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
 
-               sum += in_cksumdata((caddr_t) &ipov, sizeof(ipov));
+               sum += in_cksumdata((caddr_t) &ipov + 8, sizeof(ipov) - 8);
        }
 
        /* skip over unnecessary part */
Index: sys/arch/m88k/m88k/in_cksum.c
===================================================================
RCS file: /OpenBSD/src/sys/arch/m88k/m88k/in_cksum.c,v
retrieving revision 1.4
diff -u -p -r1.4 in_cksum.c
--- sys/arch/m88k/m88k/in_cksum.c       21 Aug 2014 14:24:08 -0000      1.4
+++ sys/arch/m88k/m88k/in_cksum.c       25 Jan 2022 20:21:07 -0000
@@ -95,19 +95,22 @@ in4_cksum(struct mbuf *m, uint8_t nxt, i
 {
        u_int16_t *w;
        u_int sum = 0;
-       struct ipovly ipov;
+       union {
+               struct ipovly ipov;
+               u_int16_t w[10];
+       } u;
 
        if (nxt != 0) {
                /* pseudo header */
-               bzero(&ipov, sizeof(ipov));
-               ipov.ih_len = htons(len);
-               ipov.ih_pr = nxt; 
-               ipov.ih_src = mtod(m, struct ip *)->ip_src; 
-               ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
-               w = (u_int16_t *)&ipov;
-               /* assumes sizeof(ipov) == 20 */
-               sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4];
-               sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9];
+               u.ipov.ih_x1[8] = 0; 
+               u.ipov.ih_pr = nxt; 
+               u.ipov.ih_len = htons(len);
+               u.ipov.ih_src = mtod(m, struct ip *)->ip_src; 
+               u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
+               w = u.w;
+               /* assumes sizeof(ipov) == 20 and first 8 bytes are zeroes */
+               sum += w[4]; sum += w[5]; sum += w[6];
+               sum += w[7]; sum += w[8]; sum += w[9];
        }
 
        /* skip unnecessary part */
Index: sys/arch/powerpc/powerpc/in_cksum.c
===================================================================
RCS file: /OpenBSD/src/sys/arch/powerpc/powerpc/in_cksum.c,v
retrieving revision 1.10
diff -u -p -r1.10 in_cksum.c
--- sys/arch/powerpc/powerpc/in_cksum.c 22 Jul 2014 10:35:35 -0000      1.10
+++ sys/arch/powerpc/powerpc/in_cksum.c 25 Jan 2022 20:21:07 -0000
@@ -254,15 +254,15 @@ in4_cksum(struct mbuf *m, uint8_t nxt, i
 
        if (nxt != 0) {
                /* pseudo header */
-               memset(&u.ipov, 0, sizeof(u.ipov));
-               u.ipov.ih_len = htons(len);
+               u.ipov.ih_x1[8] = 0;
                u.ipov.ih_pr = nxt; 
+               u.ipov.ih_len = htons(len);
                u.ipov.ih_src = mtod(m, struct ip *)->ip_src; 
                u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
                w = u.w;
-               /* assumes sizeof(ipov) == 20 */
-               sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4];
-               sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9];
+               /* assumes sizeof(ipov) == 20 and first 8 bytes are zeroes */
+               sum += w[4]; sum += w[5]; sum += w[6];
+               sum += w[7]; sum += w[8]; sum += w[9];
        }
 
        /* skip unnecessary part */
Index: sys/arch/sparc64/sparc64/in4_cksum.c
===================================================================
RCS file: /OpenBSD/src/sys/arch/sparc64/sparc64/in4_cksum.c,v
retrieving revision 1.7
diff -u -p -r1.7 in4_cksum.c
--- sys/arch/sparc64/sparc64/in4_cksum.c        24 Aug 2014 20:06:57 -0000      
1.7
+++ sys/arch/sparc64/sparc64/in4_cksum.c        25 Jan 2022 20:21:07 -0000
@@ -93,7 +93,6 @@ extern int in_cksum_internal(struct mbuf
 int
 in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len)
 {
-       u_char *w;
        u_int sum = 0;
        struct ipovly ipov;
 
@@ -106,24 +105,19 @@ in4_cksum(struct mbuf *m, u_int8_t nxt, 
 
        if (nxt != 0) {
                /* pseudo header */
-               memset(&ipov, 0, sizeof(ipov));
-               ipov.ih_len = htons(len);
-               ipov.ih_pr = nxt; 
                ipov.ih_src = mtod(m, struct ip *)->ip_src; 
                ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
-               w = (u_char *)&ipov;
+               sum = ((u_int)nxt << 16) | htons(len);
                /* assumes sizeof(ipov) == 20 */
-               __asm volatile(" lduw [%5 + 0], %1; "
-                       " lduw [%5 + 4], %2; "
-                       " lduw [%5 + 8], %3; add %0, %1, %0; "
-                       " lduw [%5 + 12], %1; add %0, %2, %0; "
-                       " lduw [%5 + 16], %2; add %0, %3, %0; "
+               __asm volatile(
+                       " lduw [%5 + 12], %1; "
+                       " lduw [%5 + 16], %2; "
                        " mov -1, %3; add %0, %1, %0; "
                        " srl %3, 0, %3; add %0, %2, %0; "
-                       " srlx %0, 32, %2; and %0, %3, %1; "
+                       " srlx %0, 32, %2; "
                        " add %0, %2, %0; "
                        : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3)
-                       : "0" (sum), "r" (w));
+                       : "0" (sum), "r" (&ipov));
        }
 
        /* skip unnecessary part */
Index: sys/netinet/in4_cksum.c
===================================================================
RCS file: /OpenBSD/src/sys/netinet/in4_cksum.c,v
retrieving revision 1.10
diff -u -p -r1.10 in4_cksum.c
--- sys/netinet/in4_cksum.c     8 Sep 2014 06:24:13 -0000       1.10
+++ sys/netinet/in4_cksum.c     25 Jan 2022 20:21:09 -0000
@@ -111,15 +111,15 @@ in4_cksum(struct mbuf *m, u_int8_t nxt, 
                        panic("in4_cksum: offset too short");
                if (m->m_len < sizeof(struct ip))
                        panic("in4_cksum: bad mbuf chain");
-               bzero(&u.ipov, sizeof(u.ipov));
-               u.ipov.ih_len = htons(len);
+               u.ipov.ih_x1[8] = 0;
                u.ipov.ih_pr = nxt;
+               u.ipov.ih_len = htons(len);
                u.ipov.ih_src = mtod(m, struct ip *)->ip_src;
                u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
                w = u.w;
-               /* assumes sizeof(ipov) == 20 */
-               sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4];
-               sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9];
+               /* assumes sizeof(ipov) == 20 and first 8 bytes are zeroes */
+               sum += w[4]; sum += w[5]; sum += w[6];
+               sum += w[7]; sum += w[8]; sum += w[9];
        }
 
        /* skip unnecessary part */

Reply via email to