Hi Paolo, > I pushed it now, thanks for the review!
After I added some more unit tests for u8_strchr, I see crashes and assertion failures: 1) mem = (UNIT *) (page_boundary - 2 * sizeof (UNIT)); mem[0] = 0x50; mem[1] = 0; ASSERT (u8_strchr (mem, 0x123) == NULL); /* crashes */ 2) mem = (UNIT *) (page_boundary - 3 * sizeof (UNIT)); mem[0] = 0x50; mem[1] = 0x50; mem[2] = 0; ASSERT (u8_strchr (mem, 0x123) == NULL); /* assertion failed */ 3) mem = (UNIT *) (page_boundary - 3 * sizeof (UNIT)); mem[0] = 0x50; mem[1] = 0x50; mem[2] = 0; ASSERT (u8_strchr (mem, 0x3456) == NULL); /* crashes */ 4) mem = (UNIT *) (page_boundary - 4 * sizeof (UNIT)); mem[0] = 0x50; mem[1] = 0x50; mem[2] = 0x50; mem[3] = 0; ASSERT (u8_strchr (mem, 0x23456) == NULL); /* crashes */ I'm applying the two attached patches: Bug fixes, then comments and tiny optimizations. 2010-07-31 Bruno Haible <br...@clisp.org> unistr/u8-strchr: Fix several bugs. * lib/unistr/u8-strchr.c (u8_strchr): Don't search beyond the end of the string. When not found, return NULL, not a pointer near the end. --- lib/unistr/u8-strchr.c.orig Sat Jul 31 21:26:45 2010 +++ lib/unistr/u8-strchr.c Sat Jul 31 21:22:34 2010 @@ -62,7 +62,7 @@ switch (u8_uctomb_aux (c, uc, 6)) { case 2: - if (*s == 0) + if (*s == 0 || s[1] == 0) break; { uint8_t c0 = c[0]; @@ -96,11 +96,11 @@ if (s[1] == 0) break; } - return (uint8_t *) s; } + break; case 3: - if (*s == 0 || s[1] == 0) + if (*s == 0 || s[1] == 0 || s[2] == 0) break; { uint8_t c0 = c[0]; @@ -147,7 +147,7 @@ } case 4: - if (*s == 0 || s[1] == 0 || s[2] == 0) + if (*s == 0 || s[1] == 0 || s[2] == 0 || s[3] == 0) break; { uint8_t c0 = c[0]; 2010-07-31 Bruno Haible <br...@clisp.org> unistr/u8-chr, unistr/u8-strchr: Optimize and add comments. * lib/unistr/u8-chr.c (u8_chr): Add comments. Remove a useless test at the beginning of the loop. * lib/unistr/u8-strchr.c (u8_strchr): Add comments. Don't fall through cases in 'switch' statement. --- lib/unistr/u8-chr.c.orig Sat Jul 31 21:29:46 2010 +++ lib/unistr/u8-chr.c Sat Jul 31 21:29:14 2010 @@ -70,14 +70,17 @@ uint8_t c1 = c[1]; const uint8_t *end = s + n - 1; - while (s < end) + do { + /* Here s < end. + Test whether s[0..1] == { c0, c1 }. */ uint8_t s1 = s[1]; if (s1 == c1) { if (*s == c0) return (uint8_t *) s; else + /* Skip the search at s + 1, because s[1] = c1 < c0. */ s += 2; } else @@ -85,9 +88,11 @@ if (s1 == c0) s++; else + /* Skip the search at s + 1, because s[1] != c0. */ s += 2; } } + while (s < end); break; } @@ -104,14 +109,19 @@ else skip = 3; - while (s < end) + do { + /* Here s < end. + Test whether s[0..2] == { c0, c1, c2 }. */ uint8_t s2 = s[2]; if (s2 == c2) { if (s[1] == c1 && *s == c0) return (uint8_t *) s; else + /* If c2 != c1: + Skip the search at s + 1, because s[2] == c2 != c1. + Skip the search at s + 2, because s[2] == c2 < c0. */ s += skip; } else @@ -119,11 +129,15 @@ if (s2 == c1) s++; else if (s2 == c0) + /* Skip the search at s + 1, because s[2] != c1. */ s += 2; else + /* Skip the search at s + 1, because s[2] != c1. + Skip the search at s + 2, because s[2] != c0. */ s += 3; } } + while (s < end); break; } @@ -143,14 +157,21 @@ else skip = 4; - while (s < end) + do { + /* Here s < end. + Test whether s[0..3] == { c0, c1, c2, c3 }. */ uint8_t s3 = s[3]; if (s3 == c3) { if (s[2] == c2 && s[1] == c1 && *s == c0) return (uint8_t *) s; else + /* If c3 != c2: + Skip the search at s + 1, because s[3] == c3 != c2. + If c3 != c1: + Skip the search at s + 2, because s[3] == c3 != c1. + Skip the search at s + 3, because s[3] == c3 < c0. */ s += skip; } else @@ -158,13 +179,20 @@ if (s3 == c2) s++; else if (s3 == c1) + /* Skip the search at s + 1, because s[3] != c2. */ s += 2; else if (s3 == c0) + /* Skip the search at s + 1, because s[3] != c2. + Skip the search at s + 2, because s[3] != c1. */ s += 3; else + /* Skip the search at s + 1, because s[3] != c2. + Skip the search at s + 2, because s[3] != c1. + Skip the search at s + 3, because s[3] != c0. */ s += 4; } } + while (s < end); break; } } --- lib/unistr/u8-strchr.c.orig Sat Jul 31 21:29:46 2010 +++ lib/unistr/u8-strchr.c Sat Jul 31 21:29:14 2010 @@ -67,15 +67,19 @@ { uint8_t c0 = c[0]; uint8_t c1 = c[1]; + /* Search for { c0, c1 }. */ uint8_t s1 = s[1]; for (;;) { + /* Here s[0] != 0, s[1] != 0. + Test whether s[0..1] == { c0, c1 }. */ if (s1 == c1) { if (*s == c0) return (uint8_t *) s; else + /* Skip the search at s + 1, because s[1] = c1 < c0. */ goto case2_skip2; } else @@ -83,6 +87,7 @@ if (s1 == c0) goto case2_skip1; else + /* Skip the search at s + 1, because s[1] != c0. */ goto case2_skip2; } case2_skip2: @@ -106,26 +111,36 @@ uint8_t c0 = c[0]; uint8_t c1 = c[1]; uint8_t c2 = c[2]; + /* Search for { c0, c1, c2 }. */ uint8_t s2 = s[2]; for (;;) { + /* Here s[0] != 0, s[1] != 0, s[2] != 0. + Test whether s[0..2] == { c0, c1, c2 }. */ if (s2 == c2) { if (s[1] == c1 && *s == c0) return (uint8_t *) s; - else if (c2 == c1) - goto case3_skip1; else - goto case3_skip3; + /* If c2 != c1: + Skip the search at s + 1, because s[2] == c2 != c1. + Skip the search at s + 2, because s[2] == c2 < c0. */ + if (c2 == c1) + goto case3_skip1; + else + goto case3_skip3; } else { if (s2 == c1) goto case3_skip1; else if (s2 == c0) + /* Skip the search at s + 1, because s[2] != c1. */ goto case3_skip2; else + /* Skip the search at s + 1, because s[2] != c1. + Skip the search at s + 2, because s[2] != c0. */ goto case3_skip3; } case3_skip3: @@ -145,6 +160,7 @@ break; } } + break; case 4: if (*s == 0 || s[1] == 0 || s[2] == 0 || s[3] == 0) @@ -154,30 +170,45 @@ uint8_t c1 = c[1]; uint8_t c2 = c[2]; uint8_t c3 = c[3]; + /* Search for { c0, c1, c2, c3 }. */ uint8_t s3 = s[3]; for (;;) { + /* Here s[0] != 0, s[1] != 0, s[2] != 0, s[3] != 0. + Test whether s[0..3] == { c0, c1, c2, c3 }. */ if (s3 == c3) { if (s[2] == c2 && s[1] == c1 && *s == c0) return (uint8_t *) s; - else if (c3 == c2) - goto case4_skip1; - else if (c3 == c1) - goto case4_skip2; else - goto case4_skip4; + /* If c3 != c2: + Skip the search at s + 1, because s[3] == c3 != c2. + If c3 != c1: + Skip the search at s + 2, because s[3] == c3 != c1. + Skip the search at s + 3, because s[3] == c3 < c0. */ + if (c3 == c2) + goto case4_skip1; + else if (c3 == c1) + goto case4_skip2; + else + goto case4_skip4; } else { if (s3 == c2) goto case4_skip1; else if (s3 == c1) + /* Skip the search at s + 1, because s[3] != c2. */ goto case4_skip2; else if (s3 == c0) + /* Skip the search at s + 1, because s[3] != c2. + Skip the search at s + 2, because s[3] != c1. */ goto case4_skip3; else + /* Skip the search at s + 1, because s[3] != c2. + Skip the search at s + 2, because s[3] != c1. + Skip the search at s + 3, because s[3] != c0. */ goto case4_skip4; } case4_skip4: @@ -202,6 +233,7 @@ break; } } + break; } return NULL;