Support mcel API for apps that prefer it. This mirrors the recent changes to mbsstr. * lib/mbscasestr.c [GNULIB_MCEL_PREFER]: Include mcel.h not mbuiter.h. (mbchar_t, mb_equal) [GNULIB_MCEL_PREFER]: New type and function, to make it easier to use common code. (knuth_morris_pratt_multibyte): Don't assume mbchar_t's alignment is at least that of size_t. (knuth_morris_pratt_multibyte, mbscasestr) [GNULIB_MCEL_PREFER]: Use mcel API. * modules/mbscasestr (Depends-on): Add alignasof. --- ChangeLog | 14 ++++ lib/mbscasestr.c | 160 +++++++++++++++++++++++++++++++++++++++++++-- lib/mbsstr.c | 8 +-- modules/mbscasestr | 1 + 4 files changed, 174 insertions(+), 9 deletions(-)
diff --git a/ChangeLog b/ChangeLog index 39b043775f..7b7246f2a5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2023-09-25 Paul Eggert <egg...@cs.ucla.edu> + + mbscasestr: support GNULIB_MCEL_PREFER + Support mcel API for apps that prefer it. + This mirrors the recent changes to mbsstr. + * lib/mbscasestr.c [GNULIB_MCEL_PREFER]: Include mcel.h not mbuiter.h. + (mbchar_t, mb_equal) [GNULIB_MCEL_PREFER]: New type and function, + to make it easier to use common code. + (knuth_morris_pratt_multibyte): Don't assume mbchar_t's alignment + is at least that of size_t. + (knuth_morris_pratt_multibyte, mbscasestr) [GNULIB_MCEL_PREFER]: + Use mcel API. + * modules/mbscasestr (Depends-on): Add alignasof. + 2023-09-24 Bernhard Voelker <m...@bernhard-voelker.de> maintainer-makefile: Fix syntax-check rules wrt README. diff --git a/lib/mbscasestr.c b/lib/mbscasestr.c index d2b439f1e1..a3ab3cb049 100644 --- a/lib/mbscasestr.c +++ b/lib/mbscasestr.c @@ -25,7 +25,14 @@ #include <stdlib.h> #include "malloca.h" -#include "mbuiter.h" + +#if GNULIB_MCEL_PREFER +# include "mcel.h" +typedef mcel_t mbchar_t; +static bool mb_equal (mcel_t a, mcel_t b) { return mcel_cmp (a, b) == 0; } +#else +# include "mbuiter.h" +#endif /* Knuth-Morris-Pratt algorithm. */ #define UNIT unsigned char @@ -43,16 +50,31 @@ knuth_morris_pratt_multibyte (const char *haystack, const char *needle, { size_t m = mbslen (needle); mbchar_t *needle_mbchars; - size_t *table; + size_t extra_align = (alignof (mbchar_t) < alignof (size_t) + ? alignof (size_t) - alignof (mbchar_t) + : 0); /* Allocate room for needle_mbchars and the table. */ - char *memory = (char *) nmalloca (m, sizeof (mbchar_t) + sizeof (size_t)); + void *memory = nmalloca (m + !!extra_align, + sizeof (mbchar_t) + sizeof (size_t)); + void *table_memory; if (memory == NULL) return false; - needle_mbchars = (mbchar_t *) memory; - table = (size_t *) (memory + m * sizeof (mbchar_t)); + needle_mbchars = memory; + table_memory = needle_mbchars + m; + char *aligned = table_memory; + aligned += extra_align; + aligned -= (uintptr_t) aligned % alignof (size_t); + size_t *table = table_memory = aligned; /* Fill needle_mbchars. */ +#if GNULIB_MCEL_PREFER + for (size_t j = 0; *needle; needle += needle_mbchars[j++].len) + { + needle_mbchars[j] = mcel_scanz (needle); + needle_mbchars[j].ch = c32tolower (needle_mbchars[j].ch); + } +#else { mbui_iterator_t iter; size_t j; @@ -65,6 +87,7 @@ knuth_morris_pratt_multibyte (const char *haystack, const char *needle, needle_mbchars[j].wc = c32tolower (needle_mbchars[j].wc); } } +#endif /* Fill the table. For 0 < i < m: @@ -135,6 +158,47 @@ knuth_morris_pratt_multibyte (const char *haystack, const char *needle, /* Search, using the table to accelerate the processing. */ { +#if GNULIB_MCEL_PREFER + size_t j; + char const *rhaystack = haystack; + char const *phaystack = haystack; + + j = 0; + /* Invariant: phaystack = rhaystack + j. */ + for (;;) + { + if (!*phaystack) + { + rhaystack = NULL; + break; + } + mcel_t g = mcel_scanz (phaystack); + g.ch = c32tolower (g.ch); + if (mcel_cmp (needle_mbchars[j], g) == 0) + { + j++; + /* Exit loop successfully if the entire needle has been found. */ + if (j == m) + break; + phaystack += g.len; + } + else if (j == 0) + { + /* Found a mismatch at needle[0] already. */ + rhaystack += mcel_scanz (rhaystack).len; + phaystack += g.len; + } + else + { + /* Found a match of needle[0..j-1], mismatch at needle[j]. */ + size_t count = table[j]; + j -= count; + for (; count != 0; count--) + rhaystack += mcel_scanz (rhaystack).len; + } + } + *resultp = rhaystack; +#else size_t j; mbui_iterator_t rhaystack; mbui_iterator_t phaystack; @@ -183,6 +247,7 @@ knuth_morris_pratt_multibyte (const char *haystack, const char *needle, mbui_advance (phaystack); } } +#endif } freea (memory); @@ -203,6 +268,90 @@ mbscasestr (const char *haystack, const char *needle) needle may be found in haystack. */ if (MB_CUR_MAX > 1) { +#if GNULIB_MCEL_PREFER + if (!*needle) + return (char *) haystack; + + mcel_t ng = mcel_scanz (needle); + ng.ch = c32tolower (ng.ch); + + /* Minimizing the worst-case complexity: + Let n = mbslen(haystack), m = mbslen(needle). + The naïve algorithm is O(n*m) worst-case. + The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a + memory allocation. + To achieve linear complexity and yet amortize the cost of the + memory allocation, we activate the Knuth-Morris-Pratt algorithm + only once the naïve algorithm has already run for some time; more + precisely, when + - the outer loop count is >= 10, + - the average number of comparisons per outer loop is >= 5, + - the total number of comparisons is >= m. + But we try it only once. If the memory allocation attempt failed, + we don't retry it. */ + bool try_kmp = true; + size_t outer_loop_count = 0; + size_t comparison_count = 0; + + /* Last comparison count, and needle + last_ccount. */ + size_t last_ccount = 0; + char const *iter_needle_last_ccount = needle; + + char const *iter_haystack = haystack; + + for (mcel_t hg; *iter_haystack; iter_haystack += hg.len) + { + /* See whether it's advisable to use an asymptotically faster + algorithm. */ + if (try_kmp + && outer_loop_count >= 10 + && comparison_count >= 5 * outer_loop_count) + { + /* See if needle + comparison_count now reaches the end of + needle. */ + size_t count = comparison_count - last_ccount; + for (; + count > 0 && *iter_needle_last_ccount; + count--) + iter_needle_last_ccount + += mcel_scanz (iter_needle_last_ccount).len; + last_ccount = comparison_count; + if (!*iter_needle_last_ccount) + { + char const *result; + if (knuth_morris_pratt_multibyte (haystack, needle, + &result)) + return (char *) result; + try_kmp = false; + } + } + + outer_loop_count++; + comparison_count++; + hg = mcel_scanz (iter_haystack); + hg.ch = c32tolower (hg.ch); + if (mcel_cmp (hg, ng) == 0) + /* The first character matches. */ + { + char const *rhaystack = iter_haystack + hg.len; + char const *rneedle = needle + ng.len; + mcel_t rhg, rng; + do + { + if (!*rneedle) + return (char *) iter_haystack; + if (!*rhaystack) + return NULL; + rhg = mcel_scanz (rhaystack); rhaystack += rhg.len; + rng = mcel_scanz (rneedle); rneedle += rng.len; + comparison_count++; + } + while (mcel_tocmp (c32tolower, rhg, rng) == 0); + } + } + + return NULL; +#else mbui_iterator_t iter_needle; mbui_init (iter_needle, needle); @@ -311,6 +460,7 @@ mbscasestr (const char *haystack, const char *needle) } else return (char *) haystack; +#endif } else { diff --git a/lib/mbsstr.c b/lib/mbsstr.c index b8d15f9521..ca373bf81b 100644 --- a/lib/mbsstr.c +++ b/lib/mbsstr.c @@ -67,19 +67,19 @@ knuth_morris_pratt_multibyte (const char *haystack, const char *needle, size_t *table = table_memory = aligned; /* Fill needle_mbchars. */ - { #if GNULIB_MCEL_PREFER - for (size_t j = 0; *needle; needle += needle_mbchars[j++].len) - needle_mbchars[j] = mcel_scanz (needle); + for (size_t j = 0; *needle; needle += needle_mbchars[j++].len) + needle_mbchars[j] = mcel_scanz (needle); #else + { mbui_iterator_t iter; size_t j; j = 0; for (mbui_init (iter, needle); mbui_avail (iter); mbui_advance (iter), j++) mb_copy (&needle_mbchars[j], &mbui_cur (iter)); -#endif } +#endif /* Fill the table. For 0 < i < m: diff --git a/modules/mbscasestr b/modules/mbscasestr index 15e925d6e1..7b2324717c 100644 --- a/modules/mbscasestr +++ b/modules/mbscasestr @@ -6,6 +6,7 @@ lib/mbscasestr.c lib/str-kmp.h Depends-on: +alignasof mbuiter stdbool string -- 2.41.0