Support mcel API for apps that prefer it.
This mirrors the recent changes to mbsstr.
* lib/mbscasestr.c [GNULIB_MCEL_PREFER]: Include mcel.h not mbuiter.h.
(mbchar_t, mb_equal) [GNULIB_MCEL_PREFER]: New type and function,
to make it easier to use common code.
(knuth_morris_pratt_multibyte): Don't assume mbchar_t's alignment
is at least that of size_t.
(knuth_morris_pratt_multibyte, mbscasestr) [GNULIB_MCEL_PREFER]:
Use mcel API.
* modules/mbscasestr (Depends-on): Add alignasof.
---
 ChangeLog          |  14 ++++
 lib/mbscasestr.c   | 160 +++++++++++++++++++++++++++++++++++++++++++--
 lib/mbsstr.c       |   8 +--
 modules/mbscasestr |   1 +
 4 files changed, 174 insertions(+), 9 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 39b043775f..7b7246f2a5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2023-09-25  Paul Eggert  <egg...@cs.ucla.edu>
+
+       mbscasestr: support GNULIB_MCEL_PREFER
+       Support mcel API for apps that prefer it.
+       This mirrors the recent changes to mbsstr.
+       * lib/mbscasestr.c [GNULIB_MCEL_PREFER]: Include mcel.h not mbuiter.h.
+       (mbchar_t, mb_equal) [GNULIB_MCEL_PREFER]: New type and function,
+       to make it easier to use common code.
+       (knuth_morris_pratt_multibyte): Don't assume mbchar_t's alignment
+       is at least that of size_t.
+       (knuth_morris_pratt_multibyte, mbscasestr) [GNULIB_MCEL_PREFER]:
+       Use mcel API.
+       * modules/mbscasestr (Depends-on): Add alignasof.
+
 2023-09-24  Bernhard Voelker  <m...@bernhard-voelker.de>
 
        maintainer-makefile: Fix syntax-check rules wrt README.
diff --git a/lib/mbscasestr.c b/lib/mbscasestr.c
index d2b439f1e1..a3ab3cb049 100644
--- a/lib/mbscasestr.c
+++ b/lib/mbscasestr.c
@@ -25,7 +25,14 @@
 #include <stdlib.h>
 
 #include "malloca.h"
-#include "mbuiter.h"
+
+#if GNULIB_MCEL_PREFER
+# include "mcel.h"
+typedef mcel_t mbchar_t;
+static bool mb_equal (mcel_t a, mcel_t b) { return mcel_cmp (a, b) == 0; }
+#else
+# include "mbuiter.h"
+#endif
 
 /* Knuth-Morris-Pratt algorithm.  */
 #define UNIT unsigned char
@@ -43,16 +50,31 @@ knuth_morris_pratt_multibyte (const char *haystack, const 
char *needle,
 {
   size_t m = mbslen (needle);
   mbchar_t *needle_mbchars;
-  size_t *table;
+  size_t extra_align = (alignof (mbchar_t) < alignof (size_t)
+                        ? alignof (size_t) - alignof (mbchar_t)
+                        : 0);
 
   /* Allocate room for needle_mbchars and the table.  */
-  char *memory = (char *) nmalloca (m, sizeof (mbchar_t) + sizeof (size_t));
+  void *memory = nmalloca (m + !!extra_align,
+                           sizeof (mbchar_t) + sizeof (size_t));
+  void *table_memory;
   if (memory == NULL)
     return false;
-  needle_mbchars = (mbchar_t *) memory;
-  table = (size_t *) (memory + m * sizeof (mbchar_t));
+  needle_mbchars = memory;
+  table_memory = needle_mbchars + m;
+  char *aligned = table_memory;
+  aligned += extra_align;
+  aligned -= (uintptr_t) aligned % alignof (size_t);
+  size_t *table = table_memory = aligned;
 
   /* Fill needle_mbchars.  */
+#if GNULIB_MCEL_PREFER
+  for (size_t j = 0; *needle; needle += needle_mbchars[j++].len)
+    {
+      needle_mbchars[j] = mcel_scanz (needle);
+      needle_mbchars[j].ch = c32tolower (needle_mbchars[j].ch);
+    }
+#else
   {
     mbui_iterator_t iter;
     size_t j;
@@ -65,6 +87,7 @@ knuth_morris_pratt_multibyte (const char *haystack, const 
char *needle,
           needle_mbchars[j].wc = c32tolower (needle_mbchars[j].wc);
       }
   }
+#endif
 
   /* Fill the table.
      For 0 < i < m:
@@ -135,6 +158,47 @@ knuth_morris_pratt_multibyte (const char *haystack, const 
char *needle,
 
   /* Search, using the table to accelerate the processing.  */
   {
+#if GNULIB_MCEL_PREFER
+    size_t j;
+    char const *rhaystack = haystack;
+    char const *phaystack = haystack;
+
+    j = 0;
+    /* Invariant: phaystack = rhaystack + j.  */
+    for (;;)
+      {
+        if (!*phaystack)
+          {
+            rhaystack = NULL;
+            break;
+          }
+        mcel_t g = mcel_scanz (phaystack);
+        g.ch = c32tolower (g.ch);
+        if (mcel_cmp (needle_mbchars[j], g) == 0)
+          {
+            j++;
+            /* Exit loop successfully if the entire needle has been found.  */
+            if (j == m)
+              break;
+            phaystack += g.len;
+          }
+        else if (j == 0)
+          {
+            /* Found a mismatch at needle[0] already.  */
+            rhaystack += mcel_scanz (rhaystack).len;
+            phaystack += g.len;
+          }
+        else
+          {
+            /* Found a match of needle[0..j-1], mismatch at needle[j].  */
+            size_t count = table[j];
+            j -= count;
+            for (; count != 0; count--)
+              rhaystack += mcel_scanz (rhaystack).len;
+          }
+      }
+    *resultp = rhaystack;
+#else
     size_t j;
     mbui_iterator_t rhaystack;
     mbui_iterator_t phaystack;
@@ -183,6 +247,7 @@ knuth_morris_pratt_multibyte (const char *haystack, const 
char *needle,
             mbui_advance (phaystack);
           }
       }
+#endif
   }
 
   freea (memory);
@@ -203,6 +268,90 @@ mbscasestr (const char *haystack, const char *needle)
          needle may be found in haystack.  */
   if (MB_CUR_MAX > 1)
     {
+#if GNULIB_MCEL_PREFER
+      if (!*needle)
+        return (char *) haystack;
+
+      mcel_t ng = mcel_scanz (needle);
+      ng.ch = c32tolower (ng.ch);
+
+      /* Minimizing the worst-case complexity:
+         Let n = mbslen(haystack), m = mbslen(needle).
+         The naïve algorithm is O(n*m) worst-case.
+         The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
+         memory allocation.
+         To achieve linear complexity and yet amortize the cost of the
+         memory allocation, we activate the Knuth-Morris-Pratt algorithm
+         only once the naïve algorithm has already run for some time; more
+         precisely, when
+           - the outer loop count is >= 10,
+           - the average number of comparisons per outer loop is >= 5,
+           - the total number of comparisons is >= m.
+         But we try it only once.  If the memory allocation attempt failed,
+         we don't retry it.  */
+      bool try_kmp = true;
+      size_t outer_loop_count = 0;
+      size_t comparison_count = 0;
+
+      /* Last comparison count, and needle + last_ccount.  */
+      size_t last_ccount = 0;
+      char const *iter_needle_last_ccount = needle;
+
+      char const *iter_haystack = haystack;
+
+      for (mcel_t hg; *iter_haystack; iter_haystack += hg.len)
+        {
+          /* See whether it's advisable to use an asymptotically faster
+             algorithm.  */
+          if (try_kmp
+              && outer_loop_count >= 10
+              && comparison_count >= 5 * outer_loop_count)
+            {
+              /* See if needle + comparison_count now reaches the end of
+                 needle.  */
+              size_t count = comparison_count - last_ccount;
+              for (;
+                   count > 0 && *iter_needle_last_ccount;
+                   count--)
+                iter_needle_last_ccount
+                  += mcel_scanz (iter_needle_last_ccount).len;
+              last_ccount = comparison_count;
+              if (!*iter_needle_last_ccount)
+                {
+                  char const *result;
+                  if (knuth_morris_pratt_multibyte (haystack, needle,
+                                                    &result))
+                    return (char *) result;
+                  try_kmp = false;
+                }
+            }
+
+          outer_loop_count++;
+          comparison_count++;
+          hg = mcel_scanz (iter_haystack);
+          hg.ch = c32tolower (hg.ch);
+          if (mcel_cmp (hg, ng) == 0)
+            /* The first character matches.  */
+            {
+              char const *rhaystack = iter_haystack + hg.len;
+              char const *rneedle = needle + ng.len;
+              mcel_t rhg, rng;
+              do
+                {
+                  if (!*rneedle)
+                    return (char *) iter_haystack;
+                  if (!*rhaystack)
+                    return NULL;
+                  rhg = mcel_scanz (rhaystack); rhaystack += rhg.len;
+                  rng = mcel_scanz (rneedle); rneedle += rng.len;
+                  comparison_count++;
+                }
+              while (mcel_tocmp (c32tolower, rhg, rng) == 0);
+            }
+        }
+
+      return NULL;
+#else
       mbui_iterator_t iter_needle;
 
       mbui_init (iter_needle, needle);
@@ -311,6 +460,7 @@ mbscasestr (const char *haystack, const char *needle)
         }
       else
         return (char *) haystack;
+#endif
     }
   else
     {
diff --git a/lib/mbsstr.c b/lib/mbsstr.c
index b8d15f9521..ca373bf81b 100644
--- a/lib/mbsstr.c
+++ b/lib/mbsstr.c
@@ -67,19 +67,19 @@ knuth_morris_pratt_multibyte (const char *haystack, const 
char *needle,
   size_t *table = table_memory = aligned;
 
   /* Fill needle_mbchars.  */
-  {
 #if GNULIB_MCEL_PREFER
-    for (size_t j = 0; *needle; needle += needle_mbchars[j++].len)
-      needle_mbchars[j] = mcel_scanz (needle);
+  for (size_t j = 0; *needle; needle += needle_mbchars[j++].len)
+    needle_mbchars[j] = mcel_scanz (needle);
 #else
+  {
     mbui_iterator_t iter;
     size_t j;
 
     j = 0;
     for (mbui_init (iter, needle); mbui_avail (iter); mbui_advance (iter), j++)
       mb_copy (&needle_mbchars[j], &mbui_cur (iter));
-#endif
   }
+#endif
 
   /* Fill the table.
      For 0 < i < m:
diff --git a/modules/mbscasestr b/modules/mbscasestr
index 15e925d6e1..7b2324717c 100644
--- a/modules/mbscasestr
+++ b/modules/mbscasestr
@@ -6,6 +6,7 @@ lib/mbscasestr.c
 lib/str-kmp.h
 
 Depends-on:
+alignasof
 mbuiter
 stdbool
 string
-- 
2.41.0


Reply via email to