Now that the wcsstr function in gnulib is O(n), thanks to the two-way algorithm,
it's time to apply the same optimization to u16_strstr and u32_strstr. Both
so far used the Knuth-Morris-Pratt algorithm, that is O(n) as well in regular
situations but falls back to O(n²) if there is out-of-memory condition.

Done as follows.


2023-04-02  Bruno Haible  <br...@clisp.org>

        unistr/u8-strstr: Simplify code.
        * lib/unistr/u8-strstr.c: Inline the contents of lib/unistr/u-strstr.h.
        * lib/unistr/u-strstr.h: Remove file.
        * modules/unistr/u8-strstr (Files): Remove it.

        unistr/u{16,32}-strstr: Use two-way algorithm (no memory allocation).
        * lib/wcs-two-way.h: Use UNIT instead of wchar_t. Don't undefine
        RETURN_TYPE.
        * lib/wcsstr-impl.h: Move the non-linear implementation away. Use UNIT
        instead of wchar_t, RETURN_TYPE instead of 'wchar_t *', FUNC instead of
        wcsstr.
        (AVAILABLE): Use MEMCHR0 instead of wmemchr.
        (FUNC): Use STRCHR instead of wcschr.
        * lib/wcsstr.c: Moved the non-linear implementation to here.
        (FUNC, UNIT, RETURN_TYPE, MEMCHR0, STRCHR): New macros.
        * lib/unistr/u16-strstr.c: Don't include malloca.h, str-kmp.h,
        u-strstr.h. Instead, include wcsstr-impl.h.
        * lib/unistr/u32-strstr.c: Likewise.
        * modules/unistr/u16-strstr (Files): Remove u-strstr.h, str-kmp.h. Add
        wcsstr-impl.h, wcs-two-way.h.
        (Depends-on): Remove u16-strmbtouc, u16-strlen, u16-strnlen, malloca.
        Add u16-chr, u16-cmp.
        * modules/unistr/u32-strstr (Files): Remove u-strstr.h, str-kmp.h. Add
        wcsstr-impl.h, wcs-two-way.h.
        (Depends-on): Remove u32-strlen, u32-strnlen, malloca. Add u32-chr,
        u32-cmp.

>From 4a3872f5902158b6a0d5f9bac02620da63639d90 Mon Sep 17 00:00:00 2001
From: Bruno Haible <br...@clisp.org>
Date: Sun, 2 Apr 2023 16:07:36 +0200
Subject: [PATCH 1/2] unistr/u{16,32}-strstr: Use two-way algorithm (no memory
 allocation).

* lib/wcs-two-way.h: Use UNIT instead of wchar_t. Don't undefine
RETURN_TYPE.
* lib/wcsstr-impl.h: Move the non-linear implementation away. Use UNIT
instead of wchar_t, RETURN_TYPE instead of 'wchar_t *', FUNC instead of
wcsstr.
(AVAILABLE): Use MEMCHR0 instead of wmemchr.
(FUNC): Use STRCHR instead of wcschr.
* lib/wcsstr.c: Moved the non-linear implementation to here.
(FUNC, UNIT, RETURN_TYPE, MEMCHR0, STRCHR): New macros.
* lib/unistr/u16-strstr.c: Don't include malloca.h, str-kmp.h,
u-strstr.h. Instead, include wcsstr-impl.h.
* lib/unistr/u32-strstr.c: Likewise.
* modules/unistr/u16-strstr (Files): Remove u-strstr.h, str-kmp.h. Add
wcsstr-impl.h, wcs-two-way.h.
(Depends-on): Remove u16-strmbtouc, u16-strlen, u16-strnlen, malloca.
Add u16-chr, u16-cmp.
* modules/unistr/u32-strstr (Files): Remove u-strstr.h, str-kmp.h. Add
wcsstr-impl.h, wcs-two-way.h.
(Depends-on): Remove u32-strlen, u32-strnlen, malloca. Add u32-chr,
u32-cmp.
---
 ChangeLog                 | 24 +++++++++++++
 lib/unistr/u16-strstr.c   | 18 +++-------
 lib/unistr/u32-strstr.c   | 15 +++-----
 lib/wcs-two-way.h         | 23 ++++++-------
 lib/wcsstr-impl.h         | 72 +++++++--------------------------------
 lib/wcsstr.c              | 54 ++++++++++++++++++++++++++++-
 modules/unistr/u16-strstr | 10 +++---
 modules/unistr/u32-strstr |  9 +++--
 8 files changed, 119 insertions(+), 106 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index ddc568db03..f933d1e94d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,27 @@
+2023-04-02  Bruno Haible  <br...@clisp.org>
+
+	unistr/u{16,32}-strstr: Use two-way algorithm (no memory allocation).
+	* lib/wcs-two-way.h: Use UNIT instead of wchar_t. Don't undefine
+	RETURN_TYPE.
+	* lib/wcsstr-impl.h: Move the non-linear implementation away. Use UNIT
+	instead of wchar_t, RETURN_TYPE instead of 'wchar_t *', FUNC instead of
+	wcsstr.
+	(AVAILABLE): Use MEMCHR0 instead of wmemchr.
+	(FUNC): Use STRCHR instead of wcschr.
+	* lib/wcsstr.c: Moved the non-linear implementation to here.
+	(FUNC, UNIT, RETURN_TYPE, MEMCHR0, STRCHR): New macros.
+	* lib/unistr/u16-strstr.c: Don't include malloca.h, str-kmp.h,
+	u-strstr.h. Instead, include wcsstr-impl.h.
+	* lib/unistr/u32-strstr.c: Likewise.
+	* modules/unistr/u16-strstr (Files): Remove u-strstr.h, str-kmp.h. Add
+	wcsstr-impl.h, wcs-two-way.h.
+	(Depends-on): Remove u16-strmbtouc, u16-strlen, u16-strnlen, malloca.
+	Add u16-chr, u16-cmp.
+	* modules/unistr/u32-strstr (Files): Remove u-strstr.h, str-kmp.h. Add
+	wcsstr-impl.h, wcs-two-way.h.
+	(Depends-on): Remove u32-strlen, u32-strnlen, malloca. Add u32-chr,
+	u32-cmp.
+
 2023-04-02  Bruno Haible  <br...@clisp.org>
 
 	unistr/u*strstr tests: Add more tests.
diff --git a/lib/unistr/u16-strstr.c b/lib/unistr/u16-strstr.c
index 5a21a1f63d..2be6cd5744 100644
--- a/lib/unistr/u16-strstr.c
+++ b/lib/unistr/u16-strstr.c
@@ -28,18 +28,10 @@
 /* Specification.  */
 #include "unistr.h"
 
-#include "malloca.h"
-
-/* FIXME: Maybe walking the string via u16_mblen is a win?  */
-
 #define UNIT uint16_t
-
-#define CANON_ELEMENT(c) c
-#include "str-kmp.h"
-
 #define FUNC u16_strstr
-#define U_STRCHR u16_strchr
-#define U_STRMBTOUC u16_strmbtouc
-#define U_STRLEN u16_strlen
-#define U_STRNLEN u16_strnlen
-#include "u-strstr.h"
+#define RETURN_TYPE uint16_t *
+#define MEMCHR0(s, n) u16_chr (s, n, 0)
+#define STRCHR u16_strchr
+#define CMP_FUNC u16_cmp
+#include "wcsstr-impl.h"
diff --git a/lib/unistr/u32-strstr.c b/lib/unistr/u32-strstr.c
index 261d93afb3..62f04d0cdf 100644
--- a/lib/unistr/u32-strstr.c
+++ b/lib/unistr/u32-strstr.c
@@ -28,15 +28,10 @@
 /* Specification.  */
 #include "unistr.h"
 
-#include "malloca.h"
-
 #define UNIT uint32_t
-
-#define CANON_ELEMENT(c) c
-#include "str-kmp.h"
-
 #define FUNC u32_strstr
-#define U_STRCHR u32_strchr
-#define U_STRLEN u32_strlen
-#define U_STRNLEN u32_strnlen
-#include "u-strstr.h"
+#define RETURN_TYPE uint32_t *
+#define MEMCHR0(s, n) u32_chr (s, n, 0)
+#define STRCHR u32_strchr
+#define CMP_FUNC u32_cmp
+#include "wcsstr-impl.h"
diff --git a/lib/wcs-two-way.h b/lib/wcs-two-way.h
index 0879026f2b..b160f90c4a 100644
--- a/lib/wcs-two-way.h
+++ b/lib/wcs-two-way.h
@@ -18,23 +18,23 @@
 
 /* Before including this file, you need to include <config.h> and
    <string.h>, and define:
+     UNIT                    The element type of the needle and haystack.
      RETURN_TYPE             A macro that expands to the return type.
      AVAILABLE(h, h_l, j, n_l)
                              A macro that returns nonzero if there are
                              at least N_L characters left starting at H[J].
-                             H is 'wchar_t *', H_L, J, and N_L
-                             are 'size_t'; H_L is an lvalue.  For
-                             NUL-terminated searches, H_L can be
-                             modified each iteration to avoid having
-                             to compute the end of H up front.
+                             H is 'UNIT *', H_L, J, and N_L are 'size_t';
+                             H_L is an lvalue.  For NUL-terminated searches,
+                             H_L can be modified each iteration to avoid
+                             having to compute the end of H up front.
 
   For case-insensitivity, you may optionally define:
      CMP_FUNC(p1, p2, l)     A macro that returns 0 iff the first L
                              characters of P1 and P2 are equal.
      CANON_ELEMENT(c)        A macro that canonicalizes an element right after
                              it has been fetched from one of the two strings.
-                             The argument is a 'wchar_t'; the result
-                             must be a 'wchar_t' as well.
+                             The argument is a 'UNIT'; the result must be a
+                             'UNIT' as well.
 
   This file undefines the macros documented above, and defines
   LONG_NEEDLE_THRESHOLD.
@@ -88,7 +88,7 @@
    suffixes are determined by lexicographic comparison while tracking
    periodicity.  */
 static size_t
-critical_factorization (const wchar_t *needle, size_t needle_len,
+critical_factorization (const UNIT *needle, size_t needle_len,
                         size_t *period)
 {
   /* Index of last character of left half, or SIZE_MAX.  */
@@ -96,7 +96,7 @@ critical_factorization (const wchar_t *needle, size_t needle_len,
   size_t j; /* Index into NEEDLE for current candidate suffix.  */
   size_t k; /* Offset into current period.  */
   size_t p; /* Intermediate period.  */
-  wchar_t a, b; /* Current comparison characters.  */
+  UNIT a, b; /* Current comparison characters.  */
 
   /* Special case NEEDLE_LEN of 1 or 2 (all callers already filtered
      out 0-length needles.  */
@@ -215,8 +215,8 @@ critical_factorization (const wchar_t *needle, size_t needle_len,
    If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 *
    HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching.  */
 static RETURN_TYPE _GL_ATTRIBUTE_PURE
-two_way_short_needle (const wchar_t *haystack, size_t haystack_len,
-                      const wchar_t *needle, size_t needle_len)
+two_way_short_needle (const UNIT *haystack, size_t haystack_len,
+                      const UNIT *needle, size_t needle_len)
 {
   size_t i; /* Index into current character of NEEDLE.  */
   size_t j; /* Index into current window of HAYSTACK.  */
@@ -300,4 +300,3 @@ two_way_short_needle (const wchar_t *haystack, size_t haystack_len,
 #undef CANON_ELEMENT
 #undef CMP_FUNC
 #undef MAX
-#undef RETURN_TYPE
diff --git a/lib/wcsstr-impl.h b/lib/wcsstr-impl.h
index ccd64672b4..f889c26b11 100644
--- a/lib/wcsstr-impl.h
+++ b/lib/wcsstr-impl.h
@@ -14,23 +14,18 @@
    You should have received a copy of the GNU Lesser General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
 
-/* Written by Bruno Haible, 1999, and Eric Blake, 2008.  */
+/* Written by Eric Blake, 2008.  */
 
-#if NEED_LINEAR_WCSSTR
+#define AVAILABLE(h, h_l, j, n_l)                       \
+  (!MEMCHR0 ((h) + (h_l), (j) + (n_l) - (h_l))          \
+   && ((h_l) = (j) + (n_l)))
+#include "wcs-two-way.h"
 
-/* Use Two-Way algorithm, worst-case O(n+m).  */
-
-# define RETURN_TYPE wchar_t *
-# define AVAILABLE(h, h_l, j, n_l)                       \
-   (!wmemchr ((h) + (h_l), L'\0', (j) + (n_l) - (h_l))   \
-    && ((h_l) = (j) + (n_l)))
-# include "wcs-two-way.h"
-
-wchar_t *
-wcsstr (const wchar_t *haystack_start, const wchar_t *needle_start)
+RETURN_TYPE
+FUNC (const UNIT *haystack_start, const UNIT *needle_start)
 {
-  const wchar_t *haystack = haystack_start;
-  const wchar_t *needle = needle_start;
+  const UNIT *haystack = haystack_start;
+  const UNIT *needle = needle_start;
   size_t needle_len; /* Length of NEEDLE.  */
   size_t haystack_len; /* Known minimum length of HAYSTACK.  */
   bool ok = true; /* True if NEEDLE is prefix of HAYSTACK.  */
@@ -43,14 +38,14 @@ wcsstr (const wchar_t *haystack_start, const wchar_t *needle_start)
   if (*needle)
     return NULL;
   if (ok)
-    return (wchar_t *) haystack_start;
+    return (RETURN_TYPE) haystack_start;
 
-  /* Reduce the size of haystack using wcschr, since it has a smaller
+  /* Reduce the size of haystack using STRCHR, since it has a smaller
      linear coefficient than the Two-Way algorithm.  */
   needle_len = needle - needle_start;
-  haystack = wcschr (haystack_start + 1, *needle_start);
+  haystack = STRCHR (haystack_start + 1, *needle_start);
   if (!haystack || __builtin_expect (needle_len == 1, 0))
-    return (wchar_t *) haystack;
+    return (RETURN_TYPE) haystack;
   needle -= needle_len;
   haystack_len = (haystack > haystack_start + needle_len ? 1
                   : needle_len + haystack_start - haystack);
@@ -59,44 +54,3 @@ wcsstr (const wchar_t *haystack_start, const wchar_t *needle_start)
   return two_way_short_needle (haystack, haystack_len,
                                needle, needle_len);
 }
-
-#else
-
-/* Use simple implementation, worst-case O(nm).  */
-
-wchar_t *
-wcsstr (const wchar_t *haystack, const wchar_t *needle)
-{
-  wchar_t n = needle[0];
-
-  /* Is needle empty?  */
-  if (n == (wchar_t)'\0')
-    return (wchar_t *) haystack;
-
-  /* Is needle nearly empty?  */
-  if (needle[1] == (wchar_t)'\0')
-    return wcschr (haystack, n);
-
-  /* Search for needle's first character.  */
-  for (; *haystack != (wchar_t)'\0'; haystack++)
-    {
-      if (*haystack == n)
-        {
-          /* Compare with needle's remaining characters.  */
-          const wchar_t *hptr = haystack + 1;
-          const wchar_t *nptr = needle + 1;
-          for (;;)
-            {
-              if (*hptr != *nptr)
-                break;
-              hptr++; nptr++;
-              if (*nptr == (wchar_t)'\0')
-                return (wchar_t *) haystack;
-            }
-        }
-    }
-
-  return NULL;
-}
-
-#endif
diff --git a/lib/wcsstr.c b/lib/wcsstr.c
index 47a947d02e..6c73f2df5f 100644
--- a/lib/wcsstr.c
+++ b/lib/wcsstr.c
@@ -15,9 +15,61 @@
    You should have received a copy of the GNU Lesser General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
 
+/* Written by Bruno Haible, 1999.  */
+
 #include <config.h>
 
 /* Specification.  */
 #include <wchar.h>
 
-#include "wcsstr-impl.h"
+#if NEED_LINEAR_WCSSTR
+
+/* Use Two-Way algorithm, worst-case O(n+m).  */
+
+# define FUNC wcsstr
+# define UNIT wchar_t
+# define RETURN_TYPE wchar_t *
+# define MEMCHR0(s, n) wmemchr (s, 0, n)
+# define STRCHR wcschr
+# include "wcsstr-impl.h"
+
+#else
+
+/* Use simple implementation, worst-case O(nm).  */
+
+wchar_t *
+wcsstr (const wchar_t *haystack, const wchar_t *needle)
+{
+  wchar_t n = needle[0];
+
+  /* Is needle empty?  */
+  if (n == (wchar_t)'\0')
+    return (wchar_t *) haystack;
+
+  /* Is needle nearly empty?  */
+  if (needle[1] == (wchar_t)'\0')
+    return wcschr (haystack, n);
+
+  /* Search for needle's first character.  */
+  for (; *haystack != (wchar_t)'\0'; haystack++)
+    {
+      if (*haystack == n)
+        {
+          /* Compare with needle's remaining characters.  */
+          const wchar_t *hptr = haystack + 1;
+          const wchar_t *nptr = needle + 1;
+          for (;;)
+            {
+              if (*hptr != *nptr)
+                break;
+              hptr++; nptr++;
+              if (*nptr == (wchar_t)'\0')
+                return (wchar_t *) haystack;
+            }
+        }
+    }
+
+  return NULL;
+}
+
+#endif
diff --git a/modules/unistr/u16-strstr b/modules/unistr/u16-strstr
index 501bee5303..1ad2e4eec3 100644
--- a/modules/unistr/u16-strstr
+++ b/modules/unistr/u16-strstr
@@ -3,17 +3,15 @@ Substring test for UTF-16 strings.
 
 Files:
 lib/unistr/u16-strstr.c
-lib/unistr/u-strstr.h
-lib/str-kmp.h
+lib/wcsstr-impl.h
+lib/wcs-two-way.h
 
 Depends-on:
 unistr/base
+unistr/u16-chr
 unistr/u16-strchr
-unistr/u16-strmbtouc
-unistr/u16-strlen
-unistr/u16-strnlen
+unistr/u16-cmp
 stdbool
-malloca
 
 configure.ac:
 gl_LIBUNISTRING_MODULE([0.9.4], [unistr/u16-strstr])
diff --git a/modules/unistr/u32-strstr b/modules/unistr/u32-strstr
index 1fb2fcf498..64d95434b1 100644
--- a/modules/unistr/u32-strstr
+++ b/modules/unistr/u32-strstr
@@ -3,16 +3,15 @@ Substring test for UTF-32 strings.
 
 Files:
 lib/unistr/u32-strstr.c
-lib/unistr/u-strstr.h
-lib/str-kmp.h
+lib/wcsstr-impl.h
+lib/wcs-two-way.h
 
 Depends-on:
 unistr/base
+unistr/u32-chr
 unistr/u32-strchr
-unistr/u32-strlen
-unistr/u32-strnlen
+unistr/u32-cmp
 stdbool
-malloca
 
 configure.ac:
 gl_LIBUNISTRING_MODULE([0.9.4], [unistr/u32-strstr])
-- 
2.34.1

From 1ca053ce4128c8a8fbd0ece8b4a26ec5ce791933 Mon Sep 17 00:00:00 2001
From: Bruno Haible <br...@clisp.org>
Date: Sun, 2 Apr 2023 16:10:05 +0200
Subject: [PATCH 2/2] unistr/u8-strstr: Simplify code.

* lib/unistr/u8-strstr.c: Inline the contents of lib/unistr/u-strstr.h.
* lib/unistr/u-strstr.h: Remove file.
* modules/unistr/u8-strstr (Files): Remove it.
---
 ChangeLog                |   5 ++
 lib/unistr/u-strstr.h    | 139 ---------------------------------------
 lib/unistr/u8-strstr.c   |  31 ++++++---
 modules/unistr/u8-strstr |   1 -
 4 files changed, 28 insertions(+), 148 deletions(-)
 delete mode 100644 lib/unistr/u-strstr.h

diff --git a/ChangeLog b/ChangeLog
index f933d1e94d..463274abee 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2023-04-02  Bruno Haible  <br...@clisp.org>
 
+	unistr/u8-strstr: Simplify code.
+	* lib/unistr/u8-strstr.c: Inline the contents of lib/unistr/u-strstr.h.
+	* lib/unistr/u-strstr.h: Remove file.
+	* modules/unistr/u8-strstr (Files): Remove it.
+
 	unistr/u{16,32}-strstr: Use two-way algorithm (no memory allocation).
 	* lib/wcs-two-way.h: Use UNIT instead of wchar_t. Don't undefine
 	RETURN_TYPE.
diff --git a/lib/unistr/u-strstr.h b/lib/unistr/u-strstr.h
deleted file mode 100644
index c0a1435587..0000000000
--- a/lib/unistr/u-strstr.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/* Substring test for UTF-8/UTF-16/UTF-32 strings.  -*- coding: utf-8 -*-
-   Copyright (C) 1999, 2002, 2006, 2010-2023 Free Software Foundation, Inc.
-   Written by Bruno Haible <br...@clisp.org>, 2002, 2005.
-
-   This file is free software.
-   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
-   You can redistribute it and/or modify it under either
-     - the terms of the GNU Lesser General Public License as published
-       by the Free Software Foundation, either version 3, or (at your
-       option) any later version, or
-     - the terms of the GNU General Public License as published by the
-       Free Software Foundation; either version 2, or (at your option)
-       any later version, or
-     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
-
-   This file is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License and the GNU General Public License
-   for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License and of the GNU General Public License along with this
-   program.  If not, see <https://www.gnu.org/licenses/>.  */
-
-UNIT *
-FUNC (const UNIT *haystack, const UNIT *needle)
-{
-  UNIT first = needle[0];
-
-  /* Is needle empty?  */
-  if (first == 0)
-    return (UNIT *) haystack;
-
-  /* Is needle nearly empty (only one unit)?  */
-  if (needle[1] == 0)
-    return U_STRCHR (haystack, first);
-
-#ifdef U_STRMBTOUC
-  /* Is needle nearly empty (only one character)?  */
-  {
-    ucs4_t first_uc;
-    int count = U_STRMBTOUC (&first_uc, needle);
-    if (count > 0 && needle[count] == 0)
-      return U_STRCHR (haystack, first_uc);
-  }
-#endif
-
-#if UNIT_IS_UINT8_T
-  return (uint8_t *) strstr ((const char *) haystack, (const char *) needle);
-#else
-  {
-    /* Minimizing the worst-case complexity:
-       Let n = U_STRLEN(haystack), m = U_STRLEN(needle).
-       The naïve algorithm is O(n*m) worst-case.
-       The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
-       memory allocation.
-       To achieve linear complexity and yet amortize the cost of the
-       memory allocation, we activate the Knuth-Morris-Pratt algorithm
-       only once the naïve algorithm has already run for some time; more
-       precisely, when
-         - the outer loop count is >= 10,
-         - the average number of comparisons per outer loop is >= 5,
-         - the total number of comparisons is >= m.
-       But we try it only once.  If the memory allocation attempt failed,
-       we don't retry it.  */
-    bool try_kmp = true;
-    size_t outer_loop_count = 0;
-    size_t comparison_count = 0;
-    size_t last_ccount = 0;                  /* last comparison count */
-    const UNIT *needle_last_ccount = needle; /* = needle + last_ccount */
-
-    /* Speed up the following searches of needle by caching its first
-       character.  */
-    UNIT b = *needle++;
-
-    for (;; haystack++)
-      {
-        if (*haystack == 0)
-          /* No match.  */
-          return NULL;
-
-        /* See whether it's advisable to use an asymptotically faster
-           algorithm.  */
-        if (try_kmp
-            && outer_loop_count >= 10
-            && comparison_count >= 5 * outer_loop_count)
-          {
-            /* See if needle + comparison_count now reaches the end of
-               needle.  */
-            if (needle_last_ccount != NULL)
-              {
-                needle_last_ccount +=
-                  U_STRNLEN (needle_last_ccount,
-                             comparison_count - last_ccount);
-                if (*needle_last_ccount == 0)
-                  needle_last_ccount = NULL;
-                last_ccount = comparison_count;
-              }
-            if (needle_last_ccount == NULL)
-              {
-                /* Try the Knuth-Morris-Pratt algorithm.  */
-                const UNIT *result;
-                bool success =
-                  knuth_morris_pratt (haystack,
-                                      needle - 1, U_STRLEN (needle - 1),
-                                      &result);
-                if (success)
-                  return (UNIT *) result;
-                try_kmp = false;
-              }
-          }
-
-        outer_loop_count++;
-        comparison_count++;
-        if (*haystack == b)
-          /* The first character matches.  */
-          {
-            const UNIT *rhaystack = haystack + 1;
-            const UNIT *rneedle = needle;
-
-            for (;; rhaystack++, rneedle++)
-              {
-                if (*rneedle == 0)
-                  /* Found a match.  */
-                  return (UNIT *) haystack;
-                if (*rhaystack == 0)
-                  /* No match.  */
-                  return NULL;
-                comparison_count++;
-                if (*rhaystack != *rneedle)
-                  /* Nothing in this round.  */
-                  break;
-              }
-          }
-      }
-  }
-#endif
-}
diff --git a/lib/unistr/u8-strstr.c b/lib/unistr/u8-strstr.c
index 13dceca57e..fbb483ac24 100644
--- a/lib/unistr/u8-strstr.c
+++ b/lib/unistr/u8-strstr.c
@@ -30,11 +30,26 @@
 
 #include <string.h>
 
-/* FIXME: Maybe walking the string via u8_mblen is a win?  */
-
-#define FUNC u8_strstr
-#define UNIT uint8_t
-#define U_STRCHR u8_strchr
-#define U_STRMBTOUC u8_strmbtouc
-#define UNIT_IS_UINT8_T 1
-#include "u-strstr.h"
+uint8_t *
+u8_strstr (const uint8_t *haystack, const uint8_t *needle)
+{
+  uint8_t first = needle[0];
+
+  /* Is needle empty?  */
+  if (first == 0)
+    return (uint8_t *) haystack;
+
+  /* Is needle nearly empty (only one unit)?  */
+  if (needle[1] == 0)
+    return u8_strchr (haystack, first);
+
+  /* Is needle nearly empty (only one character)?  */
+  {
+    ucs4_t first_uc;
+    int count = u8_strmbtouc (&first_uc, needle);
+    if (count > 0 && needle[count] == 0)
+      return u8_strchr (haystack, first_uc);
+  }
+
+  return (uint8_t *) strstr ((const char *) haystack, (const char *) needle);
+}
diff --git a/modules/unistr/u8-strstr b/modules/unistr/u8-strstr
index 8cd99690e7..87fc9d6685 100644
--- a/modules/unistr/u8-strstr
+++ b/modules/unistr/u8-strstr
@@ -3,7 +3,6 @@ Substring test for UTF-8 strings.
 
 Files:
 lib/unistr/u8-strstr.c
-lib/unistr/u-strstr.h
 
 Depends-on:
 unistr/base
-- 
2.34.1

Reply via email to