Hi, Following strcasecmp(), strstr() can now also be made to work in multibyte locales. I committed the appended patch. (Another possible implementation would have been to call locale_charset() and compare its result to "BIG5", "GBK" and a few others, in order to exploit the fact that a UTF-8 string can be searched with the bytewise search function.)
2005-08-17 Bruno Haible <[EMAIL PROTECTED]> * modules/strstr (Files): Add m4/mbrtowc.m4. (Depends-on): Add mbuiter. * lib/strstr.h: Ignore HAVE_STRSTR, always declare the gnulib function. * lib/strstr.c: Completely rewritten, with multibyte locale support. * m4/strstr.m4 (gl_FUNC_STRSTR): Use the replacement function always. (gl_PREREQ_STRSTR): Use gl_FUNC_MBRTOWC. Index: modules/strstr =================================================================== RCS file: /cvsroot/gnulib/gnulib/modules/strstr,v retrieving revision 1.5 diff -c -3 -r1.5 strstr *** modules/strstr 22 Sep 2004 15:11:04 -0000 1.5 --- modules/strstr 17 Aug 2005 14:01:49 -0000 *************** *** 5,12 **** --- 5,14 ---- lib/strstr.h lib/strstr.c m4/strstr.m4 + m4/mbrtowc.m4 Depends-on: + mbuiter configure.ac: gl_FUNC_STRSTR Index: lib/strstr.h =================================================================== RCS file: /cvsroot/gnulib/gnulib/lib/strstr.h,v retrieving revision 1.4 diff -c -3 -r1.4 strstr.h *** lib/strstr.h 14 May 2005 06:03:58 -0000 1.4 --- lib/strstr.h 17 Aug 2005 14:01:49 -0000 *************** *** 1,5 **** /* Searching in a string. ! Copyright (C) 2001-2003 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by --- 1,5 ---- /* Searching in a string. ! Copyright (C) 2001-2003, 2005 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by *************** *** 15,27 **** along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - #if HAVE_STRSTR - - /* Get strstr() declaration. 
*/ - #include <string.h> - - #else - #ifdef __cplusplus extern "C" { #endif --- 15,20 ---- *************** *** 31,36 **** #ifdef __cplusplus } - #endif - #endif --- 24,27 ---- Index: lib/strstr.c =================================================================== RCS file: /cvsroot/gnulib/gnulib/lib/strstr.c,v retrieving revision 1.11 diff -c -3 -r1.11 strstr.c *** lib/strstr.c 14 May 2005 06:03:58 -0000 1.11 --- lib/strstr.c 17 Aug 2005 14:01:49 -0000 *************** *** 1,119 **** ! /* Copyright (C) 1994, 1999, 2002-2003 Free Software Foundation, Inc. ! This file is part of the GNU C Library. ! ! This program is free software; you can redistribute it and/or modify ! it under the terms of the GNU General Public License as published by ! the Free Software Foundation; either version 2, or (at your option) ! any later version. ! ! This program is distributed in the hope that it will be useful, ! but WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! GNU General Public License for more details. ! ! You should have received a copy of the GNU General Public License ! along with this program; if not, write to the Free Software ! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ ! ! /* ! * My personal strstr() implementation that beats most other algorithms. ! * Until someone tells me otherwise, I assume that this is the ! * fastest implementation of strstr() in C. ! * I deliberately chose not to comment it. You should have at least ! * as much fun trying to understand it, as I had to write it :-). ! * ! * Stephen R. van den Berg, [EMAIL PROTECTED] */ #if HAVE_CONFIG_H # include <config.h> #endif ! #include <string.h> ! ! typedef unsigned chartype; ! #undef strstr char * ! strstr (const char *phaystack, const char *pneedle) { ! register const unsigned char *haystack, *needle; ! register chartype b, c; ! haystack = (const unsigned char *) phaystack; ! 
needle = (const unsigned char *) pneedle; ! b = *needle; ! if (b != '\0') { ! haystack--; /* possible ANSI violation */ ! do { ! c = *++haystack; ! if (c == '\0') ! goto ret0; ! } ! while (c != b); ! ! c = *++needle; ! if (c == '\0') ! goto foundneedle; ! ++needle; ! goto jin; ! ! for (;;) ! { ! register chartype a; ! register const unsigned char *rhaystack, *rneedle; ! do { ! a = *++haystack; ! if (a == '\0') ! goto ret0; ! if (a == b) ! break; ! a = *++haystack; ! if (a == '\0') ! goto ret0; ! shloop:; } ! while (a != b); ! ! jin: a = *++haystack; ! if (a == '\0') ! goto ret0; ! ! if (a != c) ! goto shloop; ! ! rhaystack = haystack-- + 1; ! rneedle = needle; ! a = *rneedle; ! ! if (*rhaystack == a) ! do ! { ! if (a == '\0') ! goto foundneedle; ! ++rhaystack; ! a = *++needle; ! if (*rhaystack != a) ! break; ! if (a == '\0') ! goto foundneedle; ! ++rhaystack; ! a = *++needle; ! } ! while (*rhaystack == a); ! ! needle = rneedle; /* took the register-poor approach */ ! ! if (a == '\0') ! break; ! } } - foundneedle: - return (char*) haystack; - ret0: - return 0; } --- 1,126 ---- ! /* Searching in a string. ! Copyright (C) 2005 Free Software Foundation, Inc. ! Written by Bruno Haible <[EMAIL PROTECTED]>, 2005. ! ! This program is free software; you can redistribute it and/or modify ! it under the terms of the GNU General Public License as published by ! the Free Software Foundation; either version 2, or (at your option) ! any later version. ! ! This program is distributed in the hope that it will be useful, ! but WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! GNU General Public License for more details. ! ! You should have received a copy of the GNU General Public License ! along with this program; if not, write to the Free Software Foundation, ! Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #if HAVE_CONFIG_H # include <config.h> #endif ! /* Specification. */ ! 
#include "strstr.h" ! #if HAVE_MBRTOWC ! # include "mbuiter.h" ! #endif + /* Find the first occurrence of NEEDLE in HAYSTACK. */ char * ! strstr (const char *haystack, const char *needle) { ! /* Be careful not to look at the entire extent of haystack or needle ! until needed. This is useful because of these two cases: ! - haystack may be very long, and a match of needle found early, ! - needle may be very long, and not even a short initial segment of ! needle may be found in haystack. */ ! #if HAVE_MBRTOWC ! if (MB_CUR_MAX > 1) ! { ! mbui_iterator_t iter_needle; ! mbui_init (iter_needle, needle); ! if (mbui_avail (iter_needle)) ! { ! mbui_iterator_t iter_haystack; ! mbui_init (iter_haystack, haystack); ! for (;; mbui_advance (iter_haystack)) ! { ! if (!mbui_avail (iter_haystack)) ! /* No match. */ ! return NULL; ! ! if (mb_equal (mbui_cur (iter_haystack), mbui_cur (iter_needle))) ! /* The first character matches. */ ! { ! mbui_iterator_t rhaystack; ! mbui_iterator_t rneedle; ! ! memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t)); ! mbui_advance (rhaystack); ! ! mbui_init (rneedle, needle); ! if (!mbui_avail (rneedle)) ! abort (); ! mbui_advance (rneedle); ! ! for (;; mbui_advance (rhaystack), mbui_advance (rneedle)) ! { ! if (!mbui_avail (rneedle)) ! /* Found a match. */ ! return (char *) haystack; ! if (!mbui_avail (rhaystack)) ! /* No match. */ ! return NULL; ! if (!mb_equal (mbui_cur (rhaystack), mbui_cur (rneedle))) ! /* Nothing in this round. */ ! break; ! } ! } ! } ! } ! else ! return (char *) haystack; ! } ! else ! #endif { ! if (*needle != '\0') { ! /* Speed up the following searches of needle by caching its first ! character. */ ! char b = *needle++; ! for (;; haystack++) { ! if (*haystack == '\0') ! /* No match. */ ! return NULL; ! if (*haystack == b) ! /* The first character matches. */ ! { ! const char *rhaystack = haystack + 1; ! const char *rneedle = needle; ! ! for (;; rhaystack++, rneedle++) ! { ! if (*rneedle == '\0') ! /* Found a match. 
*/ ! return (char *) haystack; ! if (*rhaystack == '\0') ! /* No match. */ ! return NULL; ! if (*rhaystack != *rneedle) ! /* Nothing in this round. */ ! break; ! } ! } ! } ! } ! else ! return (char *) haystack; } } Index: m4/strstr.m4 =================================================================== RCS file: /cvsroot/gnulib/gnulib/m4/strstr.m4,v retrieving revision 1.3 diff -c -3 -r1.3 strstr.m4 *** m4/strstr.m4 18 Jan 2005 13:07:56 -0000 1.3 --- m4/strstr.m4 17 Aug 2005 14:01:49 -0000 *************** *** 1,16 **** ! # strstr.m4 serial 2 ! dnl Copyright (C) 2002-2003 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. AC_DEFUN([gl_FUNC_STRSTR], [ ! AC_REPLACE_FUNCS(strstr) ! if test $ac_cv_func_strstr = no; then ! gl_PREREQ_STRSTR ! fi ]) # Prerequisites of lib/strstr.c. ! AC_DEFUN([gl_PREREQ_STRSTR], [:]) --- 1,19 ---- ! # strstr.m4 serial 3 ! dnl Copyright (C) 2002-2003, 2005 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. AC_DEFUN([gl_FUNC_STRSTR], [ ! dnl No known system has a strstr() function that works correctly in ! dnl multibyte locales. Therefore we use our version always. ! AC_LIBOBJ(strstr) ! AC_DEFINE(strstr, rpl_strstr, [Define to rpl_strstr always.]) ! gl_PREREQ_STRSTR ]) # Prerequisites of lib/strstr.c. ! AC_DEFUN([gl_PREREQ_STRSTR], [ ! gl_FUNC_MBRTOWC ! ]) _______________________________________________ bug-gnulib mailing list bug-gnulib@gnu.org http://lists.gnu.org/mailman/listinfo/bug-gnulib