Yoann Vandoorselaere wrote:
> As you can see from the code, this strcasestr implementation relies on
> strcasecmp (I did not bother to use the GLIBC implementation).
>
> Thus, wouldn't it be wise to fix strcasecmp itself, and include it
> unconditionally in GnuLib ?

Yes. Here's a patch to that effect. (Completely untested so far. Also
quite inefficient. One can do better with mbrtowc().)

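But note that this doesn't help you much in producing a correct
strcasestr() function, because
  - when you iterate over a string in BIG5 or GBK encoding, you must
    iterate one character per step, not one byte per step (the second
    byte of a double-byte character can have the value of an ASCII letter),
  - you don't know a priori how many bytes to pass to strcasecmp() in order
    to get a match (the matching part of the haystack need not have the
    same byte length as the needle).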

Bruno


Index: modules/strcase
===================================================================
RCS file: /cvsroot/gnulib/gnulib/modules/strcase,v
retrieving revision 1.5
diff -c -3 -r1.5 strcase
*** modules/strcase	22 Sep 2004 15:11:04 -0000	1.5
--- modules/strcase	15 Aug 2005 17:13:39 -0000
***************
*** 8,13 ****
--- 8,14 ----
  m4/strcase.m4
  
  Depends-on:
+ allocsa
  
  configure.ac:
  gl_STRCASE
Index: lib/strcase.h
===================================================================
RCS file: /cvsroot/gnulib/gnulib/lib/strcase.h,v
retrieving revision 1.4
diff -c -3 -r1.4 strcase.h
*** lib/strcase.h	14 May 2005 06:03:58 -0000	1.4
--- lib/strcase.h	15 Aug 2005 17:13:39 -0000
***************
*** 1,5 ****
  /* Case-insensitive string comparison functions.
!    Copyright (C) 1995-1996, 2001, 2003 Free Software Foundation, Inc.
  
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
--- 1,5 ----
  /* Case-insensitive string comparison functions.
!    Copyright (C) 1995-1996, 2001, 2003, 2005 Free Software Foundation, Inc.
  
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
***************
*** 29,35 ****
  /* Compare strings S1 and S2, ignoring case, returning less than, equal to or
     greater than zero if S1 is lexicographically less than, equal to or greater
     than S2.
!    Note: This function does not work correctly in multibyte locales.  */
  extern int strcasecmp (const char *s1, const char *s2);
  
  /* Compare no more than N characters of strings S1 and S2, ignoring case,
--- 29,36 ----
  /* Compare strings S1 and S2, ignoring case, returning less than, equal to or
     greater than zero if S1 is lexicographically less than, equal to or greater
     than S2.
!    Note: This function may, in multibyte locales, return 0 for strings of
!    different lengths!  */
  extern int strcasecmp (const char *s1, const char *s2);
  
  /* Compare no more than N characters of strings S1 and S2, ignoring case,
Index: lib/strncasecmp.c
===================================================================
RCS file: /cvsroot/gnulib/gnulib/lib/strncasecmp.c,v
retrieving revision 1.4
diff -c -3 -r1.4 strncasecmp.c
*** lib/strncasecmp.c	2 Jan 1999 15:55:44 -0000	1.4
--- lib/strncasecmp.c	15 Aug 2005 17:13:39 -0000
***************
*** 1,2 ****
! #define LENGTH_LIMIT
! #include "strcasecmp.c"
--- 1,58 ----
! /* strncasecmp.c -- case insensitive string comparator
!    Copyright (C) 1998, 1999 Free Software Foundation, Inc.
! 
!    This program is free software; you can redistribute it and/or modify
!    it under the terms of the GNU General Public License as published by
!    the Free Software Foundation; either version 2, or (at your option)
!    any later version.
! 
!    This program is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU General Public License for more details.
! 
!    You should have received a copy of the GNU General Public License
!    along with this program; if not, write to the Free Software Foundation,
!    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
! 
! #if HAVE_CONFIG_H
! # include <config.h>
! #endif
! 
! /* Specification.  */
! #include "strcase.h"
! 
! #include <ctype.h>
! 
! #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
! 
! /* Compare no more than N bytes of strings S1 and S2,
!    ignoring case, returning less than, equal to or
!    greater than zero if S1 is lexicographically less
!    than, equal to or greater than S2.  */
! 
! int
! strncasecmp (const char *s1, const char *s2, size_t n)
! {
!   register const unsigned char *p1 = (const unsigned char *) s1;
!   register const unsigned char *p2 = (const unsigned char *) s2;
!   unsigned char c1, c2;
! 
!   if (p1 == p2 || n == 0)
!     return 0;
! 
!   do
!     {
!       c1 = TOLOWER (*p1);
!       c2 = TOLOWER (*p2);
! 
!       if (--n == 0 || c1 == '\0')
! 	break;
! 
!       ++p1;
!       ++p2;
!     }
!   while (c1 == c2);
! 
!   return c1 - c2;
! }
Index: lib/strcasecmp.c
===================================================================
RCS file: /cvsroot/gnulib/gnulib/lib/strcasecmp.c,v
retrieving revision 1.6
diff -c -3 -r1.6 strcasecmp.c
*** lib/strcasecmp.c	14 May 2005 06:03:58 -0000	1.6
--- lib/strcasecmp.c	15 Aug 2005 17:13:39 -0000
***************
*** 1,5 ****
! /* strcasecmp.c -- case insensitive string comparator
!    Copyright (C) 1998, 1999 Free Software Foundation, Inc.
  
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
--- 1,7 ----
! /* Case-insensitive string comparison function.
!    Copyright (C) 1998, 1999, 2005 Free Software Foundation, Inc.
!    Written by Bruno Haible <[EMAIL PROTECTED]>, 2005,
!    based on earlier glibc code.
  
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
***************
*** 15,66 ****
     along with this program; if not, write to the Free Software Foundation,
     Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  
! #if HAVE_CONFIG_H
  # include <config.h>
  #endif
  
! #ifdef LENGTH_LIMIT
! # define STRXCASECMP_FUNCTION strncasecmp
! # define STRXCASECMP_DECLARE_N , size_t n
! # define LENGTH_LIMIT_EXPR(Expr) Expr
! #else
! # define STRXCASECMP_FUNCTION strcasecmp
! # define STRXCASECMP_DECLARE_N /* empty */
! # define LENGTH_LIMIT_EXPR(Expr) 0
! #endif
  
- #include <stddef.h>
  #include <ctype.h>
  
! #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
  
! /* Compare {{no more than N characters of }}strings S1 and S2,
!    ignoring case, returning less than, equal to or
!    greater than zero if S1 is lexicographically less
!    than, equal to or greater than S2.  */
  
! int
! STRXCASECMP_FUNCTION (const char *s1, const char *s2 STRXCASECMP_DECLARE_N)
! {
!   register const unsigned char *p1 = (const unsigned char *) s1;
!   register const unsigned char *p2 = (const unsigned char *) s2;
!   unsigned char c1, c2;
  
!   if (p1 == p2 || LENGTH_LIMIT_EXPR (n == 0))
!     return 0;
  
!   do
      {
!       c1 = TOLOWER (*p1);
!       c2 = TOLOWER (*p2);
  
!       if (LENGTH_LIMIT_EXPR (--n == 0) || c1 == '\0')
  	break;
  
!       ++p1;
!       ++p2;
      }
!   while (c1 == c2);
  
!   return c1 - c2;
  }
--- 17,151 ----
     along with this program; if not, write to the Free Software Foundation,
     Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  
! #ifdef HAVE_CONFIG_H
  # include <config.h>
  #endif
  
! /* Specification.  */
! #include "strcase.h"
  
  #include <ctype.h>
+ #include <stdlib.h>
  
! #if HAVE_WCHAR_H && HAVE_WCTYPE_H && HAVE_MBSTOWCS
  
! /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
!    <wchar.h>.
!    BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
!    <wchar.h>.  */
! # include <stdio.h>
! # include <time.h>
! # include <wchar.h>
  
! # include <wctype.h>
! 
! #endif
! 
! #include "allocsa.h"
! 
! #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
  
! #if HAVE_WCHAR_H && HAVE_WCTYPE_H && HAVE_MBSTOWCS
  
! /* Tests whether a string can be compared like ASCII, in all locales.
!    Otherwise it contains a character that may need careful treatment in
!    a multibyte locale.  */
! static int
! ascii_case_comparable (const char *s)
! {
!   for (;;)
      {
!       unsigned char c = (unsigned char) *s;
  
!       if (c == '\0')
  	break;
+       if (c > 0x7f) /* non-ASCII ?  */
+ 	return 0;
+       if (c == 'i' || c == 'I')
+ 	/* 'i' and 'I' are special in Turkish locales.  */
+ 	return 0;
+       s++;
+     }
+   return 1;
+ }
+ 
+ #endif
  
! /* Compare strings S1 and S2, ignoring case, returning less than, equal to or
!    greater than zero if S1 is lexicographically less than, equal to or greater
!    than S2.
!    Note: This function may, in multibyte locales, return 0 for strings of
!    different lengths!  */
! int
! strcasecmp (const char *s1, const char *s2)
! {
! #if HAVE_WCHAR_H && HAVE_WCTYPE_H && HAVE_MBSTOWCS
!   if (MB_CUR_MAX > 1
!       && !(ascii_case_comparable (s1) && ascii_case_comparable (s2)))
!     {
!       /* Convert the strings to wide-character strings.  */
!       size_t n1 = mbstowcs (NULL, s1, 0) + 1;
!       size_t n2 = mbstowcs (NULL, s2, 0) + 1;
! 
!       if (n1 > 0 && n2 > 0) /* no conversion errors?  */
! 	{
! 	  void *memory = allocsa ((n1 + n2) * sizeof (wchar_t));
! 	  wchar_t *wcs1 = (wchar_t *) memory;
! 	  wchar_t *wcs2 = wcs1 + n1;
! 
! 	  if (mbstowcs (wcs1, s1, n1) != (size_t)-1
! 	      && mbstowcs (wcs2, s2, n2) != (size_t)-1)
! 	    {
! 	      const wchar_t *p1 = wcs1;
! 	      const wchar_t *p2 = wcs2;
! 	      wchar_t c1, c2;
! 
! 	      /* Verify the wide-character strings are NUL-terminated.  */
! 	      if (!(wcs1[n1 - 1] == 0 && wcs2[n2 - 1] == 0))
! 		abort ();
! 
! 	      /* Compare the wide-character strings.  */
! 	      do
! 		{
! 		  c1 = towlower (*p1);
! 		  c2 = towlower (*p2);
! 
! 		  if (c1 == 0)
! 		    break;
! 
! 		  ++p1;
! 		  ++p2;
! 		}
! 	      while (c1 == c2);
! 
! 	      freesa (memory);
! 	      return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
! 	    }
! 	  freesa (memory);
! 	}
      }
! #endif
!   {
!     const unsigned char *p1 = (const unsigned char *) s1;
!     const unsigned char *p2 = (const unsigned char *) s2;
!     unsigned char c1, c2;
! 
!     if (p1 == p2)
!       return 0;
! 
!     do
!       {
! 	c1 = TOLOWER (*p1);
! 	c2 = TOLOWER (*p2);
! 
! 	if (c1 == '\0')
! 	  break;
! 
! 	++p1;
! 	++p2;
!       }
!     while (c1 == c2);
  
!     return c1 - c2;
!   }
  }
Index: m4/strcase.m4
===================================================================
RCS file: /cvsroot/gnulib/gnulib/m4/strcase.m4,v
retrieving revision 1.2
diff -c -3 -r1.2 strcase.m4
*** m4/strcase.m4	18 Jan 2005 13:07:56 -0000	1.2
--- m4/strcase.m4	15 Aug 2005 17:13:39 -0000
***************
*** 1,5 ****
! # strcase.m4 serial 1
! dnl Copyright (C) 2002 Free Software Foundation, Inc.
  dnl This file is free software; the Free Software Foundation
  dnl gives unlimited permission to copy and/or distribute it,
  dnl with or without modifications, as long as this notice is preserved.
--- 1,5 ----
! # strcase.m4 serial 2
! dnl Copyright (C) 2002, 2005 Free Software Foundation, Inc.
  dnl This file is free software; the Free Software Foundation
  dnl gives unlimited permission to copy and/or distribute it,
  dnl with or without modifications, as long as this notice is preserved.
***************
*** 12,21 ****
  
  AC_DEFUN([gl_FUNC_STRCASECMP],
  [
!   AC_REPLACE_FUNCS(strcasecmp)
!   if test $ac_cv_func_strcasecmp = no; then
!     gl_PREREQ_STRCASECMP
!   fi
  ])
  
  AC_DEFUN([gl_FUNC_STRNCASECMP],
--- 12,22 ----
  
  AC_DEFUN([gl_FUNC_STRCASECMP],
  [
!   dnl No known system has a strcasecmp() function that works correctly in
!   dnl multibyte locales. Therefore we use our version always.
!   AC_LIBOBJ(strcasecmp)
!   AC_DEFINE(strcasecmp, rpl_strcasecmp, [Define to rpl_strcasecmp always.])
!   gl_PREREQ_STRCASECMP
  ])
  
  AC_DEFUN([gl_FUNC_STRNCASECMP],
***************
*** 28,34 ****
  
  # Prerequisites of lib/strcasecmp.c.
  AC_DEFUN([gl_PREREQ_STRCASECMP], [
!   :
  ])
  
  # Prerequisites of lib/strncasecmp.c.
--- 29,36 ----
  
  # Prerequisites of lib/strcasecmp.c.
  AC_DEFUN([gl_PREREQ_STRCASECMP], [
!   AC_CHECK_HEADERS_ONCE([wchar.h wctype.h])
!   AC_CHECK_FUNCS_ONCE([mbstowcs])
  ])
  
  # Prerequisites of lib/strncasecmp.c.
_______________________________________________
bug-gnulib mailing list
bug-gnulib@gnu.org
http://lists.gnu.org/mailman/listinfo/bug-gnulib

Reply via email to