On Android, I see this test failure: FAIL: test-iswctype ===================
../../gnulib-tests/test-iswctype.c:56: assertion 'iswctype (L'$', desc)' failed Aborted FAIL test-iswctype (exit status: 134) While the set of characters of the class "punct" in the C locale is not specified by POSIX [1][2][3], it is a reasonable expectation that ispunct() and iswpunct() are consistent. However, this is not the case on Android. This program =============================================================================== #include <stdio.h> #include <ctype.h> #include <wctype.h> int main () { printf ("%c : %d %d %d\n", 33, !!ispunct (33), !!iswpunct (33), !!iswctype (33, wctype ("punct"))); printf ("%c : %d %d %d\n", 34, !!ispunct (34), !!iswpunct (34), !!iswctype (34, wctype ("punct"))); printf ("%c : %d %d %d\n", 35, !!ispunct (35), !!iswpunct (35), !!iswctype (35, wctype ("punct"))); printf ("%c : %d %d %d\n", 36, !!ispunct (36), !!iswpunct (36), !!iswctype (36, wctype ("punct"))); printf ("%c : %d %d %d\n", 37, !!ispunct (37), !!iswpunct (37), !!iswctype (37, wctype ("punct"))); printf ("%c : %d %d %d\n", 38, !!ispunct (38), !!iswpunct (38), !!iswctype (38, wctype ("punct"))); printf ("%c : %d %d %d\n", 39, !!ispunct (39), !!iswpunct (39), !!iswctype (39, wctype ("punct"))); printf ("%c : %d %d %d\n", 40, !!ispunct (40), !!iswpunct (40), !!iswctype (40, wctype ("punct"))); printf ("%c : %d %d %d\n", 41, !!ispunct (41), !!iswpunct (41), !!iswctype (41, wctype ("punct"))); printf ("%c : %d %d %d\n", 42, !!ispunct (42), !!iswpunct (42), !!iswctype (42, wctype ("punct"))); printf ("%c : %d %d %d\n", 43, !!ispunct (43), !!iswpunct (43), !!iswctype (43, wctype ("punct"))); printf ("%c : %d %d %d\n", 44, !!ispunct (44), !!iswpunct (44), !!iswctype (44, wctype ("punct"))); printf ("%c : %d %d %d\n", 45, !!ispunct (45), !!iswpunct (45), !!iswctype (45, wctype ("punct"))); printf ("%c : %d %d %d\n", 46, !!ispunct (46), !!iswpunct (46), !!iswctype (46, wctype ("punct"))); printf ("%c : %d %d %d\n", 47, !!ispunct (47), !!iswpunct (47), !!iswctype (47, wctype 
("punct"))); printf ("%c : %d %d %d\n", 58, !!ispunct (58), !!iswpunct (58), !!iswctype (58, wctype ("punct"))); printf ("%c : %d %d %d\n", 59, !!ispunct (59), !!iswpunct (59), !!iswctype (59, wctype ("punct"))); printf ("%c : %d %d %d\n", 60, !!ispunct (60), !!iswpunct (60), !!iswctype (60, wctype ("punct"))); printf ("%c : %d %d %d\n", 61, !!ispunct (61), !!iswpunct (61), !!iswctype (61, wctype ("punct"))); printf ("%c : %d %d %d\n", 62, !!ispunct (62), !!iswpunct (62), !!iswctype (62, wctype ("punct"))); printf ("%c : %d %d %d\n", 63, !!ispunct (63), !!iswpunct (63), !!iswctype (63, wctype ("punct"))); printf ("%c : %d %d %d\n", 64, !!ispunct (64), !!iswpunct (64), !!iswctype (64, wctype ("punct"))); printf ("%c : %d %d %d\n", 91, !!ispunct (91), !!iswpunct (91), !!iswctype (91, wctype ("punct"))); printf ("%c : %d %d %d\n", 92, !!ispunct (92), !!iswpunct (92), !!iswctype (92, wctype ("punct"))); printf ("%c : %d %d %d\n", 93, !!ispunct (93), !!iswpunct (93), !!iswctype (93, wctype ("punct"))); printf ("%c : %d %d %d\n", 94, !!ispunct (94), !!iswpunct (94), !!iswctype (94, wctype ("punct"))); printf ("%c : %d %d %d\n", 95, !!ispunct (95), !!iswpunct (95), !!iswctype (95, wctype ("punct"))); printf ("%c : %d %d %d\n", 96, !!ispunct (96), !!iswpunct (96), !!iswctype (96, wctype ("punct"))); printf ("%c : %d %d %d\n", 123, !!ispunct (123), !!iswpunct (123), !!iswctype (123, wctype ("punct"))); printf ("%c : %d %d %d\n", 124, !!ispunct (124), !!iswpunct (124), !!iswctype (124, wctype ("punct"))); printf ("%c : %d %d %d\n", 125, !!ispunct (125), !!iswpunct (125), !!iswctype (125, wctype ("punct"))); printf ("%c : %d %d %d\n", 126, !!ispunct (126), !!iswpunct (126), !!iswctype (126, wctype ("punct"))); return 0; } =============================================================================== prints: ! : 1 1 1 " : 1 1 1 # : 1 1 1 $ : 1 0 0 % : 1 1 1 & : 1 1 1 ' : 1 1 1 ( : 1 1 1 ) : 1 1 1 * : 1 1 1 + : 1 0 0 , : 1 1 1 - : 1 1 1 . 
: 1 1 1 / : 1 1 1 : : 1 1 1 ; : 1 1 1 < : 1 0 0 = : 1 0 0 > : 1 0 0 ? : 1 1 1 @ : 1 1 1 [ : 1 1 1 \ : 1 1 1 ] : 1 1 1 ^ : 1 0 0 _ : 1 1 1 ` : 1 0 0 { : 1 1 1 | : 1 0 0 } : 1 1 1 ~ : 1 0 0 That is, the characters '$', '+', '<', '=', '>', '^', '`', '|', '~' are not considered to be in class "punct" by the wide-character APIs iswpunct(), iswctype(). Here's a set of patches that provides a workaround, so that, in particular, the [[:punct:]] syntax in fnmatch and regex will work consistently. [1] https://pubs.opengroup.org/onlinepubs/9699919799/functions/ispunct.html [2] https://pubs.opengroup.org/onlinepubs/9699919799/functions/iswpunct.html [3] https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html 2023-08-29 Bruno Haible <br...@clisp.org> wctype: Rely on module iswpunct. * m4/wctype.m4 (gl_FUNC_WCTYPE): Also test whether the "punct" class works. * modules/wctype (Depends-on): Add iswpunct. * tests/test-iswctype.c (main): Add more tests of the "punct" class. * doc/posix-functions/wctype.texi: Mention the Android problem. c32ispunct: Rely on module iswpunct. * modules/c32ispunct (Depends-on): Add iswpunct. * tests/test-c32ispunct.c (main): Add a few more tests in the "C" locale. iswpunct: Add tests. * tests/test-iswpunct.c: New file, based on tests/test-iswdigit.c and tests/test-c32ispunct.c. * tests/test-iswpunct.sh: New file, based on tests/test-iswdigit.sh. * modules/iswpunct-tests: New file. iswpunct: New module. * lib/wctype.in.h (iswpunct): New declaration. * lib/iswpunct.c: New file. * m4/iswpunct.m4: New file. * m4/wctype_h.m4 (gl_WCTYPE_H_REQUIRE_DEFAULTS): Initialize GNULIB_ISWPUNCT. (gl_WCTYPE_H_DEFAULTS): Initialize REPLACE_ISWPUNCT. * modules/wctype-h (Makefile.am): Substitute GNULIB_ISWPUNCT, REPLACE_ISWPUNCT. * modules/iswpunct: New file. * doc/posix-functions/iswpunct.texi: Mention the new module. wctype-h tests: Add more tests. * tests/test-wctype-h.c (main): Add a sanity check of iswpunct.
>From 2961a5154f4350814a7365ecac61c8d501dbeeb1 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Wed, 30 Aug 2023 02:18:00 +0200 Subject: [PATCH 1/5] wctype-h tests: Add more tests. * tests/test-wctype-h.c (main): Add a sanity check of iswpunct. --- ChangeLog | 5 +++++ tests/test-wctype-h.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/ChangeLog b/ChangeLog index c964be3f79..3824fd0ed4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2023-08-29 Bruno Haible <br...@clisp.org> + + wctype-h tests: Add more tests. + * tests/test-wctype-h.c (main): Add a sanity check of iswpunct. + 2023-08-29 Bruno Haible <br...@clisp.org> iswdigit, iswxdigit: Fix documentation. diff --git a/tests/test-wctype-h.c b/tests/test-wctype-h.c index af02fe7eb6..323273e99c 100644 --- a/tests/test-wctype-h.c +++ b/tests/test-wctype-h.c @@ -67,6 +67,38 @@ main (void) ASSERT (!iswprint (L'\t')); ASSERT (!iswprint (L'\n')); + /* Sanity check for the iswpunct function. + These characters are usually expected to be punctuation or symbol + characters. */ + ASSERT (iswpunct (L'!')); + ASSERT (iswpunct (L'"')); + ASSERT (iswpunct (L'#')); + ASSERT (iswpunct (L'%')); + ASSERT (iswpunct (L'&')); + ASSERT (iswpunct (L'\'')); + ASSERT (iswpunct (L'(')); + ASSERT (iswpunct (L')')); + ASSERT (iswpunct (L'*')); + ASSERT (iswpunct (L',')); + ASSERT (iswpunct (L'-')); + ASSERT (iswpunct (L'.')); + ASSERT (iswpunct (L'/')); + ASSERT (iswpunct (L':')); + ASSERT (iswpunct (L';')); + ASSERT (iswpunct (L'?')); + ASSERT (iswpunct (L'@')); + ASSERT (iswpunct (L'[')); + ASSERT (iswpunct (L'\\')); + ASSERT (iswpunct (L']')); + ASSERT (iswpunct (L'_')); + ASSERT (iswpunct (L'{')); + ASSERT (iswpunct (L'}')); + ASSERT (!iswpunct (L'5')); + ASSERT (!iswpunct (L'F')); + ASSERT (!iswpunct (L' ')); + ASSERT (!iswpunct (L'\t')); + ASSERT (!iswpunct (L'\n')); + /* Check that the tow* functions exist as functions or as macros. 
*/ (void) towlower (0); (void) towupper (0); -- 2.34.1
>From 9753b53d66393b9b0059ba04e0a6a9c0b326b1fb Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Wed, 30 Aug 2023 02:18:06 +0200 Subject: [PATCH 2/5] iswpunct: New module. * lib/wctype.in.h (iswpunct): New declaration. * lib/iswpunct.c: New file. * m4/iswpunct.m4: New file. * m4/wctype_h.m4 (gl_WCTYPE_H_REQUIRE_DEFAULTS): Initialize GNULIB_ISWPUNCT. (gl_WCTYPE_H_DEFAULTS): Initialize REPLACE_ISWPUNCT. * modules/wctype-h (Makefile.am): Substitute GNULIB_ISWPUNCT, REPLACE_ISWPUNCT. * modules/iswpunct: New file. * doc/posix-functions/iswpunct.texi: Mention the new module. --- ChangeLog | 12 ++++++++ doc/posix-functions/iswpunct.texi | 8 +++++- lib/iswpunct.c | 33 ++++++++++++++++++++++ lib/wctype.in.h | 12 +++++++- m4/iswpunct.m4 | 47 +++++++++++++++++++++++++++++++ m4/wctype_h.m4 | 4 ++- modules/iswpunct | 30 ++++++++++++++++++++ modules/wctype-h | 2 ++ 8 files changed, 145 insertions(+), 3 deletions(-) create mode 100644 lib/iswpunct.c create mode 100644 m4/iswpunct.m4 create mode 100644 modules/iswpunct diff --git a/ChangeLog b/ChangeLog index 3824fd0ed4..abb5917a76 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,17 @@ 2023-08-29 Bruno Haible <br...@clisp.org> + iswpunct: New module. + * lib/wctype.in.h (iswpunct): New declaration. + * lib/iswpunct.c: New file. + * m4/iswpunct.m4: New file. + * m4/wctype_h.m4 (gl_WCTYPE_H_REQUIRE_DEFAULTS): Initialize + GNULIB_ISWPUNCT. + (gl_WCTYPE_H_DEFAULTS): Initialize REPLACE_ISWPUNCT. + * modules/wctype-h (Makefile.am): Substitute GNULIB_ISWPUNCT, + REPLACE_ISWPUNCT. + * modules/iswpunct: New file. + * doc/posix-functions/iswpunct.texi: Mention the new module. + wctype-h tests: Add more tests. * tests/test-wctype-h.c (main): Add a sanity check of iswpunct. 
diff --git a/doc/posix-functions/iswpunct.texi b/doc/posix-functions/iswpunct.texi index bc0e65ac93..663f2cca61 100644 --- a/doc/posix-functions/iswpunct.texi +++ b/doc/posix-functions/iswpunct.texi @@ -4,7 +4,7 @@ POSIX specification:@* @url{https://pubs.opengroup.org/onlinepubs/9699919799/functions/iswpunct.html} -Gnulib module: wctype-h +Gnulib module: iswpunct Portability problems fixed by Gnulib: @itemize @@ -15,6 +15,12 @@ This function cannot be called from plain inline or extern inline functions on some platforms: OS X 10.8. +@item +This function is inconsistent with the @code{ispunct} function, because it +returns false for the characters @code{'$'}, @code{'+'}, @code{'<'}, +@code{'='}, @code{'>'}, @code{'^'}, @code{'`'} , @code{'|'}, @code{'~'} +on some platforms: +Android 11. @end itemize Portability problems not fixed by Gnulib: diff --git a/lib/iswpunct.c b/lib/iswpunct.c new file mode 100644 index 0000000000..0d60cb71eb --- /dev/null +++ b/lib/iswpunct.c @@ -0,0 +1,33 @@ +/* Test wide character for being a punctuation or symbol character. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. 
*/ +#include <wctype.h> + +#include <ctype.h> + +int +iswpunct (wint_t wc) +#undef iswpunct +{ +#if defined __ANDROID__ + if ((unsigned int) wc < 128) + return ispunct ((unsigned int) wc); +#endif + return iswpunct (wc); +} diff --git a/lib/wctype.in.h b/lib/wctype.in.h index fd52715a8a..6f1e7ef149 100644 --- a/lib/wctype.in.h +++ b/lib/wctype.in.h @@ -133,7 +133,7 @@ typedef unsigned int rpl_wint_t; Linux libc5 has <wctype.h> and the functions but they are broken. mingw and MSVC have <wctype.h> and the functions but they take a wchar_t as argument, not an rpl_wint_t. Additionally, the mingw iswprint function - is broken. + and the Android iswpunct function are broken. Assume all 11 functions (all isw* except iswblank) are implemented the same way, or not at all. */ # if ! @HAVE_ISWCNTRL@ || @REPLACE_ISWCNTRL@ @@ -494,6 +494,16 @@ _GL_FUNCDECL_RPL (iswdigit, int, (wint_t wc)); # endif # endif +# if @GNULIB_ISWPUNCT@ +# if @REPLACE_ISWPUNCT@ +# if !(defined __cplusplus && defined GNULIB_NAMESPACE) +# undef iswpunct +# define iswpunct rpl_iswpunct +# endif +_GL_FUNCDECL_RPL (iswpunct, int, (wint_t wc)); +# endif +# endif + # if @GNULIB_ISWXDIGIT@ # if @REPLACE_ISWXDIGIT@ # if !(defined __cplusplus && defined GNULIB_NAMESPACE) diff --git a/m4/iswpunct.m4 b/m4/iswpunct.m4 new file mode 100644 index 0000000000..14691154cf --- /dev/null +++ b/m4/iswpunct.m4 @@ -0,0 +1,47 @@ +# iswpunct.m4 serial 1 +dnl Copyright (C) 2023 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_FUNC_ISWPUNCT], +[ + AC_REQUIRE([gl_WCTYPE_H_DEFAULTS]) + AC_REQUIRE([gl_WCTYPE_H]) + + if test $HAVE_ISWCNTRL = 0 || test $REPLACE_ISWCNTRL = 1; then + dnl <wctype.h> redefines iswpunct already. 
+ REPLACE_ISWPUNCT="$REPLACE_ISWCNTRL" + else + AC_CACHE_CHECK([whether iswpunct is consistent with ispunct], + [gl_cv_func_iswpunct_works], + [AC_RUN_IFELSE( + [AC_LANG_SOURCE([[ +#include <ctype.h> +#include <wchar.h> +#include <wctype.h> +int +main (int argc, char *argv[]) +{ + int result = 0; + /* This fails on Android 11. */ + if ((! iswpunct ('\`')) != (! ispunct ('\`'))) + result |= 1; + return result; +}]])], + [gl_cv_func_iswpunct_works=yes], + [gl_cv_func_iswpunct_works=no], + [case "$host_os" in + # Guess no on Android. + android*) gl_cv_func_iswpunct_works="guessing no" ;; + # Guess yes otherwise. + *) gl_cv_func_iswpunct_works="guessing yes" ;; + esac + ]) + ]) + case "$gl_cv_func_iswpunct_works" in + *yes) ;; + *) REPLACE_ISWPUNCT=1 ;; + esac + fi +]) diff --git a/m4/wctype_h.m4 b/m4/wctype_h.m4 index 43a2d9cf10..ac9c35b2da 100644 --- a/m4/wctype_h.m4 +++ b/m4/wctype_h.m4 @@ -1,4 +1,4 @@ -# wctype_h.m4 serial 32 +# wctype_h.m4 serial 33 dnl A placeholder for ISO C99 <wctype.h>, for platforms that lack it. 
@@ -178,6 +178,7 @@ AC_DEFUN([gl_WCTYPE_H_REQUIRE_DEFAULTS] m4_defun(GL_MODULE_INDICATOR_PREFIX[_WCTYPE_H_MODULE_INDICATOR_DEFAULTS], [ gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_ISWBLANK]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_ISWDIGIT]) + gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_ISWPUNCT]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_ISWXDIGIT]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_WCTYPE]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_ISWCTYPE]) @@ -196,6 +197,7 @@ AC_DEFUN([gl_WCTYPE_H_DEFAULTS] HAVE_WCTRANS_T=1; AC_SUBST([HAVE_WCTRANS_T]) REPLACE_ISWBLANK=0; AC_SUBST([REPLACE_ISWBLANK]) REPLACE_ISWDIGIT=0; AC_SUBST([REPLACE_ISWDIGIT]) + REPLACE_ISWPUNCT=0; AC_SUBST([REPLACE_ISWPUNCT]) REPLACE_ISWXDIGIT=0; AC_SUBST([REPLACE_ISWXDIGIT]) REPLACE_WCTRANS=0; AC_SUBST([REPLACE_WCTRANS]) REPLACE_WCTYPE=0; AC_SUBST([REPLACE_WCTYPE]) diff --git a/modules/iswpunct b/modules/iswpunct new file mode 100644 index 0000000000..0abeb94e4e --- /dev/null +++ b/modules/iswpunct @@ -0,0 +1,30 @@ +Description: +iswpunct() function: test wide character for being a punctuation or symbol +character. + +Files: +lib/iswpunct.c +m4/iswpunct.m4 + +Depends-on: +wctype-h + +configure.ac: +gl_FUNC_ISWPUNCT +gl_CONDITIONAL([GL_COND_OBJ_ISWPUNCT], + [! 
{ test $HAVE_ISWCNTRL = 0 || test $REPLACE_ISWCNTRL = 1; } && test $REPLACE_ISWPUNCT = 1]) +gl_WCTYPE_MODULE_INDICATOR([iswpunct]) + +Makefile.am: +if GL_COND_OBJ_ISWPUNCT +lib_SOURCES += iswpunct.c +endif + +Include: +<wctype.h> + +License: +LGPLv2+ + +Maintainer: +Bruno Haible diff --git a/modules/wctype-h b/modules/wctype-h index 99aba62d90..682ef51be0 100644 --- a/modules/wctype-h +++ b/modules/wctype-h @@ -39,6 +39,7 @@ wctype.h: wctype.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H -e 's/@''GNULIBHEADERS_OVERRIDE_WINT_T''@/$(GNULIBHEADERS_OVERRIDE_WINT_T)/g' \ -e 's/@''GNULIB_ISWBLANK''@/$(GNULIB_ISWBLANK)/g' \ -e 's/@''GNULIB_ISWDIGIT''@/$(GNULIB_ISWDIGIT)/g' \ + -e 's/@''GNULIB_ISWPUNCT''@/$(GNULIB_ISWPUNCT)/g' \ -e 's/@''GNULIB_ISWXDIGIT''@/$(GNULIB_ISWXDIGIT)/g' \ -e 's/@''GNULIB_WCTYPE''@/$(GNULIB_WCTYPE)/g' \ -e 's/@''GNULIB_ISWCTYPE''@/$(GNULIB_ISWCTYPE)/g' \ @@ -51,6 +52,7 @@ wctype.h: wctype.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H -e 's/@''HAVE_WINT_T''@/$(HAVE_WINT_T)/g' \ -e 's/@''REPLACE_ISWBLANK''@/$(REPLACE_ISWBLANK)/g' \ -e 's/@''REPLACE_ISWDIGIT''@/$(REPLACE_ISWDIGIT)/g' \ + -e 's/@''REPLACE_ISWPUNCT''@/$(REPLACE_ISWPUNCT)/g' \ -e 's/@''REPLACE_ISWXDIGIT''@/$(REPLACE_ISWXDIGIT)/g' \ -e 's/@''REPLACE_ISWCNTRL''@/$(REPLACE_ISWCNTRL)/g' \ -e 's/@''REPLACE_TOWLOWER''@/$(REPLACE_TOWLOWER)/g' \ -- 2.34.1
>From 15da9e79fb3fe7f9ca577929fbf20b34ef4faf91 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Wed, 30 Aug 2023 02:18:26 +0200 Subject: [PATCH 3/5] iswpunct: Add tests. * tests/test-iswpunct.c: New file, based on tests/test-iswdigit.c and tests/test-c32ispunct.c. * tests/test-iswpunct.sh: New file, based on tests/test-iswdigit.sh. * modules/iswpunct-tests: New file. --- ChangeLog | 6 ++ modules/iswpunct-tests | 16 +++++ tests/test-iswpunct.c | 153 +++++++++++++++++++++++++++++++++++++++++ tests/test-iswpunct.sh | 7 ++ 4 files changed, 182 insertions(+) create mode 100644 modules/iswpunct-tests create mode 100644 tests/test-iswpunct.c create mode 100755 tests/test-iswpunct.sh diff --git a/ChangeLog b/ChangeLog index abb5917a76..a21cdf3ccf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,11 @@ 2023-08-29 Bruno Haible <br...@clisp.org> + iswpunct: Add tests. + * tests/test-iswpunct.c: New file, based on tests/test-iswdigit.c and + tests/test-c32ispunct.c. + * tests/test-iswpunct.sh: New file, based on tests/test-iswdigit.sh. + * modules/iswpunct-tests: New file. + iswpunct: New module. * lib/wctype.in.h (iswpunct): New declaration. * lib/iswpunct.c: New file. diff --git a/modules/iswpunct-tests b/modules/iswpunct-tests new file mode 100644 index 0000000000..3c6230fe4c --- /dev/null +++ b/modules/iswpunct-tests @@ -0,0 +1,16 @@ +Files: +tests/test-iswpunct.sh +tests/test-iswpunct.c +tests/signature.h +tests/macros.h + +Depends-on: +mbrtowc +setlocale + +configure.ac: + +Makefile.am: +TESTS += test-iswpunct.sh +check_PROGRAMS += test-iswpunct +test_iswpunct_LDADD = $(LDADD) $(SETLOCALE_LIB) $(MBRTOWC_LIB) diff --git a/tests/test-iswpunct.c b/tests/test-iswpunct.c new file mode 100644 index 0000000000..5404df961a --- /dev/null +++ b/tests/test-iswpunct.c @@ -0,0 +1,153 @@ +/* Test of iswpunct() function. + Copyright (C) 2020-2023 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +#include <wctype.h> + +#include "signature.h" +SIGNATURE_CHECK (iswpunct, int, (wint_t)); + +#include <locale.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> + +#include "macros.h" + +/* Returns the value of iswpunct for the multibyte character s[0..n-1]. */ +static int +for_character (const char *s, size_t n) +{ + mbstate_t state; + wchar_t wc; + size_t ret; + + memset (&state, '\0', sizeof (mbstate_t)); + wc = (wchar_t) 0xBADFACE; + ret = mbrtowc (&wc, s, n, &state); + if (ret == n) + return iswpunct (wc); + else + return 0; +} + +int +main (int argc, char *argv[]) +{ + int is; + char buf[4]; + + /* configure should already have checked that the locale is supported. */ + if (setlocale (LC_ALL, "") == NULL) + return 1; + + /* Test WEOF. */ + is = iswpunct (WEOF); + ASSERT (is == 0); + + /* Test single-byte characters. + POSIX specifies in + <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html> + no explicit list of punctuation or symbol characters. 
*/ + { + int c; + + for (c = 0; c < 0x100; c++) + switch (c) + { + case '\t': case '\v': case '\f': + case ' ': case '!': case '"': case '#': case '%': + case '&': case '\'': case '(': case ')': case '*': + case '+': case ',': case '-': case '.': case '/': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case ':': case ';': case '<': case '=': case '>': + case '?': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': + case '[': case '\\': case ']': case '^': case '_': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': case '{': case '|': case '}': case '~': + /* c is in the ISO C "basic character set". */ + buf[0] = (unsigned char) c; + is = for_character (buf, 1); + switch (c) + { + case ' ': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + /* c is an alphanumeric or space character. 
*/ + ASSERT (is == 0); + break; + case '!': case '"': case '#': case '%': + case '&': case '\'': case '(': case ')': case '*': + case '+': case ',': case '-': case '.': case '/': + case ':': case ';': case '<': case '=': case '>': + case '?': + case '[': case '\\': case ']': case '^': case '_': + case '{': case '|': case '}': case '~': + /* These characters are usually expected to be punctuation or + symbol characters. */ + ASSERT (is != 0); + break; + default: + ASSERT (is == 0); + break; + } + break; + } + } + + if (argc > 1) + switch (argv[1][0]) + { + case '0': + /* C locale; tested above. */ + /* These characters are not in the ISO C "basic character set", but + are nevertheless usually expected to be punctuation or symbol + characters. */ + is = for_character ("$", 1); + ASSERT (is != 0); + is = for_character ("@", 1); + ASSERT (is != 0); + is = for_character ("`", 1); + ASSERT (is != 0); + return 0; + } + + return 1; +} diff --git a/tests/test-iswpunct.sh b/tests/test-iswpunct.sh new file mode 100755 index 0000000000..366e18592a --- /dev/null +++ b/tests/test-iswpunct.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +# Test in the POSIX locale. +LC_ALL=C ${CHECKER} ./test-iswpunct${EXEEXT} 0 || exit 1 +LC_ALL=POSIX ${CHECKER} ./test-iswpunct${EXEEXT} 0 || exit 1 + +exit 0 -- 2.34.1
>From 01b1c5938372e72d7d0af2b123b37d9add3d235c Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Wed, 30 Aug 2023 02:18:40 +0200 Subject: [PATCH 4/5] c32ispunct: Rely on module iswpunct. * modules/c32ispunct (Depends-on): Add iswpunct. * tests/test-c32ispunct.c (main): Add a few more tests in the "C" locale. --- ChangeLog | 5 +++++ modules/c32ispunct | 1 + tests/test-c32ispunct.c | 9 +++++++++ 3 files changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index a21cdf3ccf..ae888b304a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2023-08-29 Bruno Haible <br...@clisp.org> + c32ispunct: Rely on module iswpunct. + * modules/c32ispunct (Depends-on): Add iswpunct. + * tests/test-c32ispunct.c (main): Add a few more tests in the "C" + locale. + iswpunct: Add tests. * tests/test-iswpunct.c: New file, based on tests/test-iswdigit.c and tests/test-c32ispunct.c. diff --git a/modules/c32ispunct b/modules/c32ispunct index bb78c9ea6a..ff9ee40825 100644 --- a/modules/c32ispunct +++ b/modules/c32ispunct @@ -16,6 +16,7 @@ Depends-on: uchar wchar wctype-h +iswpunct localcharset [test $REPLACE_MBSTATE_T = 1] streq [test $REPLACE_MBSTATE_T = 1] unictype/ctype-punct diff --git a/tests/test-c32ispunct.c b/tests/test-c32ispunct.c index 5167e89cf9..f0df92b9f1 100644 --- a/tests/test-c32ispunct.c +++ b/tests/test-c32ispunct.c @@ -136,6 +136,15 @@ main (int argc, char *argv[]) { case '0': /* C locale; tested above. */ + /* These characters are not in the ISO C "basic character set", but + are nevertheless usually expected to be punctuation or symbol + characters. */ + is = for_character ("$", 1); + ASSERT (is != 0); + is = for_character ("@", 1); + ASSERT (is != 0); + is = for_character ("`", 1); + ASSERT (is != 0); return 0; case '1': -- 2.34.1
>From a07c0e4b0c5659d04b77a3f6b636c1ade7358fbb Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Wed, 30 Aug 2023 02:19:01 +0200 Subject: [PATCH 5/5] wctype: Rely on module iswpunct. * m4/wctype.m4 (gl_FUNC_WCTYPE): Also test whether the "punct" class works. * modules/wctype (Depends-on): Add iswpunct. * tests/test-iswctype.c (main): Add more tests of the "punct" class. * doc/posix-functions/wctype.texi: Mention the Android problem. --- ChangeLog | 7 +++++++ doc/posix-functions/wctype.texi | 4 ++++ m4/wctype.m4 | 10 ++++++++-- modules/wctype | 1 + tests/test-iswctype.c | 28 ++++++++++++++++++++++++++++ 5 files changed, 48 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index ae888b304a..a839600839 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,12 @@ 2023-08-29 Bruno Haible <br...@clisp.org> + wctype: Rely on module iswpunct. + * m4/wctype.m4 (gl_FUNC_WCTYPE): Also test whether the "punct" class + works. + * modules/wctype (Depends-on): Add iswpunct. + * tests/test-iswctype.c (main): Add more tests of the "punct" class. + * doc/posix-functions/wctype.texi: Mention the Android problem. + c32ispunct: Rely on module iswpunct. * modules/c32ispunct (Depends-on): Add iswpunct. * tests/test-c32ispunct.c (main): Add a few more tests in the "C" diff --git a/doc/posix-functions/wctype.texi b/doc/posix-functions/wctype.texi index 957403ccee..6346703b60 100644 --- a/doc/posix-functions/wctype.texi +++ b/doc/posix-functions/wctype.texi @@ -24,6 +24,10 @@ is inconsistent with the @code{iswblank} and @code{isblank} functions on some platforms: MSVC 14. +@item +The object returned by this function for the @code{"punct"} character class +is inconsistent with the @code{ispunct} function on some platforms: +Android 11. 
@end itemize Portability problems not fixed by Gnulib: diff --git a/m4/wctype.m4 b/m4/wctype.m4 index ad78c8b166..4f3a836dc5 100644 --- a/m4/wctype.m4 +++ b/m4/wctype.m4 @@ -1,4 +1,4 @@ -# wctype.m4 serial 5 +# wctype.m4 serial 6 dnl Copyright (C) 2011-2023 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -11,10 +11,11 @@ AC_DEFUN_ONCE([gl_FUNC_WCTYPE] AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles HAVE_WCTYPE=$HAVE_WCTYPE_T if test $HAVE_WCTYPE = 1; then - AC_CACHE_CHECK([whether wctype supports the "blank" character class], + AC_CACHE_CHECK([whether wctype supports the "blank" and "punct" character classes], [gl_cv_func_wctype_works], [AC_RUN_IFELSE( [AC_LANG_SOURCE([[ + #include <ctype.h> #include <wchar.h> #include <wctype.h> int main () @@ -25,6 +26,9 @@ AC_DEFUN_ONCE([gl_FUNC_WCTYPE] /* This test fails on MSVC 14. */ if ((! iswctype ('\t', wctype ("blank"))) != (! iswblank ('\t'))) return 2; + /* This test fails on Android 11. */ + if ((! iswctype ('\`', wctype ("punct"))) != (! ispunct ('\`'))) + return 4; return 0; } ]])], @@ -32,6 +36,8 @@ AC_DEFUN_ONCE([gl_FUNC_WCTYPE] [case "$host_os" in # Guess no on native Windows. mingw* | windows*) gl_cv_func_wctype_works="guessing no" ;; + # Guess no on Android. + android*) gl_cv_func_wctype_works="guessing no" ;; # Guess yes otherwise. *) gl_cv_func_wctype_works="guessing yes" ;; esac diff --git a/modules/wctype b/modules/wctype index f3e449938b..69f2deb608 100644 --- a/modules/wctype +++ b/modules/wctype @@ -10,6 +10,7 @@ Depends-on: wctype-h iswblank [test $HAVE_WCTYPE = 0 || test $REPLACE_WCTYPE = 1] iswdigit [test $HAVE_WCTYPE = 0 || test $REPLACE_WCTYPE = 1] +iswpunct [test $HAVE_WCTYPE = 0 || test $REPLACE_WCTYPE = 1] iswxdigit [test $HAVE_WCTYPE = 0 || test $REPLACE_WCTYPE = 1] # When we override wctype_t, we also need to override iswctype(). 
iswctype [test $REPLACE_WCTYPE = 1] diff --git a/tests/test-iswctype.c b/tests/test-iswctype.c index a500f53fa2..fabd1b6a31 100644 --- a/tests/test-iswctype.c +++ b/tests/test-iswctype.c @@ -53,10 +53,38 @@ main (int argc, char *argv[]) desc = wctype ("punct"); ASSERT (desc != (wctype_t) 0); + ASSERT (iswctype (L'!', desc)); + ASSERT (iswctype (L'"', desc)); + ASSERT (iswctype (L'#', desc)); ASSERT (iswctype (L'$', desc)); + ASSERT (iswctype (L'%', desc)); + ASSERT (iswctype (L'&', desc)); + ASSERT (iswctype (L'\'', desc)); + ASSERT (iswctype (L'(', desc)); + ASSERT (iswctype (L')', desc)); + ASSERT (iswctype (L'*', desc)); + ASSERT (iswctype (L'+', desc)); + ASSERT (iswctype (L',', desc)); + ASSERT (iswctype (L'-', desc)); ASSERT (iswctype (L'.', desc)); + ASSERT (iswctype (L'/', desc)); + ASSERT (iswctype (L':', desc)); + ASSERT (iswctype (L';', desc)); ASSERT (iswctype (L'<', desc)); + ASSERT (iswctype (L'=', desc)); ASSERT (iswctype (L'>', desc)); + ASSERT (iswctype (L'?', desc)); + ASSERT (iswctype (L'@', desc)); + ASSERT (iswctype (L'[', desc)); + ASSERT (iswctype (L'\\', desc)); + ASSERT (iswctype (L']', desc)); + ASSERT (iswctype (L'^', desc)); + ASSERT (iswctype (L'_', desc)); + ASSERT (iswctype (L'`', desc)); + ASSERT (iswctype (L'{', desc)); + ASSERT (iswctype (L'|', desc)); + ASSERT (iswctype (L'}', desc)); + ASSERT (iswctype (L'~', desc)); ASSERT (! iswctype (L' ', desc)); ASSERT (! iswctype (L'a', desc)); ASSERT (! iswctype (L'1', desc)); -- 2.34.1