When we offer startswith() and endswith() functions for plain unibyte C strings, we need to offer the same for multibyte strings as well. This is done by the following patches:
2025-01-03 Bruno Haible <br...@clisp.org> doc: Mention the new modules. * doc/strings.texi (Comparison of string APIs): Add rows for startswith and endswith functions. 2025-01-03 Bruno Haible <br...@clisp.org> mbs_endswith: Add tests. * tests/test-mbs_endswith1.c: New file. * tests/test-mbs_endswith2.sh: New file, based on tests/test-mbsstr2.sh. * tests/test-mbs_endswith2.c: New file. * tests/test-mbs_endswith3.sh: New file, based on tests/test-mbsstr3.sh. * tests/test-mbs_endswith3.c: New file. * modules/mbs_endswith-tests: New file. mbs_endswith: New module. * lib/string.in.h (mbs_endswith): New declaration. * lib/mbs_endswith.c: New file. * m4/string_h.m4 (gl_STRING_H_REQUIRE_DEFAULTS): Initialize GNULIB_MBS_ENDSWITH. * modules/string-h (Makefile.am): Substitute GNULIB_MBS_ENDSWITH. * modules/mbs_endswith: New file. 2025-01-03 Bruno Haible <br...@clisp.org> mbs_startswith: New module. * lib/string.in.h (mbs_startswith): New declaration. * m4/string_h.m4 (gl_STRING_H_REQUIRE_DEFAULTS): Initialize GNULIB_MBS_STARTSWITH. * modules/string-h (Makefile.am): Substitute GNULIB_MBS_STARTSWITH. * modules/mbs_startswith: New file.
>From 398ebac4849fa0f97112f62190236fdf6ac64676 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Fri, 3 Jan 2025 11:17:16 +0100 Subject: [PATCH 1/4] mbs_startswith: New module. * lib/string.in.h (mbs_startswith): New declaration. * m4/string_h.m4 (gl_STRING_H_REQUIRE_DEFAULTS): Initialize GNULIB_MBS_STARTSWITH. * modules/string-h (Makefile.am): Substitute GNULIB_MBS_STARTSWITH. * modules/mbs_startswith: New file. --- ChangeLog | 9 +++++++++ lib/string.in.h | 10 ++++++++++ m4/string_h.m4 | 3 ++- modules/mbs_startswith | 22 ++++++++++++++++++++++ modules/string-h | 1 + 5 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 modules/mbs_startswith diff --git a/ChangeLog b/ChangeLog index 300538b682..a7b87e7061 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2025-01-03 Bruno Haible <br...@clisp.org> + + mbs_startswith: New module. + * lib/string.in.h (mbs_startswith): New declaration. + * m4/string_h.m4 (gl_STRING_H_REQUIRE_DEFAULTS): Initialize + GNULIB_MBS_STARTSWITH. + * modules/string-h (Makefile.am): Substitute GNULIB_MBS_STARTSWITH. + * modules/mbs_startswith: New file. + 2025-01-03 Bruno Haible <br...@clisp.org> tests: Use str_endswith. diff --git a/lib/string.in.h b/lib/string.in.h index f11b8f5706..ce5b493d71 100644 --- a/lib/string.in.h +++ b/lib/string.in.h @@ -1317,6 +1317,16 @@ _GL_EXTERN_C char * mbstok_r (char *restrict string, const char *delim, _GL_ARG_NONNULL ((2, 3)); #endif +#if @GNULIB_MBS_STARTSWITH@ +/* Returns true if STRING starts with PREFIX. + Returns false otherwise. */ +_GL_EXTERN_C int mbs_startswith (const char *string, const char *prefix) + _GL_ATTRIBUTE_PURE + _GL_ARG_NONNULL ((1, 2)); +/* No extra code is needed for multibyte locales for this function. */ +# define mbs_startswith str_startswith +#endif + /* Map any int, typically from errno, into an error message. 
*/ #if @GNULIB_STRERROR@ # if @REPLACE_STRERROR@ diff --git a/m4/string_h.m4 b/m4/string_h.m4 index d2adb95bad..a2c8536f38 100644 --- a/m4/string_h.m4 +++ b/m4/string_h.m4 @@ -1,5 +1,5 @@ # string_h.m4 -# serial 41 +# serial 42 dnl Copyright (C) 2007-2025 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -86,6 +86,7 @@ AC_DEFUN([gl_STRING_H_REQUIRE_DEFAULTS] gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MBSSPN]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MBSSEP]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MBSTOK_R]) + gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MBS_STARTSWITH]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_STRERROR]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_STRERROR_R]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_STRERRORNAME_NP]) diff --git a/modules/mbs_startswith b/modules/mbs_startswith new file mode 100644 index 0000000000..b7145de034 --- /dev/null +++ b/modules/mbs_startswith @@ -0,0 +1,22 @@ +Description: +mbs_startswith() function: test whether a multibyte string starts with a given prefix. + +Files: + +Depends-on: +string-h +str_startswith + +configure.ac: +gl_STRING_MODULE_INDICATOR([mbs_startswith]) + +Makefile.am: + +Include: +<string.h> + +License: +LGPLv2+ + +Maintainer: +all diff --git a/modules/string-h b/modules/string-h index 0897afe206..1114bcf9eb 100644 --- a/modules/string-h +++ b/modules/string-h @@ -51,6 +51,7 @@ string.h: string.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H -e 's/@''GNULIB_MBSSPN''@/$(GNULIB_MBSSPN)/g' \ -e 's/@''GNULIB_MBSSEP''@/$(GNULIB_MBSSEP)/g' \ -e 's/@''GNULIB_MBSTOK_R''@/$(GNULIB_MBSTOK_R)/g' \ + -e 's/@''GNULIB_MBS_STARTSWITH''@/$(GNULIB_MBS_STARTSWITH)/g' \ -e 's/@''GNULIB_MEMCHR''@/$(GNULIB_MEMCHR)/g' \ -e 's/@''GNULIB_MEMMEM''@/$(GNULIB_MEMMEM)/g' \ -e 's/@''GNULIB_MEMPCPY''@/$(GNULIB_MEMPCPY)/g' \ -- 2.43.0
>From f3436baf8ec19ee8f81441890d20bd5a096fb885 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Fri, 3 Jan 2025 13:32:32 +0100 Subject: [PATCH 2/4] mbs_endswith: New module. * lib/string.in.h (mbs_endswith): New declaration. * lib/mbs_endswith.c: New file. * m4/string_h.m4 (gl_STRING_H_REQUIRE_DEFAULTS): Initialize GNULIB_MBS_ENDSWITH. * modules/string-h (Makefile.am): Substitute GNULIB_MBS_ENDSWITH. * modules/mbs_endswith: New file. --- ChangeLog | 10 ++++++ lib/mbs_endswith.c | 74 ++++++++++++++++++++++++++++++++++++++++++++ lib/string.in.h | 10 ++++++ m4/string_h.m4 | 3 +- modules/mbs_endswith | 26 ++++++++++++++++ modules/string-h | 1 + 6 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 lib/mbs_endswith.c create mode 100644 modules/mbs_endswith diff --git a/ChangeLog b/ChangeLog index a7b87e7061..4dc577daf5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2025-01-03 Bruno Haible <br...@clisp.org> + + mbs_endswith: New module. + * lib/string.in.h (mbs_endswith): New declaration. + * lib/mbs_endswith.c: New file. + * m4/string_h.m4 (gl_STRING_H_REQUIRE_DEFAULTS): Initialize + GNULIB_MBS_ENDSWITH. + * modules/string-h (Makefile.am): Substitute GNULIB_MBS_ENDSWITH. + * modules/mbs_endswith: New file. + 2025-01-03 Bruno Haible <br...@clisp.org> mbs_startswith: New module. diff --git a/lib/mbs_endswith.c b/lib/mbs_endswith.c new file mode 100644 index 0000000000..5007a445ec --- /dev/null +++ b/lib/mbs_endswith.c @@ -0,0 +1,74 @@ +/* mbs_endswith function. + Copyright (C) 2025 Free Software Foundation, Inc. + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. 
+ + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <br...@clisp.org>, 2025. */ + +#include "config.h" + +/* Specification. */ +#include <string.h> + +#include "mbiter.h" + +#include <stdlib.h> + +int +mbs_endswith (const char *string, const char *suffix) +{ + if ((unsigned char) suffix[0] < 0x30) + /* Some knowledge about the possible multibyte encodings: + + Encoding First byte of character Which of these can occur + as second or later byte? + + EUC-JP 0x00..0x7F, 0x8E..0xFE 0xA1..0xFE + EUC-KR 0x00..0x7F, 0xA1..0xFD 0xA1..0xFD + GB2312 0x00..0x7F, 0xA1..0xF7 0xA1..0xF7 + EUC-TW 0x00..0x7F, 0x8E..0xFD 0xA1..0xFD + BIG5 0x00..0x7F, 0xA1..0xF9 0x40..0x7E, 0xA1..0xF9 + GB18030 0x00..0x7F, 0x81..0xFE 0x30..0x39, 0x40..0x7E, 0x81..0xFE + SJIS 0x00..0x7F, 0x81..0xF9 0x40..0x7E, 0x81..0xF9 + UTF-8 0x00..0x7F, 0xC2..0xF7 none + + Therefore, if the first byte of SUFFIX is < 0x30, it cannot occur as + second or later byte, and therefore it is OK to do a bytewise search. */ + return str_endswith (string, suffix); + + /* Here, suffix is not empty. */ + + size_t nbytes = strlen (string); + if (nbytes >= strlen (suffix)) + { + size_t len = mbslen (string); + size_t n = mbslen (suffix); + if (len >= n) + { + mbi_iterator_t iter; + mbi_init (iter, string, nbytes); + /* Advance past (len - n) multibyte characters. 
*/ + for (; len > n; len--) + { + if (!mbi_avail (iter)) + abort (); + mbi_advance (iter); + } + if (!mbi_avail (iter)) + abort (); + return strcmp (mbi_cur_ptr (iter), suffix) == 0; + } + } + return 0; +} diff --git a/lib/string.in.h b/lib/string.in.h index ce5b493d71..6b9e342a1a 100644 --- a/lib/string.in.h +++ b/lib/string.in.h @@ -1327,6 +1327,16 @@ _GL_EXTERN_C int mbs_startswith (const char *string, const char *prefix) # define mbs_startswith str_startswith #endif +#if @GNULIB_MBS_ENDSWITH@ +/* Returns true if STRING ends with SUFFIX. + Returns false otherwise. + Unlike str_endswith(), this function works correctly in multibyte locales. + */ +_GL_EXTERN_C int mbs_endswith (const char *string, const char *suffix) + _GL_ATTRIBUTE_PURE + _GL_ARG_NONNULL ((1, 2)); +#endif + /* Map any int, typically from errno, into an error message. */ #if @GNULIB_STRERROR@ # if @REPLACE_STRERROR@ diff --git a/m4/string_h.m4 b/m4/string_h.m4 index a2c8536f38..d0a6760811 100644 --- a/m4/string_h.m4 +++ b/m4/string_h.m4 @@ -1,5 +1,5 @@ # string_h.m4 -# serial 42 +# serial 43 dnl Copyright (C) 2007-2025 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -87,6 +87,7 @@ AC_DEFUN([gl_STRING_H_REQUIRE_DEFAULTS] gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MBSSEP]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MBSTOK_R]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MBS_STARTSWITH]) + gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MBS_ENDSWITH]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_STRERROR]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_STRERROR_R]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_STRERRORNAME_NP]) diff --git a/modules/mbs_endswith b/modules/mbs_endswith new file mode 100644 index 0000000000..19ead99cd5 --- /dev/null +++ b/modules/mbs_endswith @@ -0,0 +1,26 @@ +Description: +mbs_endswith() function: test whether a multibyte string ends with a given suffix. 
+ +Files: +lib/mbs_endswith.c + +Depends-on: +string-h +str_endswith +mbslen +mbiter + +configure.ac: +gl_STRING_MODULE_INDICATOR([mbs_endswith]) + +Makefile.am: +lib_SOURCES += mbs_endswith.c + +Include: +<string.h> + +License: +LGPL + +Maintainer: +all diff --git a/modules/string-h b/modules/string-h index 1114bcf9eb..4f9c205424 100644 --- a/modules/string-h +++ b/modules/string-h @@ -51,6 +51,7 @@ string.h: string.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H -e 's/@''GNULIB_MBSSPN''@/$(GNULIB_MBSSPN)/g' \ -e 's/@''GNULIB_MBSSEP''@/$(GNULIB_MBSSEP)/g' \ -e 's/@''GNULIB_MBSTOK_R''@/$(GNULIB_MBSTOK_R)/g' \ + -e 's/@''GNULIB_MBS_ENDSWITH''@/$(GNULIB_MBS_ENDSWITH)/g' \ -e 's/@''GNULIB_MBS_STARTSWITH''@/$(GNULIB_MBS_STARTSWITH)/g' \ -e 's/@''GNULIB_MEMCHR''@/$(GNULIB_MEMCHR)/g' \ -e 's/@''GNULIB_MEMMEM''@/$(GNULIB_MEMMEM)/g' \ -- 2.43.0
From 343b31bf1b7492ad2519143fd490565a6f6a2321 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Fri, 3 Jan 2025 13:34:16 +0100 Subject: [PATCH 3/4] mbs_endswith: Add tests. * tests/test-mbs_endswith1.c: New file. * tests/test-mbs_endswith2.sh: New file, based on tests/test-mbsstr2.sh. * tests/test-mbs_endswith2.c: New file. * tests/test-mbs_endswith3.sh: New file, based on tests/test-mbsstr3.sh. * tests/test-mbs_endswith3.c: New file. * modules/mbs_endswith-tests: New file. --- ChangeLog | 8 +++++ modules/mbs_endswith-tests | 30 ++++++++++++++++ tests/test-mbs_endswith1.c | 48 +++++++++++++++++++++++++ tests/test-mbs_endswith2.c | 66 ++++++++++++++++++++++++++++++++++ tests/test-mbs_endswith2.sh | 23 ++++++++++++ tests/test-mbs_endswith3.c | 71 +++++++++++++++++++++++++++++++++++++ tests/test-mbs_endswith3.sh | 15 ++++++++ 7 files changed, 261 insertions(+) create mode 100644 modules/mbs_endswith-tests create mode 100644 tests/test-mbs_endswith1.c create mode 100644 tests/test-mbs_endswith2.c create mode 100755 tests/test-mbs_endswith2.sh create mode 100644 tests/test-mbs_endswith3.c create mode 100755 tests/test-mbs_endswith3.sh diff --git a/ChangeLog b/ChangeLog index 4dc577daf5..3cce627d4b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,13 @@ 2025-01-03 Bruno Haible <br...@clisp.org> + mbs_endswith: Add tests. + * tests/test-mbs_endswith1.c: New file. + * tests/test-mbs_endswith2.sh: New file, based on tests/test-mbsstr2.sh. + * tests/test-mbs_endswith2.c: New file. + * tests/test-mbs_endswith3.sh: New file, based on tests/test-mbsstr3.sh. + * tests/test-mbs_endswith3.c: New file. + * modules/mbs_endswith-tests: New file. + mbs_endswith: New module. * lib/string.in.h (mbs_endswith): New declaration. * lib/mbs_endswith.c: New file. 
diff --git a/modules/mbs_endswith-tests b/modules/mbs_endswith-tests new file mode 100644 index 0000000000..62032384d1 --- /dev/null +++ b/modules/mbs_endswith-tests @@ -0,0 +1,30 @@ +Files: +tests/test-mbs_endswith1.c +tests/test-mbs_endswith2.sh +tests/test-mbs_endswith2.c +tests/test-mbs_endswith3.sh +tests/test-mbs_endswith3.c +tests/macros.h +m4/locale-en.m4 +m4/locale-fr.m4 +m4/locale-zh.m4 +m4/codeset.m4 + +Depends-on: +setlocale + +configure.ac: +gt_LOCALE_EN_UTF8 +gt_LOCALE_FR_UTF8 +gt_LOCALE_ZH_CN + +Makefile.am: +TESTS += test-mbs_endswith1 test-mbs_endswith2.sh test-mbs_endswith3.sh +TESTS_ENVIRONMENT += \ + LOCALE_EN_UTF8='@LOCALE_EN_UTF8@' \ + LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' \ + LOCALE_ZH_CN='@LOCALE_ZH_CN@' +check_PROGRAMS += test-mbs_endswith1 test-mbs_endswith2 test-mbs_endswith3 +test_mbs_endswith1_LDADD = $(LDADD) $(LIBUNISTRING) $(MBRTOWC_LIB) $(LIBC32CONV) +test_mbs_endswith2_LDADD = $(LDADD) $(LIBUNISTRING) $(SETLOCALE_LIB) $(MBRTOWC_LIB) $(LIBC32CONV) +test_mbs_endswith3_LDADD = $(LDADD) $(LIBUNISTRING) $(SETLOCALE_LIB) $(MBRTOWC_LIB) $(LIBC32CONV) diff --git a/tests/test-mbs_endswith1.c b/tests/test-mbs_endswith1.c new file mode 100644 index 0000000000..9153ed03f1 --- /dev/null +++ b/tests/test-mbs_endswith1.c @@ -0,0 +1,48 @@ +/* Test of mbs_endswith() function. + Copyright (C) 2025 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. 
*/ + +/* Written by Bruno Haible <br...@clisp.org>, 2025. */ + +#include <config.h> + +#include <string.h> + +#include <stdlib.h> + +#include "macros.h" + +int +main () +{ + /* This test is executed in the C locale. */ + + ASSERT (mbs_endswith ("", "")); + ASSERT (mbs_endswith ("abc", "")); + + ASSERT (!mbs_endswith ("", "c")); + ASSERT (!mbs_endswith ("x", "c")); + ASSERT (mbs_endswith ("c", "c")); + ASSERT (mbs_endswith ("abc", "c")); + + ASSERT (!mbs_endswith ("", "xyz")); + ASSERT (!mbs_endswith ("x", "xyz")); + ASSERT (!mbs_endswith ("a", "xyz")); + ASSERT (!mbs_endswith ("abc", "xyz")); + ASSERT (mbs_endswith ("xyz", "xyz")); + ASSERT (mbs_endswith ("yxxyz", "xyz")); + + return test_exit_status; +} diff --git a/tests/test-mbs_endswith2.c b/tests/test-mbs_endswith2.c new file mode 100644 index 0000000000..f76f9b04e4 --- /dev/null +++ b/tests/test-mbs_endswith2.c @@ -0,0 +1,66 @@ +/* Test of mbs_endswith() function. + Copyright (C) 2025 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <br...@clisp.org>, 2025. */ + +#include <config.h> + +#include <string.h> + +#include <locale.h> +#include <stdlib.h> + +#include "macros.h" + +int +main () +{ + /* configure should already have checked that the locale is supported. 
*/ + if (setlocale (LC_ALL, "") == NULL) + return 1; + + ASSERT (mbs_endswith ("", "")); + ASSERT (mbs_endswith ("abc", "")); + + ASSERT (!mbs_endswith ("", "c")); + ASSERT (!mbs_endswith ("x", "c")); + ASSERT (mbs_endswith ("c", "c")); + ASSERT (mbs_endswith ("abc", "c")); + + ASSERT (!mbs_endswith ("", "xyz")); + ASSERT (!mbs_endswith ("x", "xyz")); + ASSERT (!mbs_endswith ("a", "xyz")); + ASSERT (!mbs_endswith ("abc", "xyz")); + ASSERT (mbs_endswith ("xyz", "xyz")); + ASSERT (mbs_endswith ("yxxyz", "xyz")); + + ASSERT (mbs_endswith ("", "")); + ASSERT (mbs_endswith ("\303\244\306\200\303\247", "")); /* "??????" */ + + ASSERT (!mbs_endswith ("", "\303\247")); /* "??" */ + ASSERT (!mbs_endswith ("\341\272\213", "\303\247")); /* "???" "??" */ + ASSERT (mbs_endswith ("\303\247", "\303\247")); /* "??" "??" */ + ASSERT (mbs_endswith ("\303\244\306\200\303\247", "\303\247")); /* "??????" "??" */ + + ASSERT (!mbs_endswith ("", "\341\272\213\303\277\341\272\221")); /* "????????" */ + ASSERT (!mbs_endswith ("\341\272\213", "\341\272\213\303\277\341\272\221")); /* "???" "????????" */ + ASSERT (!mbs_endswith ("\303\244", "\341\272\213\303\277\341\272\221")); /* "??" "????????" */ + ASSERT (!mbs_endswith ("\303\244\306\200\303\247", "\341\272\213\303\277\341\272\221")); /* "??????" "????????" */ + ASSERT (mbs_endswith ("\341\272\213\303\277\341\272\221", "\341\272\213\303\277\341\272\221")); /* "????????" "????????" */ + ASSERT (mbs_endswith ("\303\277\341\272\213\341\272\213\303\277\341\272\221", "\341\272\213\303\277\341\272\221")); /* "?????????????" "????????" */ + + return test_exit_status; +} diff --git a/tests/test-mbs_endswith2.sh b/tests/test-mbs_endswith2.sh new file mode 100755 index 0000000000..464e9e9956 --- /dev/null +++ b/tests/test-mbs_endswith2.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +# Test whether a specific UTF-8 locale is installed. 
+: "${LOCALE_EN_UTF8=en_US.UTF-8}" +: "${LOCALE_FR_UTF8=fr_FR.UTF-8}" +if test "$LOCALE_EN_UTF8" = none && test $LOCALE_FR_UTF8 = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no english or french Unicode locale is installed" + else + echo "Skipping test: no english or french Unicode locale is supported" + fi + exit 77 +fi + +# It's sufficient to test in one of the two locales. +if test $LOCALE_FR_UTF8 != none; then + testlocale=$LOCALE_FR_UTF8 +else + testlocale="$LOCALE_EN_UTF8" +fi + +LC_ALL="$testlocale" \ +${CHECKER} ./test-mbs_endswith2${EXEEXT} diff --git a/tests/test-mbs_endswith3.c b/tests/test-mbs_endswith3.c new file mode 100644 index 0000000000..ee7c1d4a2d --- /dev/null +++ b/tests/test-mbs_endswith3.c @@ -0,0 +1,71 @@ +/* Test of mbs_endswith() function. + Copyright (C) 2025 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <br...@clisp.org>, 2025. */ + +#include <config.h> + +#include <string.h> + +#include <locale.h> + +#include "macros.h" + +int +main () +{ + /* configure should already have checked that the locale is supported. 
*/ + if (setlocale (LC_ALL, "") == NULL) + return 1; + + ASSERT (mbs_endswith ("", "")); + ASSERT (mbs_endswith ("abc", "")); + + ASSERT (!mbs_endswith ("", "c")); + ASSERT (!mbs_endswith ("x", "c")); + ASSERT (mbs_endswith ("c", "c")); + ASSERT (mbs_endswith ("abc", "c")); + + ASSERT (!mbs_endswith ("", "xyz")); + ASSERT (!mbs_endswith ("x", "xyz")); + ASSERT (!mbs_endswith ("a", "xyz")); + ASSERT (!mbs_endswith ("abc", "xyz")); + ASSERT (mbs_endswith ("xyz", "xyz")); + ASSERT (mbs_endswith ("yxxyz", "xyz")); + + ASSERT (mbs_endswith ("", "")); + ASSERT (mbs_endswith ("\201\060\212\061\201\060\227\070\201\060\212\064", "")); /* "??????" */ + + ASSERT (!mbs_endswith ("", "\201\060\212\064")); /* "??" */ + ASSERT (!mbs_endswith ("\201\065\374\063", "\201\060\212\064")); /* "???" "??" */ + ASSERT (mbs_endswith ("\201\060\212\064", "\201\060\212\064")); /* "??" "??" */ + ASSERT (mbs_endswith ("\201\060\212\061\201\060\227\070\201\060\212\064", "\201\060\212\064")); /* "??????" "??" */ + ASSERT (!mbs_endswith ("\201\060\212\061\201\060\227\070\201\060\212\064", "4")); /* "??????" */ + ASSERT (!mbs_endswith ("\201\060\212\061\201\060\227\070\201\060\212\064", "0\212\064")); /* "??????" "0?" */ + ASSERT (!mbs_endswith ("\201\060\212\061\201\060\227\070\201\060\212\064", "8\201\060\212\064")); /* "??????" "8??" */ + + ASSERT (!mbs_endswith ("", "\201\065\374\063\201\060\213\067\201\065\374\071")); /* "????????" */ + ASSERT (!mbs_endswith ("\201\065\374\063", "\201\065\374\063\201\060\213\067\201\065\374\071")); /* "???" "????????" */ + ASSERT (!mbs_endswith ("\201\060\212\061", "\201\065\374\063\201\060\213\067\201\065\374\071")); /* "??" "????????" */ + ASSERT (!mbs_endswith ("\201\060\212\061\201\060\227\070\201\060\212\064", "\201\065\374\063\201\060\213\067\201\065\374\071")); /* "??????" "????????" */ + ASSERT (mbs_endswith ("\201\065\374\063\201\060\213\067\201\065\374\071", "\201\065\374\063\201\060\213\067\201\065\374\071")); /* "????????" "????????" 
*/ + ASSERT (mbs_endswith ("\201\060\213\067\201\065\374\063\201\065\374\063\201\060\213\067\201\065\374\071", "\201\065\374\063\201\060\213\067\201\065\374\071")); /* "?????????????" "????????" */ + ASSERT (!mbs_endswith ("\201\065\374\063\201\060\213\067\201\065\374\071", "9")); /* "????????" */ + ASSERT (!mbs_endswith ("\201\065\374\063\201\060\213\067\201\065\374\071", "5\374\071")); /* "????????" "5?" */ + ASSERT (!mbs_endswith ("\201\065\374\063\201\060\213\067\201\065\374\071", "7\201\065\374\071")); /* "????????" "7???" */ + + return test_exit_status; +} diff --git a/tests/test-mbs_endswith3.sh b/tests/test-mbs_endswith3.sh new file mode 100755 index 0000000000..fe0df625de --- /dev/null +++ b/tests/test-mbs_endswith3.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test whether a specific GB18030 locale is installed. +: "${LOCALE_ZH_CN=zh_CN.GB18030}" +if test $LOCALE_ZH_CN = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no chinese GB18030 locale is installed" + else + echo "Skipping test: no chinese GB18030 locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_ZH_CN \ +${CHECKER} ./test-mbs_endswith3${EXEEXT} -- 2.43.0
>From 112ba3d0677b4c717c893a5744d07b4ea69c36ce Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Fri, 3 Jan 2025 14:05:57 +0100 Subject: [PATCH 4/4] doc: Mention the new modules. * doc/strings.texi (Comparison of string APIs): Add rows for startswith and endswith functions. --- ChangeLog | 6 ++++++ doc/strings.texi | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/ChangeLog b/ChangeLog index 3cce627d4b..01529c0411 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2025-01-03 Bruno Haible <br...@clisp.org> + + doc: Mention the new modules. + * doc/strings.texi (Comparison of string APIs): Add rows for startswith + and endswith functions. + 2025-01-03 Bruno Haible <br...@clisp.org> mbs_endswith: Add tests. diff --git a/doc/strings.texi b/doc/strings.texi index 89d48b7adb..94cc61c9be 100644 --- a/doc/strings.texi +++ b/doc/strings.texi @@ -320,6 +320,8 @@ @mindex mbstok_r @mindex mbstowcs @mindex mbswidth +@mindex mbs_endswith +@mindex mbs_startswith @mindex stpcpy @mindex stpncpy @mindex strcase @@ -343,8 +345,12 @@ @mindex strtoul @mindex strtoull @mindex strtoumax +@mindex str_endswith +@mindex str_startswith @mindex unicase/u32-casecmp @mindex unistr/u32-mbsnlen +@mindex unistr/u32-endswith +@mindex unistr/u32-startswith @mindex unistr/u32-stpcpy @mindex unistr/u32-stpncpy @mindex unistr/u32-strcat @@ -483,6 +489,20 @@ @tab -- @tab -- +@item @code{str_startswith} +@tab @code{str_startswith} +@tab @code{mbs_startswith} +@tab -- +@tab -- +@tab @code{u32_startswith} + +@item @code{str_endswith} +@tab @code{str_endswith} +@tab @code{mbs_endswith} +@tab -- +@tab -- +@tab @code{u32_endswith} + @item @code{strspn} @tab @code{strspn} @tab @code{mbsspn} -- 2.43.0