For robustness, I am also adding tests for the glibc bug https://sourceware.org/bugzilla/show_bug.cgi?id=19932 to the mb*c32* unit tests.
Here, no fixes are needed, since the fixes are already in the mb*wc* and mbrtoc32 modules. 2023-04-03 Bruno Haible <br...@clisp.org> mbstoc32s tests: Check behaviour in the C locale. * tests/test-mbstoc32s.c (main): Test behaviour in the C locale. Based on tests/test-mbstowcs.c. * tests/test-mbstoc32s-5.sh: New file, based on tests/test-mbstowcs5.sh. * modules/mbstoc32s-tests (Files): Add it. (Depends-on): Add btoc32. (Makefile.am): Run test-mbstoc32s-5.sh. 2023-04-03 Bruno Haible <br...@clisp.org> mbsnrtoc32s tests: Check behaviour in the C locale. * tests/test-mbsnrtoc32s.c (main): Test behaviour in the C locale. Based on tests/test-mbsnrtowcs.c. * tests/test-mbsnrtoc32s-5.sh: New file, based on tests/test-mbsrtowcs5.sh. * modules/mbsnrtoc32s-tests (Files): Add it. (Depends-on): Add btoc32. (Makefile.am): Run test-mbsnrtoc32s-5.sh. 2023-04-03 Bruno Haible <br...@clisp.org> mbsrtoc32s tests: Check behaviour in the C locale. * tests/test-mbsrtoc32s.c (main): Test behaviour in the C locale. Based on tests/test-mbsrtowcs.c. * tests/test-mbsrtoc32s-5.sh: New file, based on tests/test-mbsrtowcs5.sh. * modules/mbsrtoc32s-tests (Files): Add it. (Depends-on): Add btoc32. (Makefile.am): Run test-mbsrtoc32s-5.sh. 2023-04-03 Bruno Haible <br...@clisp.org> btoc32 tests: Check behaviour in the C locale. * tests/test-btoc32.c (main): Test behaviour in the C locale. Based on tests/test-btowc.c. * tests/test-btoc32-3.sh: New file, based on tests/test-btowc3.sh. * modules/btoc32-tests (Files): Add it. (Makefile.am): Test it. 2023-04-03 Bruno Haible <br...@clisp.org> mbrtoc32 tests: Prefer *c32* functions. * tests/test-mbrtoc32.c (main): Use btoc32 instead of btowc. * modules/mbrtoc32-tests (Depends-on): Add btoc32. 2023-04-03 Bruno Haible <br...@clisp.org> mbrtoc32 tests: Add comment. * m4/mbrtowc.m4 (gl_MBRTOWC_C_LOCALE): Add comment. * m4/mbrtoc32.m4 (gl_MBRTOC32_C_LOCALE): Add comment. * tests/test-mbrtoc32.c: Add comment. * tests/test-mbrtoc32-5.sh: Use symmetric coding style. 
* doc/posix-functions/mbrtoc32.texi: Update.
From 09a9ccc95ddd3e4ffbf386cca440b944c91d3412 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Mon, 3 Apr 2023 14:24:37 +0200 Subject: [PATCH 1/6] mbrtoc32 tests: Add comment. * m4/mbrtowc.m4 (gl_MBRTOWC_C_LOCALE): Add comment. * m4/mbrtoc32.m4 (gl_MBRTOC32_C_LOCALE): Add comment. * tests/test-mbrtoc32.c: Add comment. * tests/test-mbrtoc32-5.sh: Use symmetric coding style. * doc/posix-functions/mbrtoc32.texi: Update. --- ChangeLog | 9 +++++++++ doc/posix-functions/mbrtoc32.texi | 2 +- m4/mbrtoc32.m4 | 7 ++++++- m4/mbrtowc.m4 | 4 ++++ tests/test-mbrtoc32-5.sh | 7 +++++-- tests/test-mbrtoc32.c | 4 ++++ 6 files changed, 29 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2993b1561f..7bc1a008f1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2023-04-03 Bruno Haible <br...@clisp.org> + + mbrtoc32 tests: Add comment. + * m4/mbrtowc.m4 (gl_MBRTOWC_C_LOCALE): Add comment. + * m4/mbrtoc32.m4 (gl_MBRTOC32_C_LOCALE): Add comment. + * tests/test-mbrtoc32.c: Add comment. + * tests/test-mbrtoc32-5.sh: Use symmetric coding style. + * doc/posix-functions/mbrtoc32.texi: Update. + 2023-04-02 Bruno Haible <br...@clisp.org> trim: Fix trim_trailing result in multibyte locales. diff --git a/doc/posix-functions/mbrtoc32.texi b/doc/posix-functions/mbrtoc32.texi index d95c670200..7c3d6c294a 100644 --- a/doc/posix-functions/mbrtoc32.texi +++ b/doc/posix-functions/mbrtoc32.texi @@ -12,7 +12,7 @@ @item In the C or POSIX locales, this function can return @code{(size_t) -1} and set @code{errno} to @code{EILSEQ}: -glibc 2.23. +glibc 2.35. @item This function returns 0 instead of @code{(size_t) -2} when the input is empty: diff --git a/m4/mbrtoc32.m4 b/m4/mbrtoc32.m4 index f2de33aff6..d6401fe2e4 100644 --- a/m4/mbrtoc32.m4 +++ b/m4/mbrtoc32.m4 @@ -1,4 +1,4 @@ -# mbrtoc32.m4 serial 10 +# mbrtoc32.m4 serial 11 dnl Copyright (C) 2014-2023 Free Software Foundation, Inc. 
dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -101,6 +101,11 @@ AC_DEFUN([gl_MBRTOC32_EMPTY_INPUT] ]) ]) +dnl <https://pubs.opengroup.org/onlinepubs/9699919799/functions/mbrtowc.html> +dnl POSIX:2018 says regarding mbrtowc: "In the POSIX locale an [EILSEQ] error +dnl cannot occur since all byte values are valid characters." It is reasonable +dnl to expect mbrtoc32 to behave in the same way. + AC_DEFUN([gl_MBRTOC32_C_LOCALE], [ AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles diff --git a/m4/mbrtowc.m4 b/m4/mbrtowc.m4 index 4bf91bec3b..0c7a8bae69 100644 --- a/m4/mbrtowc.m4 +++ b/m4/mbrtowc.m4 @@ -712,6 +712,10 @@ AC_DEFUN([gl_MBRTOWC_EMPTY_INPUT] dnl Although POSIX was never intended to allow this, the GNU C Library dnl and other implementations do it. See: dnl https://sourceware.org/bugzilla/show_bug.cgi?id=19932 +dnl POSIX has now clarified it: +dnl <https://pubs.opengroup.org/onlinepubs/9699919799/functions/mbrtowc.html> +dnl says: "In the POSIX locale an [EILSEQ] error cannot occur since all byte +dnl values are valid characters." AC_DEFUN([gl_MBRTOWC_C_LOCALE], [ diff --git a/tests/test-mbrtoc32-5.sh b/tests/test-mbrtoc32-5.sh index cd000fb3c7..4213772272 100755 --- a/tests/test-mbrtoc32-5.sh +++ b/tests/test-mbrtoc32-5.sh @@ -1,6 +1,9 @@ #!/bin/sh + # Test whether the POSIX locale has encoding errors. 
LC_ALL=C \ -${CHECKER} ./test-mbrtoc32${EXEEXT} 5 || exit +${CHECKER} ./test-mbrtoc32${EXEEXT} 5 || exit 1 LC_ALL=POSIX \ -${CHECKER} ./test-mbrtoc32${EXEEXT} 5 +${CHECKER} ./test-mbrtoc32${EXEEXT} 5 || exit 1 + +exit 0 diff --git a/tests/test-mbrtoc32.c b/tests/test-mbrtoc32.c index 0d75c3db14..a40309004e 100644 --- a/tests/test-mbrtoc32.c +++ b/tests/test-mbrtoc32.c @@ -386,6 +386,10 @@ main (int argc, char *argv[]) wc = (char32_t) 0xBADFACE; ret = mbrtoc32 (&wc, buf, 1, &state); + /* POSIX:2018 says regarding mbrtowc: "In the POSIX locale an + [EILSEQ] error cannot occur since all byte values are valid + characters." It is reasonable to expect mbrtoc32 to behave + in the same way. */ ASSERT (ret == 1); if (c < 0x80) /* c is an ASCII character. */ -- 2.34.1
From 13679eb960abb41db5557bf364d7b31bc27d6f55 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Mon, 3 Apr 2023 14:24:39 +0200 Subject: [PATCH 2/6] mbrtoc32 tests: Prefer *c32* functions. * tests/test-mbrtoc32.c (main): Use btoc32 instead of btowc. * modules/mbrtoc32-tests (Depends-on): Add btoc32. --- ChangeLog | 6 ++++++ modules/mbrtoc32-tests | 1 + tests/test-mbrtoc32.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 7bc1a008f1..901aa4331a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2023-04-03 Bruno Haible <br...@clisp.org> + + mbrtoc32 tests: Prefer *c32* functions. + * tests/test-mbrtoc32.c (main): Use btoc32 instead of btowc. + * modules/mbrtoc32-tests (Depends-on): Add btoc32. + 2023-04-03 Bruno Haible <br...@clisp.org> mbrtoc32 tests: Add comment. diff --git a/modules/mbrtoc32-tests b/modules/mbrtoc32-tests index e123772a20..95cc4750bd 100644 --- a/modules/mbrtoc32-tests +++ b/modules/mbrtoc32-tests @@ -22,6 +22,7 @@ m4/codeset.m4 Depends-on: mbsinit +btoc32 c32tob setlocale localcharset diff --git a/tests/test-mbrtoc32.c b/tests/test-mbrtoc32.c index a40309004e..ecde031f36 100644 --- a/tests/test-mbrtoc32.c +++ b/tests/test-mbrtoc32.c @@ -397,7 +397,7 @@ main (int argc, char *argv[]) else /* On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF. But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF. */ - ASSERT (wc == (btowc (c) == 0xDF00 + c ? btowc (c) : c)); + ASSERT (wc == (btoc32 (c) == 0xDF00 + c ? btoc32 (c) : c)); ASSERT (mbsinit (&state)); ret = mbrtoc32 (NULL, buf, 1, &state); -- 2.34.1
From d36467df509b488bc069a3c247a3ee595874f431 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Mon, 3 Apr 2023 14:24:42 +0200 Subject: [PATCH 3/6] btoc32 tests: Check behaviour in the C locale. * tests/test-btoc32.c (main): Test behaviour in the C locale. Based on tests/test-btowc.c. * tests/test-btoc32-3.sh: New file, based on tests/test-btowc3.sh. * modules/btoc32-tests (Files): Add it. (Makefile.am): Test it. --- ChangeLog | 9 +++++++++ modules/btoc32-tests | 3 ++- tests/test-btoc32-3.sh | 9 +++++++++ tests/test-btoc32.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 1 deletion(-) create mode 100755 tests/test-btoc32-3.sh diff --git a/ChangeLog b/ChangeLog index 901aa4331a..b05eb8f846 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2023-04-03 Bruno Haible <br...@clisp.org> + + btoc32 tests: Check behaviour in the C locale. + * tests/test-btoc32.c (main): Test behaviour in the C locale. Based on + tests/test-btowc.c. + * tests/test-btoc32-3.sh: New file, based on tests/test-btowc3.sh. + * modules/btoc32-tests (Files): Add it. + (Makefile.am): Test it. + 2023-04-03 Bruno Haible <br...@clisp.org> mbrtoc32 tests: Prefer *c32* functions. 
diff --git a/modules/btoc32-tests b/modules/btoc32-tests index b3373f3cf8..b9d14eefd1 100644 --- a/modules/btoc32-tests +++ b/modules/btoc32-tests @@ -1,6 +1,7 @@ Files: tests/test-btoc32-1.sh tests/test-btoc32-2.sh +tests/test-btoc32-3.sh tests/test-btoc32.c tests/signature.h tests/macros.h @@ -15,7 +16,7 @@ gt_LOCALE_FR gt_LOCALE_FR_UTF8 Makefile.am: -TESTS += test-btoc32-1.sh test-btoc32-2.sh +TESTS += test-btoc32-1.sh test-btoc32-2.sh test-btoc32-3.sh TESTS_ENVIRONMENT += LOCALE_FR='@LOCALE_FR@' LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' check_PROGRAMS += test-btoc32 test_btoc32_LDADD = $(LDADD) $(SETLOCALE_LIB) diff --git a/tests/test-btoc32-3.sh b/tests/test-btoc32-3.sh new file mode 100755 index 0000000000..422c3daed5 --- /dev/null +++ b/tests/test-btoc32-3.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +# Test whether the POSIX locale has encoding errors. +LC_ALL=C \ +${CHECKER} ./test-btoc32${EXEEXT} 3 || exit 1 +LC_ALL=POSIX \ +${CHECKER} ./test-btoc32${EXEEXT} 3 || exit 1 + +exit 0 diff --git a/tests/test-btoc32.c b/tests/test-btoc32.c index 90b38cfcf5..a2c48dde6f 100644 --- a/tests/test-btoc32.c +++ b/tests/test-btoc32.c @@ -40,6 +40,15 @@ main (int argc, char *argv[]) ASSERT (btoc32 (EOF) == WEOF); +#ifdef __ANDROID__ + /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the + "C" locale. Furthermore, when you attempt to set the "C" or "POSIX" + locale via setlocale(), what you get is a "C" locale with UTF-8 encoding, + that is, effectively the "C.UTF-8" locale. */ + if (argc > 1 && strcmp (argv[1], "3") == 0 && MB_CUR_MAX > 1) + argv[1] = "2"; +#endif + if (argc > 1) switch (argv[1][0]) { @@ -58,6 +67,27 @@ main (int argc, char *argv[]) for (c = 0x80; c < 0x100; c++) ASSERT (btoc32 (c) == WEOF); return 0; + + case '3': + /* C or POSIX locale. */ + for (c = 0; c < 0x100; c++) + if (c != 0) + { + /* We are testing all nonnull bytes. 
*/ + wint_t wc = btoc32 (c); + /* POSIX:2018 says regarding btowc: "In the POSIX locale, btowc() + shall not return WEOF if c has a value in the range 0 to 255 + inclusive." It is reasonable to expect btoc32 to behave in + the same way. */ + if (c < 0x80) + /* c is an ASCII character. */ + ASSERT (wc == c); + else + /* On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF. + But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF. */ + ASSERT (wc == c || wc == 0xDF00 + c); + } + return 0; } return 1; -- 2.34.1
From db6135d5d5bc642bb8c95c384512f3f1aff68ebb Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Mon, 3 Apr 2023 14:24:45 +0200 Subject: [PATCH 4/6] mbsrtoc32s tests: Check behaviour in the C locale. * tests/test-mbsrtoc32s.c (main): Test behaviour in the C locale. Based on tests/test-mbsrtowcs.c. * tests/test-mbsrtoc32s-5.sh: New file, based on tests/test-mbsrtowcs5.sh. * modules/mbsrtoc32s-tests (Files): Add it. (Depends-on): Add btoc32. (Makefile.am): Run test-mbsrtoc32s-5.sh. --- ChangeLog | 11 ++++++ modules/mbsrtoc32s-tests | 6 ++- tests/test-mbsrtoc32s-5.sh | 9 +++++ tests/test-mbsrtoc32s.c | 77 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 1 deletion(-) create mode 100755 tests/test-mbsrtoc32s-5.sh diff --git a/ChangeLog b/ChangeLog index b05eb8f846..7fbb423791 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2023-04-03 Bruno Haible <br...@clisp.org> + + mbsrtoc32s tests: Check behaviour in the C locale. + * tests/test-mbsrtoc32s.c (main): Test behaviour in the C locale. Based + on tests/test-mbsrtowcs.c. + * tests/test-mbsrtoc32s-5.sh: New file, based on + tests/test-mbsrtowcs5.sh. + * modules/mbsrtoc32s-tests (Files): Add it. + (Depends-on): Add btoc32. + (Makefile.am): Run test-mbsrtoc32s-5.sh. + 2023-04-03 Bruno Haible <br...@clisp.org> btoc32 tests: Check behaviour in the C locale. 
diff --git a/modules/mbsrtoc32s-tests b/modules/mbsrtoc32s-tests index f794c5ae32..0adc62368e 100644 --- a/modules/mbsrtoc32s-tests +++ b/modules/mbsrtoc32s-tests @@ -3,6 +3,7 @@ tests/test-mbsrtoc32s-1.sh tests/test-mbsrtoc32s-2.sh tests/test-mbsrtoc32s-3.sh tests/test-mbsrtoc32s-4.sh +tests/test-mbsrtoc32s-5.sh tests/test-mbsrtoc32s.c tests/signature.h tests/macros.h @@ -14,6 +15,7 @@ m4/codeset.m4 Depends-on: mbrtoc32 mbsinit +btoc32 c32tob setlocale @@ -24,7 +26,9 @@ gt_LOCALE_JA gt_LOCALE_ZH_CN Makefile.am: -TESTS += test-mbsrtoc32s-1.sh test-mbsrtoc32s-2.sh test-mbsrtoc32s-3.sh test-mbsrtoc32s-4.sh +TESTS += \ + test-mbsrtoc32s-1.sh test-mbsrtoc32s-2.sh test-mbsrtoc32s-3.sh \ + test-mbsrtoc32s-4.sh test-mbsrtoc32s-5.sh TESTS_ENVIRONMENT += \ LOCALE_FR='@LOCALE_FR@' \ LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' \ diff --git a/tests/test-mbsrtoc32s-5.sh b/tests/test-mbsrtoc32s-5.sh new file mode 100755 index 0000000000..fb922b2b26 --- /dev/null +++ b/tests/test-mbsrtoc32s-5.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +# Test whether the POSIX locale has encoding errors. +LC_ALL=C \ +${CHECKER} ./test-mbsrtoc32s${EXEEXT} 5 || exit 1 +LC_ALL=POSIX \ +${CHECKER} ./test-mbsrtoc32s${EXEEXT} 5 || exit 1 + +exit 0 diff --git a/tests/test-mbsrtoc32s.c b/tests/test-mbsrtoc32s.c index ef985fdc03..aed9d7b2fb 100644 --- a/tests/test-mbsrtoc32s.c +++ b/tests/test-mbsrtoc32s.c @@ -72,6 +72,15 @@ main (int argc, char *argv[]) ASSERT (mbsinit (&state)); } +#ifdef __ANDROID__ + /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the + "C" locale. Furthermore, when you attempt to set the "C" or "POSIX" + locale via setlocale(), what you get is a "C" locale with UTF-8 encoding, + that is, effectively the "C.UTF-8" locale. */ + if (argc > 1 && strcmp (argv[1], "5") == 0 && MB_CUR_MAX > 1) + argv[1] = "2"; +#endif + if (argc > 1) { int unlimited; @@ -281,6 +290,74 @@ main (int argc, char *argv[]) } break; + case '5': + /* C or POSIX locale. 
*/ + { + char input[] = "n/a"; + memset (&state, '\0', sizeof (mbstate_t)); + + src = input; + temp_state = state; + ret = mbsrtoc32s (NULL, &src, unlimited ? BUFSIZE : 1, &temp_state); + ASSERT (ret == 3); + ASSERT (src == input); + ASSERT (mbsinit (&state)); + + src = input; + ret = mbsrtoc32s (buf, &src, unlimited ? BUFSIZE : 1, &state); + ASSERT (ret == (unlimited ? 3 : 1)); + ASSERT (src == (unlimited ? NULL : input + 1)); + ASSERT (buf[0] == 'n'); + if (unlimited) + { + ASSERT (buf[1] == '/'); + ASSERT (buf[2] == 'a'); + ASSERT (buf[3] == 0); + ASSERT (buf[4] == (char32_t) 0xBADFACE); + } + else + ASSERT (buf[1] == (char32_t) 0xBADFACE); + ASSERT (mbsinit (&state)); + } + { + int c; + char input[2]; + + memset (&state, '\0', sizeof (mbstate_t)); + for (c = 0; c < 0x100; c++) + if (c != 0) + { + /* We are testing all nonnull bytes. */ + input[0] = c; + input[1] = '\0'; + + src = input; + ret = mbsrtoc32s (NULL, &src, unlimited ? BUFSIZE : 1, &state); + ASSERT (ret == 1); + ASSERT (src == input); + ASSERT (mbsinit (&state)); + + buf[0] = buf[1] = (char32_t) 0xBADFACE; + src = input; + ret = mbsrtoc32s (buf, &src, unlimited ? BUFSIZE : 1, &state); + /* POSIX:2018 says regarding mbsrtowcs: "In the POSIX locale an + [EILSEQ] error cannot occur since all byte values are valid + characters." It is reasonable to expect mbsrtoc32s to behave + in the same way. */ + ASSERT (ret == 1); + ASSERT (src == (unlimited ? NULL : input + 1)); + if (c < 0x80) + /* c is an ASCII character. */ + ASSERT (buf[0] == c); + else + /* On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF. + But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF. */ + ASSERT (buf[0] == (btoc32 (c) == 0xDF00 + c ? btoc32 (c) : c)); + ASSERT (mbsinit (&state)); + } + } + break; + default: return 1; } -- 2.34.1
From 52178721fa208bade898c3d14aed806e87bce642 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Mon, 3 Apr 2023 14:24:48 +0200 Subject: [PATCH 5/6] mbsnrtoc32s tests: Check behaviour in the C locale. * tests/test-mbsnrtoc32s.c (main): Test behaviour in the C locale. Based on tests/test-mbsnrtowcs.c. * tests/test-mbsnrtoc32s-5.sh: New file, based on tests/test-mbsrtowcs5.sh. * modules/mbsnrtoc32s-tests (Files): Add it. (Depends-on): Add btoc32. (Makefile.am): Run test-mbsnrtoc32s-5.sh. --- ChangeLog | 11 ++++++ modules/mbsnrtoc32s-tests | 6 ++- tests/test-mbsnrtoc32s-5.sh | 9 +++++ tests/test-mbsnrtoc32s.c | 77 +++++++++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 1 deletion(-) create mode 100755 tests/test-mbsnrtoc32s-5.sh diff --git a/ChangeLog b/ChangeLog index 7fbb423791..4699781f9f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2023-04-03 Bruno Haible <br...@clisp.org> + + mbsnrtoc32s tests: Check behaviour in the C locale. + * tests/test-mbsnrtoc32s.c (main): Test behaviour in the C locale. Based + on tests/test-mbsnrtowcs.c. + * tests/test-mbsnrtoc32s-5.sh: New file, based on + tests/test-mbsrtowcs5.sh. + * modules/mbsnrtoc32s-tests (Files): Add it. + (Depends-on): Add btoc32. + (Makefile.am): Run test-mbsnrtoc32s-5.sh. + 2023-04-03 Bruno Haible <br...@clisp.org> mbsrtoc32s tests: Check behaviour in the C locale. 
diff --git a/modules/mbsnrtoc32s-tests b/modules/mbsnrtoc32s-tests index 449c277aa0..c7af163045 100644 --- a/modules/mbsnrtoc32s-tests +++ b/modules/mbsnrtoc32s-tests @@ -3,6 +3,7 @@ tests/test-mbsnrtoc32s-1.sh tests/test-mbsnrtoc32s-2.sh tests/test-mbsnrtoc32s-3.sh tests/test-mbsnrtoc32s-4.sh +tests/test-mbsnrtoc32s-5.sh tests/test-mbsnrtoc32s.c tests/signature.h tests/macros.h @@ -14,6 +15,7 @@ m4/codeset.m4 Depends-on: mbrtoc32 mbsinit +btoc32 c32tob setlocale @@ -24,7 +26,9 @@ gt_LOCALE_JA gt_LOCALE_ZH_CN Makefile.am: -TESTS += test-mbsnrtoc32s-1.sh test-mbsnrtoc32s-2.sh test-mbsnrtoc32s-3.sh test-mbsnrtoc32s-4.sh +TESTS += \ + test-mbsnrtoc32s-1.sh test-mbsnrtoc32s-2.sh test-mbsnrtoc32s-3.sh \ + test-mbsnrtoc32s-4.sh test-mbsnrtoc32s-5.sh TESTS_ENVIRONMENT += \ LOCALE_FR='@LOCALE_FR@' \ LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' \ diff --git a/tests/test-mbsnrtoc32s-5.sh b/tests/test-mbsnrtoc32s-5.sh new file mode 100755 index 0000000000..768d10b1e8 --- /dev/null +++ b/tests/test-mbsnrtoc32s-5.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +# Test whether the POSIX locale has encoding errors. +LC_ALL=C \ +${CHECKER} ./test-mbsnrtoc32s${EXEEXT} 5 || exit 1 +LC_ALL=POSIX \ +${CHECKER} ./test-mbsnrtoc32s${EXEEXT} 5 || exit 1 + +exit 0 diff --git a/tests/test-mbsnrtoc32s.c b/tests/test-mbsnrtoc32s.c index 88c11f5e47..c18b66ab06 100644 --- a/tests/test-mbsnrtoc32s.c +++ b/tests/test-mbsnrtoc32s.c @@ -72,6 +72,15 @@ main (int argc, char *argv[]) ASSERT (mbsinit (&state)); } +#ifdef __ANDROID__ + /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the + "C" locale. Furthermore, when you attempt to set the "C" or "POSIX" + locale via setlocale(), what you get is a "C" locale with UTF-8 encoding, + that is, effectively the "C.UTF-8" locale. */ + if (argc > 1 && strcmp (argv[1], "5") == 0 && MB_CUR_MAX > 1) + argv[1] = "2"; +#endif + if (argc > 1) { int unlimited; @@ -281,6 +290,74 @@ main (int argc, char *argv[]) } break; + case '5': + /* C or POSIX locale. 
*/ + { + char input[] = "n/a"; + memset (&state, '\0', sizeof (mbstate_t)); + + src = input; + temp_state = state; + ret = mbsnrtoc32s (NULL, &src, 4, unlimited ? BUFSIZE : 1, &temp_state); + ASSERT (ret == 3); + ASSERT (src == input); + ASSERT (mbsinit (&state)); + + src = input; + ret = mbsnrtoc32s (buf, &src, 4, unlimited ? BUFSIZE : 1, &state); + ASSERT (ret == (unlimited ? 3 : 1)); + ASSERT (src == (unlimited ? NULL : input + 1)); + ASSERT (buf[0] == 'n'); + if (unlimited) + { + ASSERT (buf[1] == '/'); + ASSERT (buf[2] == 'a'); + ASSERT (buf[3] == 0); + ASSERT (buf[4] == (char32_t) 0xBADFACE); + } + else + ASSERT (buf[1] == (char32_t) 0xBADFACE); + ASSERT (mbsinit (&state)); + } + { + int c; + char input[2]; + + memset (&state, '\0', sizeof (mbstate_t)); + for (c = 0; c < 0x100; c++) + if (c != 0) + { + /* We are testing all nonnull bytes. */ + input[0] = c; + input[1] = '\0'; + + src = input; + ret = mbsnrtoc32s (NULL, &src, 2, unlimited ? BUFSIZE : 1, &state); + ASSERT (ret == 1); + ASSERT (src == input); + ASSERT (mbsinit (&state)); + + buf[0] = buf[1] = (char32_t) 0xBADFACE; + src = input; + ret = mbsnrtoc32s (buf, &src, 2, unlimited ? BUFSIZE : 1, &state); + /* POSIX:2018 says regarding mbsnrtowcs: "In the POSIX locale an + [EILSEQ] error cannot occur since all byte values are valid + characters." It is reasonable to expect mbsnrtoc32s to behave + in the same way. */ + ASSERT (ret == 1); + ASSERT (src == (unlimited ? NULL : input + 1)); + if (c < 0x80) + /* c is an ASCII character. */ + ASSERT (buf[0] == c); + else + /* On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF. + But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF. */ + ASSERT (buf[0] == (btoc32 (c) == 0xDF00 + c ? btoc32 (c) : c)); + ASSERT (mbsinit (&state)); + } + } + break; + default: return 1; } -- 2.34.1
From 144cf2bd1009ecc82ea289fa2b05d89d7555f458 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Mon, 3 Apr 2023 14:24:50 +0200 Subject: [PATCH 6/6] mbstoc32s tests: Check behaviour in the C locale. * tests/test-mbstoc32s.c (main): Test behaviour in the C locale. Based on tests/test-mbstowcs.c. * tests/test-mbstoc32s-5.sh: New file, based on tests/test-mbstowcs5.sh. * modules/mbstoc32s-tests (Files): Add it. (Depends-on): Add btoc32. (Makefile.am): Run test-mbstoc32s-5.sh. --- ChangeLog | 10 ++++++ modules/mbstoc32s-tests | 6 +++- tests/test-mbstoc32s-5.sh | 9 ++++++ tests/test-mbstoc32s.c | 66 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 1 deletion(-) create mode 100755 tests/test-mbstoc32s-5.sh diff --git a/ChangeLog b/ChangeLog index 4699781f9f..5c916c133b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2023-04-03 Bruno Haible <br...@clisp.org> + + mbstoc32s tests: Check behaviour in the C locale. + * tests/test-mbstoc32s.c (main): Test behaviour in the C locale. Based + on tests/test-mbstowcs.c. + * tests/test-mbstoc32s-5.sh: New file, based on tests/test-mbstowcs5.sh. + * modules/mbstoc32s-tests (Files): Add it. + (Depends-on): Add btoc32. + (Makefile.am): Run test-mbstoc32s-5.sh. + 2023-04-03 Bruno Haible <br...@clisp.org> mbsnrtoc32s tests: Check behaviour in the C locale. 
diff --git a/modules/mbstoc32s-tests b/modules/mbstoc32s-tests index 565796a06f..9e6dd61d26 100644 --- a/modules/mbstoc32s-tests +++ b/modules/mbstoc32s-tests @@ -3,6 +3,7 @@ tests/test-mbstoc32s-1.sh tests/test-mbstoc32s-2.sh tests/test-mbstoc32s-3.sh tests/test-mbstoc32s-4.sh +tests/test-mbstoc32s-5.sh tests/test-mbstoc32s.c tests/signature.h tests/macros.h @@ -12,6 +13,7 @@ m4/locale-zh.m4 m4/codeset.m4 Depends-on: +btoc32 c32tob setlocale @@ -22,7 +24,9 @@ gt_LOCALE_JA gt_LOCALE_ZH_CN Makefile.am: -TESTS += test-mbstoc32s-1.sh test-mbstoc32s-2.sh test-mbstoc32s-3.sh test-mbstoc32s-4.sh +TESTS += \ + test-mbstoc32s-1.sh test-mbstoc32s-2.sh test-mbstoc32s-3.sh \ + test-mbstoc32s-4.sh test-mbstoc32s-5.sh TESTS_ENVIRONMENT += \ LOCALE_FR='@LOCALE_FR@' \ LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' \ diff --git a/tests/test-mbstoc32s-5.sh b/tests/test-mbstoc32s-5.sh new file mode 100755 index 0000000000..edb36432b8 --- /dev/null +++ b/tests/test-mbstoc32s-5.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +# Test whether the POSIX locale has encoding errors. +LC_ALL=C \ +${CHECKER} ./test-mbstoc32s${EXEEXT} 5 || exit 1 +LC_ALL=POSIX \ +${CHECKER} ./test-mbstoc32s${EXEEXT} 5 || exit 1 + +exit 0 diff --git a/tests/test-mbstoc32s.c b/tests/test-mbstoc32s.c index 5358118906..e638999bb6 100644 --- a/tests/test-mbstoc32s.c +++ b/tests/test-mbstoc32s.c @@ -64,6 +64,15 @@ main (int argc, char *argv[]) ASSERT (wc == 0); } +#ifdef __ANDROID__ + /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the + "C" locale. Furthermore, when you attempt to set the "C" or "POSIX" + locale via setlocale(), what you get is a "C" locale with UTF-8 encoding, + that is, effectively the "C.UTF-8" locale. */ + if (argc > 1 && strcmp (argv[1], "5") == 0 && MB_CUR_MAX > 1) + argv[1] = "2"; +#endif + if (argc > 1) { int unlimited; @@ -232,6 +241,63 @@ main (int argc, char *argv[]) } break; + case '5': + /* C or POSIX locale. 
*/ + { + char input[] = "n/a"; + + src = input; + ret = mbstoc32s (NULL, src, unlimited ? BUFSIZE : 1); + ASSERT (ret == 3); + + src = input; + ret = mbstoc32s (buf, src, unlimited ? BUFSIZE : 1); + ASSERT (ret == (unlimited ? 3 : 1)); + ASSERT (buf[0] == 'n'); + if (unlimited) + { + ASSERT (buf[1] == '/'); + ASSERT (buf[2] == 'a'); + ASSERT (buf[3] == 0); + ASSERT (buf[4] == (char32_t) 0xBADFACE); + } + else + ASSERT (buf[1] == (char32_t) 0xBADFACE); + } + { + int c; + char input[2]; + + for (c = 0; c < 0x100; c++) + if (c != 0) + { + /* We are testing all nonnull bytes. */ + input[0] = c; + input[1] = '\0'; + + src = input; + ret = mbstoc32s (NULL, src, unlimited ? BUFSIZE : 1); + ASSERT (ret == 1); + + buf[0] = buf[1] = (char32_t) 0xBADFACE; + src = input; + ret = mbstoc32s (buf, src, unlimited ? BUFSIZE : 1); + /* POSIX:2018 says regarding mbstowcs: "In the POSIX locale an + [EILSEQ] error cannot occur since all byte values are valid + characters." It is reasonable to expect mbstoc32s to behave + in the same way. */ + ASSERT (ret == 1); + if (c < 0x80) + /* c is an ASCII character. */ + ASSERT (buf[0] == c); + else + /* On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF. + But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF. */ + ASSERT (buf[0] == (btoc32 (c) == 0xDF00 + c ? btoc32 (c) : c)); + } + } + break; + default: return 1; } -- 2.34.1