new modules c32tolower, c32toupper

Bruno Haible Sun, 09 Apr 2023 17:04:21 -0700

The function mbrtoc32 transforms a multibyte character to a char32_t,
thus solving the "wchar_t mess", explained in
https://www.gnu.org/software/libunistring/manual/html_node/The-wchar_005ft-mess.html
 .


But while ISO C 11..23 has upper/lowercase mapping functions for wchar_t
(towlower, towupper), it does not have such functions for char32_t values.

With these patches, I'm adding towlower / towupper workalike functions
for char32_t.


2023-04-09  Bruno Haible  <br...@clisp.org>

        c32toupper: Add tests.
        * tests/test-c32toupper.sh: New file.
        * tests/test-c32toupper.c: New file, based on tests/test-c32islower.c.
        * modules/c32toupper-tests: New file.

        c32toupper: New module.
        * lib/uchar.in.h (c32toupper): New declaration.
        * lib/c32toupper.c: New file.
        * modules/c32toupper: New file.
        * m4/uchar_h.m4 (gl_UCHAR_H_REQUIRE_DEFAULTS): Initialize
        GNULIB_C32TOUPPER.
        * modules/uchar (Makefile.am): Substitute GNULIB_C32TOUPPER.

2023-04-09  Bruno Haible  <br...@clisp.org>

        c32tolower: Add tests.
        * tests/test-c32tolower.sh: New file.
        * tests/test-c32tolower.c: New file, based on tests/test-c32isupper.c.
        * modules/c32tolower-tests: New file.

        c32tolower: New module.
        * lib/uchar.in.h (c32tolower): New declaration.
        * lib/c32tolower.c: New file.
        * lib/c32to-impl.h: New file, based on lib/c32is-impl.h.
        * modules/c32tolower: New file.
        * m4/uchar_h.m4 (gl_UCHAR_H_REQUIRE_DEFAULTS): Initialize
        GNULIB_C32TOLOWER.
        * modules/uchar (Makefile.am): Substitute GNULIB_C32TOLOWER.

>From 444ebd4da72a6d959563c9ac29927c2ccc11e003 Mon Sep 17 00:00:00 2001
From: Bruno Haible <br...@clisp.org>
Date: Tue, 4 Apr 2023 23:21:04 +0200
Subject: [PATCH 1/4] c32tolower: New module.

* lib/uchar.in.h (c32tolower): New declaration.
* lib/c32tolower.c: New file.
* lib/c32to-impl.h: New file, based on lib/c32is-impl.h.
* modules/c32tolower: New file.
* m4/uchar_h.m4 (gl_UCHAR_H_REQUIRE_DEFAULTS): Initialize
GNULIB_C32TOLOWER.
* modules/uchar (Makefile.am): Substitute GNULIB_C32TOLOWER.
---
 ChangeLog          | 11 ++++++
 lib/c32to-impl.h   | 92 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/c32tolower.c   | 34 +++++++++++++++++
 lib/uchar.in.h     | 18 +++++++++
 m4/uchar_h.m4      |  3 +-
 modules/c32tolower | 44 ++++++++++++++++++++++
 modules/uchar      |  1 +
 7 files changed, 202 insertions(+), 1 deletion(-)
 create mode 100644 lib/c32to-impl.h
 create mode 100644 lib/c32tolower.c
 create mode 100644 modules/c32tolower

diff --git a/ChangeLog b/ChangeLog
index ea2857a38c..79a570e891 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2023-04-09  Bruno Haible  <br...@clisp.org>
+
+	c32tolower: New module.
+	* lib/uchar.in.h (c32tolower): New declaration.
+	* lib/c32tolower.c: New file.
+	* lib/c32to-impl.h: New file, based on lib/c32is-impl.h.
+	* modules/c32tolower: New file.
+	* m4/uchar_h.m4 (gl_UCHAR_H_REQUIRE_DEFAULTS): Initialize
+	GNULIB_C32TOLOWER.
+	* modules/uchar (Makefile.am): Substitute GNULIB_C32TOLOWER.
+
 2023-04-09  Bruno Haible  <br...@clisp.org>
 
 	c32is*: Ensure GNULIB_defined_mbstate_t is defined on AIX.
diff --git a/lib/c32to-impl.h b/lib/c32to-impl.h
new file mode 100644
index 0000000000..724691a116
--- /dev/null
+++ b/lib/c32to-impl.h
@@ -0,0 +1,92 @@
+/* Case mapping of a 32-bit wide character.
+   Copyright (C) 2020-2023 Free Software Foundation, Inc.
+
+   This file is free software.
+   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
+   You can redistribute it and/or modify it under either
+     - the terms of the GNU Lesser General Public License as published
+       by the Free Software Foundation, either version 3, or (at your
+       option) any later version, or
+     - the terms of the GNU General Public License as published by the
+       Free Software Foundation; either version 2, or (at your option)
+       any later version, or
+     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
+
+   This file is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License and the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License and of the GNU General Public License along with this
+   program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <br...@clisp.org>, 2023.  */
+
+#include <wchar.h>
+#include <wctype.h>
+
+#if GNULIB_defined_mbstate_t
+# include "localcharset.h"
+# include "streq.h"
+#endif
+
+#include "unicase.h"
+
+#if _GL_WCHAR_T_IS_UCS4 && !GNULIB_defined_mbstate_t
+_GL_EXTERN_INLINE
+#endif
+wint_t
+FUNC (wint_t wc)
+{
+  /* The char32_t encoding of a multibyte character is defined by the way
+     mbrtoc32() is defined.  Each case below therefore applies WCHAR_FUNC or
+     UCS_FUNC (macros set by the includer); WEOF never reaches UCS_FUNC.  */
+#if GNULIB_defined_mbstate_t            /* AIX, IRIX */
+  /* mbrtoc32() is defined on top of mbtowc() for the non-UTF-8 locales
+     and directly for the UTF-8 locales.  */
+  if (wc != WEOF)
+    {
+      const char *encoding = locale_charset ();
+      if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
+        return UCS_FUNC (wc);
+      else
+        return WCHAR_FUNC (wc);
+    }
+  else
+    return wc;
+
+#elif HAVE_WORKING_MBRTOC32             /* glibc */
+  /* mbrtoc32() is essentially defined by the system libc.  */
+
+# if _GL_WCHAR_T_IS_UCS4
+  /* The char32_t encoding of a multibyte character is known to be the same as
+     the wchar_t encoding.  */
+  return WCHAR_FUNC (wc);
+# else
+  /* The char32_t encoding of a multibyte character is known to be UCS-4,
+     different from the wchar_t encoding.  */
+  if (wc != WEOF)
+    return UCS_FUNC (wc);
+  else
+    return wc;
+# endif
+
+#elif _GL_SMALL_WCHAR_T                 /* Cygwin, mingw, MSVC */
+  /* The wchar_t encoding is UTF-16.
+     The char32_t encoding is UCS-4.  */
+
+  if (wc == WEOF || wc == (wchar_t) wc)
+    /* wc is in the range for the tow* functions.  */
+    return WCHAR_FUNC (wc);
+  else
+    return UCS_FUNC (wc);
+
+#else /* macOS, FreeBSD, NetBSD, OpenBSD, HP-UX, Solaris, Minix, Android */
+  /* char32_t and wchar_t are equivalent.  */
+  static_assert (sizeof (char32_t) == sizeof (wchar_t));
+
+  return WCHAR_FUNC (wc);
+#endif
+}
diff --git a/lib/c32tolower.c b/lib/c32tolower.c
new file mode 100644
index 0000000000..447016214e
--- /dev/null
+++ b/lib/c32tolower.c
@@ -0,0 +1,34 @@
+/* Map a 32-bit wide character to lowercase.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This file is free software.
+   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
+   You can redistribute it and/or modify it under either
+     - the terms of the GNU Lesser General Public License as published
+       by the Free Software Foundation, either version 3, or (at your
+       option) any later version, or
+     - the terms of the GNU General Public License as published by the
+       Free Software Foundation; either version 2, or (at your option)
+       any later version, or
+     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
+
+   This file is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License and the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License and of the GNU General Public License along with this
+   program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#define IN_C32TOLOWER
+/* Specification.  */
+#include <uchar.h>
+
+#define FUNC c32tolower
+#define WCHAR_FUNC towlower
+#define UCS_FUNC uc_tolower
+#include "c32to-impl.h"
diff --git a/lib/uchar.in.h b/lib/uchar.in.h
index 4461a35901..c6795f7e91 100644
--- a/lib/uchar.in.h
+++ b/lib/uchar.in.h
@@ -343,6 +343,24 @@ _GL_CXXALIASWARN (c32isxdigit);
 #endif
 
 
+/* Case mapping of a 32-bit wide character.  */
+#if @GNULIB_C32TOLOWER@
+# if (_GL_WCHAR_T_IS_UCS4 && !GNULIB_defined_mbstate_t) && !defined IN_C32TOLOWER
+_GL_BEGIN_C_LINKAGE
+_GL_INLINE wint_t
+c32tolower (wint_t wc)
+{
+  return towlower (wc);
+}
+_GL_END_C_LINKAGE
+# else /* also taken inside lib/c32tolower.c, which defines the function */
+_GL_FUNCDECL_SYS (c32tolower, wint_t, (wint_t wc));
+# endif
+_GL_CXXALIAS_SYS (c32tolower, wint_t, (wint_t wc));
+_GL_CXXALIASWARN (c32tolower);
+#endif
+
+
 /* Converts a 32-bit wide character to a multibyte character.  */
 #if @GNULIB_C32RTOMB@
 # if @REPLACE_C32RTOMB@
diff --git a/m4/uchar_h.m4 b/m4/uchar_h.m4
index 3af24cf3e4..6bb83adb6a 100644
--- a/m4/uchar_h.m4
+++ b/m4/uchar_h.m4
@@ -1,4 +1,4 @@
-# uchar_h.m4 serial 22
+# uchar_h.m4 serial 23
 dnl Copyright (C) 2019-2023 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -214,6 +214,7 @@ AC_DEFUN([gl_UCHAR_H_REQUIRE_DEFAULTS]
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32ISSPACE])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32ISUPPER])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32ISXDIGIT])
+    gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32TOLOWER])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32RTOMB])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32SNRTOMBS])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32SRTOMBS])
diff --git a/modules/c32tolower b/modules/c32tolower
new file mode 100644
index 0000000000..b6ed633aec
--- /dev/null
+++ b/modules/c32tolower
@@ -0,0 +1,44 @@
+Description:
+c32tolower() function: map 32-bit wide character to lowercase.
+
+Files:
+lib/c32tolower.c
+lib/c32to-impl.h
+m4/mbrtoc32.m4
+m4/mbrtowc.m4
+m4/locale-fr.m4
+m4/locale-ja.m4
+m4/locale-zh.m4
+m4/codeset.m4
+
+Depends-on:
+uchar
+wchar
+wctype-h
+localcharset    [test $REPLACE_MBSTATE_T = 1]
+streq           [test $REPLACE_MBSTATE_T = 1]
+unicase/tolower
+assert-h
+
+configure.ac:
+AC_REQUIRE([gl_UCHAR_H])
+dnl Determine REPLACE_MBSTATE_T, from which GNULIB_defined_mbstate_t is
+dnl determined.  It describes how mbrtoc32 is implemented.
+AC_REQUIRE([gl_MBSTATE_T_BROKEN])
+AC_REQUIRE([gl_MBRTOC32_SANITYCHECK])
+gl_UCHAR_MODULE_INDICATOR([c32tolower])
+
+Makefile.am:
+lib_SOURCES += c32tolower.c
+
+Include:
+<uchar.h>
+
+Link:
+$(LTLIBUNISTRING) when linking with libtool, $(LIBUNISTRING) otherwise
+
+License:
+LGPLv3+ or GPLv2+
+
+Maintainer:
+Bruno Haible
diff --git a/modules/uchar b/modules/uchar
index 985b58206f..f49db7d42c 100644
--- a/modules/uchar
+++ b/modules/uchar
@@ -51,6 +51,7 @@ uchar.h: uchar.in.h $(top_builddir)/config.status $(CXXDEFS_H)
 	      -e 's/@''GNULIB_C32ISSPACE''@/$(GNULIB_C32ISSPACE)/g' \
 	      -e 's/@''GNULIB_C32ISUPPER''@/$(GNULIB_C32ISUPPER)/g' \
 	      -e 's/@''GNULIB_C32ISXDIGIT''@/$(GNULIB_C32ISXDIGIT)/g' \
+	      -e 's/@''GNULIB_C32TOLOWER''@/$(GNULIB_C32TOLOWER)/g' \
 	      -e 's/@''GNULIB_C32RTOMB''@/$(GNULIB_C32RTOMB)/g' \
 	      -e 's/@''GNULIB_C32SNRTOMBS''@/$(GNULIB_C32SNRTOMBS)/g' \
 	      -e 's/@''GNULIB_C32SRTOMBS''@/$(GNULIB_C32SRTOMBS)/g' \
-- 
2.34.1

>From cba07f77a7579c8d4f2d80783cb3b5acdab24bc5 Mon Sep 17 00:00:00 2001
From: Bruno Haible <br...@clisp.org>
Date: Mon, 10 Apr 2023 01:46:37 +0200
Subject: [PATCH 2/4] c32tolower: Add tests.

* tests/test-c32tolower.sh: New file.
* tests/test-c32tolower.c: New file, based on tests/test-c32isupper.c.
* modules/c32tolower-tests: New file.
---
 ChangeLog                |   5 +
 modules/c32tolower-tests |  30 +++
 tests/test-c32tolower.c  | 436 +++++++++++++++++++++++++++++++++++++++
 tests/test-c32tolower.sh |  42 ++++
 4 files changed, 513 insertions(+)
 create mode 100644 modules/c32tolower-tests
 create mode 100644 tests/test-c32tolower.c
 create mode 100755 tests/test-c32tolower.sh

diff --git a/ChangeLog b/ChangeLog
index 79a570e891..defa123445 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2023-04-09  Bruno Haible  <br...@clisp.org>
 
+	c32tolower: Add tests.
+	* tests/test-c32tolower.sh: New file.
+	* tests/test-c32tolower.c: New file, based on tests/test-c32isupper.c.
+	* modules/c32tolower-tests: New file.
+
 	c32tolower: New module.
 	* lib/uchar.in.h (c32tolower): New declaration.
 	* lib/c32tolower.c: New file.
diff --git a/modules/c32tolower-tests b/modules/c32tolower-tests
new file mode 100644
index 0000000000..1153eabadc
--- /dev/null
+++ b/modules/c32tolower-tests
@@ -0,0 +1,30 @@
+Files:
+tests/test-c32tolower.sh
+tests/test-c32tolower.c
+tests/signature.h
+tests/macros.h
+m4/locale-fr.m4
+m4/locale-ja.m4
+m4/locale-zh.m4
+m4/codeset.m4
+
+Depends-on:
+mbrtoc32
+c32rtomb
+setlocale
+
+configure.ac:
+gt_LOCALE_FR
+gt_LOCALE_FR_UTF8
+gt_LOCALE_JA
+gt_LOCALE_ZH_CN
+
+Makefile.am:
+TESTS += test-c32tolower.sh
+TESTS_ENVIRONMENT += \
+  LOCALE_FR='@LOCALE_FR@' \
+  LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' \
+  LOCALE_JA='@LOCALE_JA@' \
+  LOCALE_ZH_CN='@LOCALE_ZH_CN@'
+check_PROGRAMS += test-c32tolower
+test_c32tolower_LDADD = $(LDADD) $(SETLOCALE_LIB) $(MBRTOWC_LIB) $(LIBUNISTRING)
diff --git a/tests/test-c32tolower.c b/tests/test-c32tolower.c
new file mode 100644
index 0000000000..30fe9b5848
--- /dev/null
+++ b/tests/test-c32tolower.c
@@ -0,0 +1,436 @@
+/* Test of c32tolower() function.
+   Copyright (C) 2020-2023 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include <uchar.h>
+
+#include "signature.h"
+SIGNATURE_CHECK (c32tolower, wint_t, (wint_t));
+
+#include <locale.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+
+#include "macros.h"
+
+/* Representation of a multibyte character.  */
+#define MBCHAR_BUF_SIZE 6
+struct multibyte
+{
+  size_t nbytes;             /* number of bytes stored in buf; 0 if none */
+  char buf[MBCHAR_BUF_SIZE]; /* room for the bytes */
+};
+
+/* Returns the value of c32tolower for the multibyte character s[0..n-1],
+   as a multibyte character (nbytes == 0 if it has no multibyte form).  */
+static struct multibyte
+for_character (const char *s, size_t n)
+{
+  mbstate_t state;
+  char32_t wc;
+  size_t ret;
+  struct multibyte result;
+
+  memset (&state, '\0', sizeof (mbstate_t));
+  wc = (char32_t) 0xBADFACE; /* placeholder until mbrtoc32 stores a result */
+  ret = mbrtoc32 (&wc, s, n, &state);
+  ASSERT (ret == n);
+
+  wc = c32tolower (wc);
+  ASSERT (wc != WEOF);
+
+  memset (&state, '\0', sizeof (mbstate_t));
+  ret = c32rtomb (result.buf, wc, &state);
+  ASSERT (ret != 0);
+  if (ret == (size_t)(-1))
+    /* wc cannot be converted back to multibyte.  */
+    result.nbytes = 0;
+  else
+    {
+      ASSERT (ret <= MBCHAR_BUF_SIZE);
+      result.nbytes = ret;
+    }
+  return result;
+}
+
+int
+main (int argc, char *argv[])
+{
+  wint_t wc;
+  struct multibyte mb;
+  char buf[4];
+
+  /* configure should already have checked that the locale is supported.  */
+  if (setlocale (LC_ALL, "") == NULL)
+    return 1;
+
+  /* Test WEOF.  */
+  wc = c32tolower (WEOF);
+  ASSERT (wc == WEOF);
+
+  /* Test single-byte characters.
+     POSIX specifies in
+       <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html>
+     that
+       - in all locales, the uppercase characters include the A ... Z
+         characters, and the corresponding characters a ... z (if not in a
+         Turkish locale) are lowercase,
+       - in the "POSIX" locale (which is usually the same as the "C" locale),
+         the uppercase characters include only the ASCII A ... Z characters,
+         and the corresponding characters a ... z are lowercase.
+   */
+#if defined __NetBSD__
+  /* towlower is broken in the zh_CN.GB18030 locale on NetBSD 9.0.
+     See <https://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=57339>.  */
+  if (!(argc > 1 && argv[1][0] == '4'))
+#endif
+  {
+    int c;
+
+    for (c = 0; c < 0x100; c++)
+      switch (c)
+        {
+        case '\t': case '\v': case '\f':
+        case ' ': case '!': case '"': case '#': case '%':
+        case '&': case '\'': case '(': case ')': case '*':
+        case '+': case ',': case '-': case '.': case '/':
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8': case '9':
+        case ':': case ';': case '<': case '=': case '>':
+        case '?':
+        case 'A': case 'B': case 'C': case 'D': case 'E':
+        case 'F': case 'G': case 'H': case 'I': case 'J':
+        case 'K': case 'L': case 'M': case 'N': case 'O':
+        case 'P': case 'Q': case 'R': case 'S': case 'T':
+        case 'U': case 'V': case 'W': case 'X': case 'Y':
+        case 'Z':
+        case '[': case '\\': case ']': case '^': case '_':
+        case 'a': case 'b': case 'c': case 'd': case 'e':
+        case 'f': case 'g': case 'h': case 'i': case 'j':
+        case 'k': case 'l': case 'm': case 'n': case 'o':
+        case 'p': case 'q': case 'r': case 's': case 't':
+        case 'u': case 'v': case 'w': case 'x': case 'y':
+        case 'z': case '{': case '|': case '}': case '~':
+          /* c is in the ISO C "basic character set".  */
+          buf[0] = (unsigned char) c;
+          mb = for_character (buf, 1);
+          switch (c)
+            {
+            case 'A': case 'B': case 'C': case 'D': case 'E':
+            case 'F': case 'G': case 'H': case 'I': case 'J':
+            case 'K': case 'L': case 'M': case 'N': case 'O':
+            case 'P': case 'Q': case 'R': case 'S': case 'T':
+            case 'U': case 'V': case 'W': case 'X': case 'Y':
+            case 'Z':
+              ASSERT (mb.nbytes == 1);
+              ASSERT ((unsigned char) mb.buf[0] == (unsigned char) c - 'A' + 'a');
+              break;
+            default:
+              ASSERT (mb.nbytes == 1);
+              ASSERT ((unsigned char) mb.buf[0] == c);
+              break;
+            }
+          break;
+        }
+  }
+  /* argv[1][0] selects the locale-specific checks; see test-c32tolower.sh.  */
+  if (argc > 1)
+    switch (argv[1][0])
+      {
+      case '0':
+        /* C locale; tested above.  */
+        return 0;
+
+      case '1':
+        /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
+        {
+          /* U+00B2 SUPERSCRIPT TWO */
+          mb = for_character ("\262", 1);
+          ASSERT (mb.nbytes == 1);
+          ASSERT (memcmp (mb.buf, "\262", 1) == 0);
+          /* U+00B5 MICRO SIGN */
+          mb = for_character ("\265", 1);
+          ASSERT (mb.nbytes == 1);
+          ASSERT (memcmp (mb.buf, "\265", 1) == 0);
+          /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
+          mb = for_character ("\311", 1);
+          ASSERT (mb.nbytes == 1);
+          ASSERT (memcmp (mb.buf, "\351", 1) == 0);
+          /* U+00DF LATIN SMALL LETTER SHARP S */
+          mb = for_character ("\337", 1);
+          ASSERT (mb.nbytes == 1);
+          ASSERT (memcmp (mb.buf, "\337", 1) == 0);
+          /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
+          mb = for_character ("\351", 1);
+          ASSERT (mb.nbytes == 1);
+          ASSERT (memcmp (mb.buf, "\351", 1) == 0);
+          /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
+          mb = for_character ("\377", 1);
+          ASSERT (mb.nbytes == 1);
+          ASSERT (memcmp (mb.buf, "\377", 1) == 0);
+        }
+        return 0;
+
+      case '2':
+        /* Locale encoding is EUC-JP.  */
+        {
+        #if !((defined __APPLE__ && defined __MACH__) || defined __DragonFly__)
+          /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
+          mb = for_character ("\217\252\261", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\217\253\261", 3) == 0);
+        #endif
+          /* U+00DF LATIN SMALL LETTER SHARP S */
+          mb = for_character ("\217\251\316", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\217\251\316", 3) == 0);
+          /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
+          mb = for_character ("\217\253\261", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\217\253\261", 3) == 0);
+          /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
+          mb = for_character ("\217\253\363", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\217\253\363", 3) == 0);
+        #if !((defined __APPLE__ && defined __MACH__) || defined __DragonFly__)
+          /* U+0141 LATIN CAPITAL LETTER L WITH STROKE */
+          mb = for_character ("\217\251\250", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\217\251\310", 3) == 0);
+        #endif
+          /* U+0142 LATIN SMALL LETTER L WITH STROKE */
+          mb = for_character ("\217\251\310", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\217\251\310", 3) == 0);
+        #if !defined __DragonFly__
+          /* U+0429 CYRILLIC CAPITAL LETTER SHCHA */
+          mb = for_character ("\247\273", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\247\353", 2) == 0);
+        #endif
+          /* U+0449 CYRILLIC SMALL LETTER SHCHA */
+          mb = for_character ("\247\353", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\247\353", 2) == 0);
+          /* U+3073 HIRAGANA LETTER BI */
+          mb = for_character ("\244\323", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\244\323", 2) == 0);
+        #if !defined __DragonFly__
+          /* U+FF27 FULLWIDTH LATIN CAPITAL LETTER G */
+          mb = for_character ("\243\307", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\243\347", 2) == 0);
+        #endif
+          /* U+FF47 FULLWIDTH LATIN SMALL LETTER G */
+          mb = for_character ("\243\347", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\243\347", 2) == 0);
+        }
+        return 0;
+
+      case '3':
+        /* Locale encoding is UTF-8.  */
+        {
+          /* U+00B2 SUPERSCRIPT TWO */
+          mb = for_character ("\302\262", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\302\262", 2) == 0);
+          /* U+00B5 MICRO SIGN */
+          mb = for_character ("\302\265", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\302\265", 2) == 0);
+        #if !(defined _WIN32 && !defined __CYGWIN__)
+          /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
+          mb = for_character ("\303\211", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\303\251", 2) == 0);
+        #endif
+          /* U+00DF LATIN SMALL LETTER SHARP S */
+          mb = for_character ("\303\237", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\303\237", 2) == 0);
+          /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
+          mb = for_character ("\303\251", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\303\251", 2) == 0);
+          /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
+          mb = for_character ("\303\277", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\303\277", 2) == 0);
+          /* U+0141 LATIN CAPITAL LETTER L WITH STROKE */
+          mb = for_character ("\305\201", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\305\202", 2) == 0);
+          /* U+0142 LATIN SMALL LETTER L WITH STROKE */
+          mb = for_character ("\305\202", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\305\202", 2) == 0);
+          /* U+0429 CYRILLIC CAPITAL LETTER SHCHA */
+          mb = for_character ("\320\251", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\321\211", 2) == 0);
+          /* U+0449 CYRILLIC SMALL LETTER SHCHA */
+          mb = for_character ("\321\211", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\321\211", 2) == 0);
+          /* U+05D5 HEBREW LETTER VAV */
+          mb = for_character ("\327\225", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\327\225", 2) == 0);
+          /* U+3073 HIRAGANA LETTER BI */
+          mb = for_character ("\343\201\263", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\343\201\263", 3) == 0);
+          /* U+3162 HANGUL LETTER YI */
+          mb = for_character ("\343\205\242", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\343\205\242", 3) == 0);
+          /* U+FF27 FULLWIDTH LATIN CAPITAL LETTER G */
+          mb = for_character ("\357\274\247", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\357\275\207", 3) == 0);
+          /* U+FF47 FULLWIDTH LATIN SMALL LETTER G */
+          mb = for_character ("\357\275\207", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\357\275\207", 3) == 0);
+          /* U+FFDB HALFWIDTH HANGUL LETTER YI */
+          mb = for_character ("\357\277\233", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\357\277\233", 3) == 0);
+        #if !(defined __DragonFly__ || defined __sun)
+          /* U+10419 DESERET CAPITAL LETTER EF */
+          mb = for_character ("\360\220\220\231", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\360\220\221\201", 4) == 0);
+        #endif
+          /* U+10441 DESERET SMALL LETTER EF */
+          mb = for_character ("\360\220\221\201", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\360\220\221\201", 4) == 0);
+          /* U+E0041 TAG LATIN CAPITAL LETTER A */
+          mb = for_character ("\363\240\201\201", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\363\240\201\201", 4) == 0);
+          /* U+E0061 TAG LATIN SMALL LETTER A */
+          mb = for_character ("\363\240\201\241", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\363\240\201\241", 4) == 0);
+        }
+        return 0;
+
+      case '4':
+        /* Locale encoding is GB18030.  */
+        {
+          /* U+00B2 SUPERSCRIPT TWO */
+          mb = for_character ("\201\060\205\065", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\205\065", 4) == 0);
+          /* U+00B5 MICRO SIGN */
+          mb = for_character ("\201\060\205\070", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\205\070", 4) == 0);
+        #if !(defined __FreeBSD__ || defined __DragonFly__ || defined __sun)
+          /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
+          mb = for_character ("\201\060\207\067", 4);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\250\246", 2) == 0);
+        #endif
+          /* U+00DF LATIN SMALL LETTER SHARP S */
+          mb = for_character ("\201\060\211\070", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\211\070", 4) == 0);
+          /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
+          mb = for_character ("\250\246", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\250\246", 2) == 0);
+          /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
+          mb = for_character ("\201\060\213\067", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\213\067", 4) == 0);
+        #if !(defined __FreeBSD__ || defined __DragonFly__ || defined __sun)
+          /* U+0141 LATIN CAPITAL LETTER L WITH STROKE */
+          mb = for_character ("\201\060\221\071", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\222\060", 4) == 0);
+        #endif
+          /* U+0142 LATIN SMALL LETTER L WITH STROKE */
+          mb = for_character ("\201\060\222\060", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\222\060", 4) == 0);
+        #if !(defined __FreeBSD__ || defined __DragonFly__)
+          /* U+0429 CYRILLIC CAPITAL LETTER SHCHA */
+          mb = for_character ("\247\273", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\247\353", 2) == 0);
+        #endif
+          /* U+0449 CYRILLIC SMALL LETTER SHCHA */
+          mb = for_character ("\247\353", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\247\353", 2) == 0);
+          /* U+05D5 HEBREW LETTER VAV */
+          mb = for_character ("\201\060\371\067", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\371\067", 4) == 0);
+          /* U+3073 HIRAGANA LETTER BI */
+          mb = for_character ("\244\323", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\244\323", 2) == 0);
+          /* U+3162 HANGUL LETTER YI */
+          mb = for_character ("\201\071\256\062", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\071\256\062", 4) == 0);
+        #if !defined __DragonFly__
+          /* U+FF27 FULLWIDTH LATIN CAPITAL LETTER G */
+          mb = for_character ("\243\307", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\243\347", 2) == 0);
+        #endif
+          /* U+FF47 FULLWIDTH LATIN SMALL LETTER G */
+          mb = for_character ("\243\347", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\243\347", 2) == 0);
+          /* U+FFDB HALFWIDTH HANGUL LETTER YI */
+          mb = for_character ("\204\061\241\071", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\204\061\241\071", 4) == 0);
+        #if !((defined __APPLE__ && defined __MACH__) || defined __FreeBSD__ || defined __DragonFly__ || defined __NetBSD__ || defined __sun)
+          /* U+10419 DESERET CAPITAL LETTER EF */
+          mb = for_character ("\220\060\351\071", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\220\060\355\071", 4) == 0);
+        #endif
+          /* U+10441 DESERET SMALL LETTER EF */
+          mb = for_character ("\220\060\355\071", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\220\060\355\071", 4) == 0);
+          /* U+E0041 TAG LATIN CAPITAL LETTER A */
+          mb = for_character ("\323\066\234\063", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\323\066\234\063", 4) == 0);
+          /* U+E0061 TAG LATIN SMALL LETTER A */
+          mb = for_character ("\323\066\237\065", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\323\066\237\065", 4) == 0);
+        }
+        return 0;
+
+      }
+
+  return 1; /* no test-case argument, or one other than '0' ... '4' */
+}
diff --git a/tests/test-c32tolower.sh b/tests/test-c32tolower.sh
new file mode 100755
index 0000000000..a0859a900e
--- /dev/null
+++ b/tests/test-c32tolower.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+# Trace the commands, to distinguish the various invocations in the .log file.
+set -x
+
+# Test in the POSIX locale.
+LC_ALL=C     ${CHECKER} ./test-c32tolower${EXEEXT} 0 || exit 1
+LC_ALL=POSIX ${CHECKER} ./test-c32tolower${EXEEXT} 0 || exit 1
+
+# Test in an ISO-8859-1 or ISO-8859-15 locale, unless LOCALE_FR is 'none'.
+: "${LOCALE_FR=fr_FR}"
+if test $LOCALE_FR != none; then
+  LC_ALL=$LOCALE_FR \
+  ${CHECKER} ./test-c32tolower${EXEEXT} 1 \
+  || exit 1
+fi
+
+# Test in an EUC-JP locale, unless LOCALE_JA is 'none'.
+: "${LOCALE_JA=ja_JP}"
+if test $LOCALE_JA != none; then
+  LC_ALL=$LOCALE_JA \
+  ${CHECKER} ./test-c32tolower${EXEEXT} 2 \
+  || exit 1
+fi
+
+# Test in a UTF-8 locale, unless LOCALE_FR_UTF8 is 'none'.
+: "${LOCALE_FR_UTF8=fr_FR.UTF-8}"
+if test $LOCALE_FR_UTF8 != none; then
+  LC_ALL=$LOCALE_FR_UTF8 \
+  ${CHECKER} ./test-c32tolower${EXEEXT} 3 \
+  || exit 1
+fi
+
+# Test in a GB18030 locale, unless LOCALE_ZH_CN is 'none'.
+: "${LOCALE_ZH_CN=zh_CN.GB18030}"
+if test $LOCALE_ZH_CN != none; then
+  LC_ALL=$LOCALE_ZH_CN \
+  ${CHECKER} ./test-c32tolower${EXEEXT} 4 \
+  || exit 1
+fi
+
+exit 0
-- 
2.34.1

>From d47ae89803ca1cde69b8a312101421322f4259f6 Mon Sep 17 00:00:00 2001
From: Bruno Haible <br...@clisp.org>
Date: Mon, 10 Apr 2023 01:50:39 +0200
Subject: [PATCH 3/4] c32toupper: New module.

* lib/uchar.in.h (c32toupper): New declaration.
* lib/c32toupper.c: New file.
* modules/c32toupper: New file.
* m4/uchar_h.m4 (gl_UCHAR_H_REQUIRE_DEFAULTS): Initialize
GNULIB_C32TOUPPER.
* modules/uchar (Makefile.am): Substitute GNULIB_C32TOUPPER.
---
 ChangeLog          | 10 ++++++++++
 lib/c32toupper.c   | 34 ++++++++++++++++++++++++++++++++++
 lib/uchar.in.h     | 15 +++++++++++++++
 m4/uchar_h.m4      |  3 ++-
 modules/c32toupper | 44 ++++++++++++++++++++++++++++++++++++++++++++
 modules/uchar      |  1 +
 6 files changed, 106 insertions(+), 1 deletion(-)
 create mode 100644 lib/c32toupper.c
 create mode 100644 modules/c32toupper

diff --git a/ChangeLog b/ChangeLog
index defa123445..9b7487dd01 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2023-04-09  Bruno Haible  <br...@clisp.org>
+
+	c32toupper: New module.
+	* lib/uchar.in.h (c32toupper): New declaration.
+	* lib/c32toupper.c: New file.
+	* modules/c32toupper: New file.
+	* m4/uchar_h.m4 (gl_UCHAR_H_REQUIRE_DEFAULTS): Initialize
+	GNULIB_C32TOUPPER.
+	* modules/uchar (Makefile.am): Substitute GNULIB_C32TOUPPER.
+
 2023-04-09  Bruno Haible  <br...@clisp.org>
 
 	c32tolower: Add tests.
diff --git a/lib/c32toupper.c b/lib/c32toupper.c
new file mode 100644
index 0000000000..a3ca5ecf19
--- /dev/null
+++ b/lib/c32toupper.c
@@ -0,0 +1,34 @@
+/* Map a 32-bit wide character to uppercase.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This file is free software.
+   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
+   You can redistribute it and/or modify it under either
+     - the terms of the GNU Lesser General Public License as published
+       by the Free Software Foundation, either version 3, or (at your
+       option) any later version, or
+     - the terms of the GNU General Public License as published by the
+       Free Software Foundation; either version 2, or (at your option)
+       any later version, or
+     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
+
+   This file is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License and the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License and of the GNU General Public License along with this
+   program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#define IN_C32TOUPPER /* keeps <uchar.h> from defining c32toupper inline */
+/* Specification.  */
+#include <uchar.h>
+
+#define FUNC c32toupper
+#define WCHAR_FUNC towupper
+#define UCS_FUNC uc_toupper
+#include "c32to-impl.h" /* presumably defines FUNC from the macros above */
diff --git a/lib/uchar.in.h b/lib/uchar.in.h
index c6795f7e91..27b91c5b58 100644
--- a/lib/uchar.in.h
+++ b/lib/uchar.in.h
@@ -359,6 +359,21 @@ _GL_FUNCDECL_SYS (c32tolower, wint_t, (wint_t wc));
 _GL_CXXALIAS_SYS (c32tolower, wint_t, (wint_t wc));
 _GL_CXXALIASWARN (c32tolower);
 #endif
+#if @GNULIB_C32TOUPPER@
+# if (_GL_WCHAR_T_IS_UCS4 && !GNULIB_defined_mbstate_t) && !defined IN_C32TOUPPER
+_GL_BEGIN_C_LINKAGE
+_GL_INLINE wint_t
+c32toupper (wint_t wc)
+{
+  return towupper (wc); /* inline case: forward directly to towupper */
+}
+_GL_END_C_LINKAGE
+# else /* no inline body here: declaration only */
+_GL_FUNCDECL_SYS (c32toupper, wint_t, (wint_t wc));
+# endif
+_GL_CXXALIAS_SYS (c32toupper, wint_t, (wint_t wc));
+_GL_CXXALIASWARN (c32toupper);
+#endif
 
 
 /* Converts a 32-bit wide character to a multibyte character.  */
diff --git a/m4/uchar_h.m4 b/m4/uchar_h.m4
index 6bb83adb6a..2679371716 100644
--- a/m4/uchar_h.m4
+++ b/m4/uchar_h.m4
@@ -1,4 +1,4 @@
-# uchar_h.m4 serial 23
+# uchar_h.m4 serial 24
 dnl Copyright (C) 2019-2023 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -215,6 +215,7 @@ AC_DEFUN([gl_UCHAR_H_REQUIRE_DEFAULTS]
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32ISUPPER])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32ISXDIGIT])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32TOLOWER])
+    gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32TOUPPER])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32RTOMB])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32SNRTOMBS])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32SRTOMBS])
diff --git a/modules/c32toupper b/modules/c32toupper
new file mode 100644
index 0000000000..364e011818
--- /dev/null
+++ b/modules/c32toupper
@@ -0,0 +1,44 @@
+Description:
+c32toupper() function: map 32-bit wide character to uppercase.
+
+Files:
+lib/c32toupper.c
+lib/c32to-impl.h
+m4/mbrtoc32.m4
+m4/mbrtowc.m4
+m4/locale-fr.m4
+m4/locale-ja.m4
+m4/locale-zh.m4
+m4/codeset.m4
+
+Depends-on:
+uchar
+wchar
+wctype-h
+localcharset    [test $REPLACE_MBSTATE_T = 1]
+streq           [test $REPLACE_MBSTATE_T = 1]
+unicase/toupper
+assert-h
+
+configure.ac:
+AC_REQUIRE([gl_UCHAR_H])
+dnl Determine REPLACE_MBSTATE_T, from which GNULIB_defined_mbstate_t is
+dnl determined.  It describes how mbrtoc32 is implemented.
+AC_REQUIRE([gl_MBSTATE_T_BROKEN])
+AC_REQUIRE([gl_MBRTOC32_SANITYCHECK])
+gl_UCHAR_MODULE_INDICATOR([c32toupper])
+
+Makefile.am:
+lib_SOURCES += c32toupper.c
+
+Include:
+<uchar.h>
+
+Link:
+$(LTLIBUNISTRING) when linking with libtool, $(LIBUNISTRING) otherwise
+
+License:
+LGPLv3+ or GPLv2+
+
+Maintainer:
+Bruno Haible
diff --git a/modules/uchar b/modules/uchar
index f49db7d42c..6363d543d9 100644
--- a/modules/uchar
+++ b/modules/uchar
@@ -52,6 +52,7 @@ uchar.h: uchar.in.h $(top_builddir)/config.status $(CXXDEFS_H)
 	      -e 's/@''GNULIB_C32ISUPPER''@/$(GNULIB_C32ISUPPER)/g' \
 	      -e 's/@''GNULIB_C32ISXDIGIT''@/$(GNULIB_C32ISXDIGIT)/g' \
 	      -e 's/@''GNULIB_C32TOLOWER''@/$(GNULIB_C32TOLOWER)/g' \
+	      -e 's/@''GNULIB_C32TOUPPER''@/$(GNULIB_C32TOUPPER)/g' \
 	      -e 's/@''GNULIB_C32RTOMB''@/$(GNULIB_C32RTOMB)/g' \
 	      -e 's/@''GNULIB_C32SNRTOMBS''@/$(GNULIB_C32SNRTOMBS)/g' \
 	      -e 's/@''GNULIB_C32SRTOMBS''@/$(GNULIB_C32SRTOMBS)/g' \
-- 
2.34.1

>From 99fc85c723f3e393be47b1cbcf2711a03ceaf6ef Mon Sep 17 00:00:00 2001
From: Bruno Haible <br...@clisp.org>
Date: Mon, 10 Apr 2023 01:51:54 +0200
Subject: [PATCH 4/4] c32toupper: Add tests.

* tests/test-c32toupper.sh: New file.
* tests/test-c32toupper.c: New file, based on tests/test-c32islower.c.
* modules/c32toupper-tests: New file.
---
 ChangeLog                |   5 +
 modules/c32toupper-tests |  30 +++
 tests/test-c32toupper.c  | 448 +++++++++++++++++++++++++++++++++++++++
 tests/test-c32toupper.sh |  42 ++++
 4 files changed, 525 insertions(+)
 create mode 100644 modules/c32toupper-tests
 create mode 100644 tests/test-c32toupper.c
 create mode 100755 tests/test-c32toupper.sh

diff --git a/ChangeLog b/ChangeLog
index 9b7487dd01..7a17b548ce 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2023-04-09  Bruno Haible  <br...@clisp.org>
 
+	c32toupper: Add tests.
+	* tests/test-c32toupper.sh: New file.
+	* tests/test-c32toupper.c: New file, based on tests/test-c32islower.c.
+	* modules/c32toupper-tests: New file.
+
 	c32toupper: New module.
 	* lib/uchar.in.h (c32toupper): New declaration.
 	* lib/c32toupper.c: New file.
diff --git a/modules/c32toupper-tests b/modules/c32toupper-tests
new file mode 100644
index 0000000000..eeedc9df1e
--- /dev/null
+++ b/modules/c32toupper-tests
@@ -0,0 +1,30 @@
+Files:
+tests/test-c32toupper.sh
+tests/test-c32toupper.c
+tests/signature.h
+tests/macros.h
+m4/locale-fr.m4
+m4/locale-ja.m4
+m4/locale-zh.m4
+m4/codeset.m4
+
+Depends-on:
+mbrtoc32
+c32rtomb
+setlocale
+
+configure.ac:
+gt_LOCALE_FR
+gt_LOCALE_FR_UTF8
+gt_LOCALE_JA
+gt_LOCALE_ZH_CN
+
+Makefile.am:
+TESTS += test-c32toupper.sh
+TESTS_ENVIRONMENT += \
+  LOCALE_FR='@LOCALE_FR@' \
+  LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' \
+  LOCALE_JA='@LOCALE_JA@' \
+  LOCALE_ZH_CN='@LOCALE_ZH_CN@'
+check_PROGRAMS += test-c32toupper
+test_c32toupper_LDADD = $(LDADD) $(SETLOCALE_LIB) $(MBRTOWC_LIB) $(LIBUNISTRING)
diff --git a/tests/test-c32toupper.c b/tests/test-c32toupper.c
new file mode 100644
index 0000000000..37f0134de8
--- /dev/null
+++ b/tests/test-c32toupper.c
@@ -0,0 +1,448 @@
+/* Test of c32toupper() function.
+   Copyright (C) 2020-2023 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include <uchar.h>
+
+#include "signature.h"
+SIGNATURE_CHECK (c32toupper, wint_t, (wint_t));
+
+#include <locale.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+
+#include "macros.h"
+
+/* Representation of a multibyte character.  */
+#define MBCHAR_BUF_SIZE 6
+struct multibyte
+{
+  size_t nbytes;             /* number of bytes stored in buf; 0 if none */
+  char buf[MBCHAR_BUF_SIZE]; /* room for the bytes */
+};
+
+/* Returns c32toupper of the multibyte character s[0..n-1], converted back to
+   multibyte form (nbytes is 0 if that conversion is not possible).  */
+static struct multibyte
+for_character (const char *s, size_t n)
+{
+  mbstate_t state;
+  char32_t wc;
+  size_t ret;
+  struct multibyte result;
+
+  memset (&state, '\0', sizeof (mbstate_t));
+  wc = (char32_t) 0xBADFACE; /* poison value; presumably exposes a missing store */
+  ret = mbrtoc32 (&wc, s, n, &state);
+  ASSERT (ret == n); /* all n bytes must be consumed */
+
+  wc = c32toupper (wc);
+  ASSERT (wc != WEOF);
+
+  memset (&state, '\0', sizeof (mbstate_t)); /* fresh state for the way back */
+  ret = c32rtomb (result.buf, wc, &state);
+  ASSERT (ret != 0);
+  if (ret == (size_t)(-1))
+    /* wc cannot be converted back to multibyte.  */
+    result.nbytes = 0;
+  else
+    {
+      ASSERT (ret <= MBCHAR_BUF_SIZE); /* NOTE(review): checked after the write */
+      result.nbytes = ret;
+    }
+  return result;
+}
+
+int
+main (int argc, char *argv[])
+{
+  wint_t wc;
+  struct multibyte mb;
+  char buf[4];
+
+  /* configure should already have checked that the locale is supported.  */
+  if (setlocale (LC_ALL, "") == NULL)
+    return 1;
+
+  /* WEOF must be mapped to itself.  */
+  wc = c32toupper (WEOF);
+  ASSERT (wc == WEOF);
+
+  /* Test single-byte characters.
+     POSIX specifies in
+       <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html>
+     that
+       - in all locales, the lowercase characters include the a ... z
+         characters, and the corresponding characters A ... Z (if not in a
+         Turkish locale) are uppercase,
+       - in the "POSIX" locale (which is usually the same as the "C" locale),
+         the lowercase characters include only the ASCII a ... z characters,
+         and the corresponding characters A ... Z are uppercase.
+   */
+#if defined __NetBSD__
+  /* towupper is broken in the zh_CN.GB18030 locale on NetBSD 9.0.
+     See <https://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=57339>.  */
+  if (!(argc > 1 && argv[1][0] == '4')) /* '4' selects the GB18030 case */
+#endif
+  {
+    int c;
+
+    for (c = 0; c < 0x100; c++)
+      switch (c)
+        {
+        case '\t': case '\v': case '\f':
+        case ' ': case '!': case '"': case '#': case '%':
+        case '&': case '\'': case '(': case ')': case '*':
+        case '+': case ',': case '-': case '.': case '/':
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8': case '9':
+        case ':': case ';': case '<': case '=': case '>':
+        case '?':
+        case 'A': case 'B': case 'C': case 'D': case 'E':
+        case 'F': case 'G': case 'H': case 'I': case 'J':
+        case 'K': case 'L': case 'M': case 'N': case 'O':
+        case 'P': case 'Q': case 'R': case 'S': case 'T':
+        case 'U': case 'V': case 'W': case 'X': case 'Y':
+        case 'Z':
+        case '[': case '\\': case ']': case '^': case '_':
+        case 'a': case 'b': case 'c': case 'd': case 'e':
+        case 'f': case 'g': case 'h': case 'i': case 'j':
+        case 'k': case 'l': case 'm': case 'n': case 'o':
+        case 'p': case 'q': case 'r': case 's': case 't':
+        case 'u': case 'v': case 'w': case 'x': case 'y':
+        case 'z': case '{': case '|': case '}': case '~':
+          /* c is in the ISO C "basic character set".  */
+          buf[0] = (unsigned char) c;
+          mb = for_character (buf, 1);
+          switch (c)
+            {
+            case 'a': case 'b': case 'c': case 'd': case 'e':
+            case 'f': case 'g': case 'h': case 'i': case 'j':
+            case 'k': case 'l': case 'm': case 'n': case 'o':
+            case 'p': case 'q': case 'r': case 's': case 't':
+            case 'u': case 'v': case 'w': case 'x': case 'y':
+            case 'z':
+              ASSERT (mb.nbytes == 1);
+              ASSERT ((unsigned char) mb.buf[0] == (unsigned char) c - 'a' + 'A');
+              break;
+            default: /* every other basic character must map to itself */
+              ASSERT (mb.nbytes == 1);
+              ASSERT ((unsigned char) mb.buf[0] == c);
+              break;
+            }
+          break;
+        }
+  }
+
+  if (argc > 1)
+    switch (argv[1][0]) /* selector digit passed by test-c32toupper.sh */
+      {
+      case '0':
+        /* C locale; tested above.  */
+        return 0;
+
+      case '1':
+        /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
+        {
+          /* U+00B2 SUPERSCRIPT TWO */
+          mb = for_character ("\262", 1);
+          ASSERT (mb.nbytes == 1);
+          ASSERT (memcmp (mb.buf, "\262", 1) == 0);
+        #if !(defined __GLIBC__ || defined __sun || defined __CYGWIN__)
+          /* U+00B5 MICRO SIGN */
+          mb = for_character ("\265", 1);
+          ASSERT (mb.nbytes == 1);
+          ASSERT (memcmp (mb.buf, "\265", 1) == 0);
+        #endif
+          /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
+          mb = for_character ("\311", 1);
+          ASSERT (mb.nbytes == 1);
+          ASSERT (memcmp (mb.buf, "\311", 1) == 0);
+          /* U+00DF LATIN SMALL LETTER SHARP S */
+          mb = for_character ("\337", 1);
+          ASSERT (mb.nbytes == 1);
+          ASSERT (memcmp (mb.buf, "\337", 1) == 0);
+          /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
+          mb = for_character ("\351", 1);
+          ASSERT (mb.nbytes == 1);
+          ASSERT (memcmp (mb.buf, "\311", 1) == 0);
+        #if !(defined __GLIBC__ || defined __DragonFly__ || defined __sun || defined __CYGWIN__ || (defined _WIN32 && !defined __CYGWIN__))
+          /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
+          mb = for_character ("\377", 1);
+          ASSERT (mb.nbytes == 1);
+          ASSERT (memcmp (mb.buf, "\377", 1) == 0);
+        #endif
+        }
+        return 0;
+
+      case '2':
+        /* Locale encoding is EUC-JP.  */
+        {
+          /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
+          mb = for_character ("\217\252\261", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\217\252\261", 3) == 0);
+        #if !defined __NetBSD__
+          /* U+00DF LATIN SMALL LETTER SHARP S */
+          mb = for_character ("\217\251\316", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\217\251\316", 3) == 0);
+        #endif
+        #if !((defined __APPLE__ && defined __MACH__) || defined __DragonFly__)
+          /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
+          mb = for_character ("\217\253\261", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\217\252\261", 3) == 0);
+        #endif
+        #if !((defined __APPLE__ && defined __MACH__) || defined __DragonFly__ || defined __NetBSD__)
+          /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
+          mb = for_character ("\217\253\363", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\217\252\363", 3) == 0);
+        #endif
+          /* U+0141 LATIN CAPITAL LETTER L WITH STROKE */
+          mb = for_character ("\217\251\250", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\217\251\250", 3) == 0);
+        #if !((defined __APPLE__ && defined __MACH__) || defined __DragonFly__)
+          /* U+0142 LATIN SMALL LETTER L WITH STROKE */
+          mb = for_character ("\217\251\310", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\217\251\250", 3) == 0);
+        #endif
+          /* U+0429 CYRILLIC CAPITAL LETTER SHCHA */
+          mb = for_character ("\247\273", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\247\273", 2) == 0);
+        #if !defined __DragonFly__
+          /* U+0449 CYRILLIC SMALL LETTER SHCHA */
+          mb = for_character ("\247\353", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\247\273", 2) == 0);
+        #endif
+          /* U+3073 HIRAGANA LETTER BI */
+          mb = for_character ("\244\323", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\244\323", 2) == 0);
+          /* U+FF27 FULLWIDTH LATIN CAPITAL LETTER G */
+          mb = for_character ("\243\307", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\243\307", 2) == 0);
+        #if !defined __DragonFly__
+          /* U+FF47 FULLWIDTH LATIN SMALL LETTER G */
+          mb = for_character ("\243\347", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\243\307", 2) == 0);
+        #endif
+        }
+        return 0;
+
+      case '3':
+        /* Locale encoding is UTF-8.  */
+        {
+          /* U+00B2 SUPERSCRIPT TWO */
+          mb = for_character ("\302\262", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\302\262", 2) == 0);
+        #if !(defined __GLIBC__ || (defined __APPLE__ && defined __MACH__) || defined __FreeBSD__ || defined __DragonFly__ || defined __NetBSD__ || defined _AIX || defined __sun || defined __CYGWIN__)
+          /* U+00B5 MICRO SIGN */
+          mb = for_character ("\302\265", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\302\265", 2) == 0);
+        #endif
+          /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
+          mb = for_character ("\303\211", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\303\211", 2) == 0);
+          /* U+00DF LATIN SMALL LETTER SHARP S */
+          mb = for_character ("\303\237", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\303\237", 2) == 0);
+        #if !(defined _WIN32 && !defined __CYGWIN__)
+          /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
+          mb = for_character ("\303\251", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\303\211", 2) == 0);
+          /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
+          mb = for_character ("\303\277", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\305\270", 2) == 0);
+        #endif
+          /* U+0141 LATIN CAPITAL LETTER L WITH STROKE */
+          mb = for_character ("\305\201", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\305\201", 2) == 0);
+          /* U+0142 LATIN SMALL LETTER L WITH STROKE */
+          mb = for_character ("\305\202", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\305\201", 2) == 0);
+          /* U+0429 CYRILLIC CAPITAL LETTER SHCHA */
+          mb = for_character ("\320\251", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\320\251", 2) == 0);
+          /* U+0449 CYRILLIC SMALL LETTER SHCHA */
+          mb = for_character ("\321\211", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\320\251", 2) == 0);
+          /* U+05D5 HEBREW LETTER VAV */
+          mb = for_character ("\327\225", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\327\225", 2) == 0);
+          /* U+3073 HIRAGANA LETTER BI */
+          mb = for_character ("\343\201\263", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\343\201\263", 3) == 0);
+          /* U+3162 HANGUL LETTER YI */
+          mb = for_character ("\343\205\242", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\343\205\242", 3) == 0);
+          /* U+FF27 FULLWIDTH LATIN CAPITAL LETTER G */
+          mb = for_character ("\357\274\247", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\357\274\247", 3) == 0);
+          /* U+FF47 FULLWIDTH LATIN SMALL LETTER G */
+          mb = for_character ("\357\275\207", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\357\274\247", 3) == 0);
+          /* U+FFDB HALFWIDTH HANGUL LETTER YI */
+          mb = for_character ("\357\277\233", 3);
+          ASSERT (mb.nbytes == 3);
+          ASSERT (memcmp (mb.buf, "\357\277\233", 3) == 0);
+          /* U+10419 DESERET CAPITAL LETTER EF */
+          mb = for_character ("\360\220\220\231", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\360\220\220\231", 4) == 0);
+        #if !(defined __DragonFly__ || defined __sun)
+          /* U+10441 DESERET SMALL LETTER EF */
+          mb = for_character ("\360\220\221\201", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\360\220\220\231", 4) == 0);
+        #endif
+          /* U+E0041 TAG LATIN CAPITAL LETTER A */
+          mb = for_character ("\363\240\201\201", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\363\240\201\201", 4) == 0);
+          /* U+E0061 TAG LATIN SMALL LETTER A */
+          mb = for_character ("\363\240\201\241", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\363\240\201\241", 4) == 0);
+        }
+        return 0;
+
+      case '4':
+        /* Locale encoding is GB18030.  */
+        {
+          /* U+00B2 SUPERSCRIPT TWO */
+          mb = for_character ("\201\060\205\065", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\205\065", 4) == 0);
+        #if !(defined __GLIBC__ || (defined __APPLE__ && defined __MACH__) || defined __NetBSD__)
+          /* U+00B5 MICRO SIGN */
+          mb = for_character ("\201\060\205\070", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\205\070", 4) == 0);
+        #endif
+          /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
+          mb = for_character ("\201\060\207\067", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\207\067", 4) == 0);
+          /* U+00DF LATIN SMALL LETTER SHARP S */
+          mb = for_character ("\201\060\211\070", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\211\070", 4) == 0);
+        #if !(defined __FreeBSD__ || defined __DragonFly__ || defined __sun)
+          /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
+          mb = for_character ("\250\246", 2);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\207\067", 4) == 0);
+          /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
+          mb = for_character ("\201\060\213\067", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\227\060", 4) == 0);
+        #endif
+          /* U+0141 LATIN CAPITAL LETTER L WITH STROKE */
+          mb = for_character ("\201\060\221\071", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\221\071", 4) == 0);
+        #if !(defined __FreeBSD__ || defined __DragonFly__ || defined __sun)
+          /* U+0142 LATIN SMALL LETTER L WITH STROKE */
+          mb = for_character ("\201\060\222\060", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\221\071", 4) == 0);
+        #endif
+          /* U+0429 CYRILLIC CAPITAL LETTER SHCHA */
+          mb = for_character ("\247\273", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\247\273", 2) == 0);
+        #if !(defined __FreeBSD__ || defined __DragonFly__)
+          /* U+0449 CYRILLIC SMALL LETTER SHCHA */
+          mb = for_character ("\247\353", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\247\273", 2) == 0);
+        #endif
+          /* U+05D5 HEBREW LETTER VAV */
+          mb = for_character ("\201\060\371\067", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\060\371\067", 4) == 0);
+          /* U+3073 HIRAGANA LETTER BI */
+          mb = for_character ("\244\323", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\244\323", 2) == 0);
+          /* U+3162 HANGUL LETTER YI */
+          mb = for_character ("\201\071\256\062", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\201\071\256\062", 4) == 0);
+          /* U+FF27 FULLWIDTH LATIN CAPITAL LETTER G */
+          mb = for_character ("\243\307", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\243\307", 2) == 0);
+        #if !defined __DragonFly__
+          /* U+FF47 FULLWIDTH LATIN SMALL LETTER G */
+          mb = for_character ("\243\347", 2);
+          ASSERT (mb.nbytes == 2);
+          ASSERT (memcmp (mb.buf, "\243\307", 2) == 0);
+        #endif
+          /* U+FFDB HALFWIDTH HANGUL LETTER YI */
+          mb = for_character ("\204\061\241\071", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\204\061\241\071", 4) == 0);
+          /* U+10419 DESERET CAPITAL LETTER EF */
+          mb = for_character ("\220\060\351\071", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\220\060\351\071", 4) == 0);
+        #if !((defined __APPLE__ && defined __MACH__) || defined __FreeBSD__ || defined __DragonFly__ || defined __NetBSD__ || defined __sun)
+          /* U+10441 DESERET SMALL LETTER EF */
+          mb = for_character ("\220\060\355\071", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\220\060\351\071", 4) == 0);
+        #endif
+          /* U+E0041 TAG LATIN CAPITAL LETTER A */
+          mb = for_character ("\323\066\234\063", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\323\066\234\063", 4) == 0);
+          /* U+E0061 TAG LATIN SMALL LETTER A */
+          mb = for_character ("\323\066\237\065", 4);
+          ASSERT (mb.nbytes == 4);
+          ASSERT (memcmp (mb.buf, "\323\066\237\065", 4) == 0);
+        }
+        return 0;
+
+      }
+
+  return 1; /* no selector given, or an unrecognized one */
+}
diff --git a/tests/test-c32toupper.sh b/tests/test-c32toupper.sh
new file mode 100755
index 0000000000..1c253e5460
--- /dev/null
+++ b/tests/test-c32toupper.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+# Trace the commands, to distinguish the various invocations in the .log file.
+set -x
+
+# Test in the POSIX locale.
+LC_ALL=C     ${CHECKER} ./test-c32toupper${EXEEXT} 0 || exit 1
+LC_ALL=POSIX ${CHECKER} ./test-c32toupper${EXEEXT} 0 || exit 1
+
+# Test in an ISO-8859-1 or ISO-8859-15 locale, unless LOCALE_FR is 'none'.
+: "${LOCALE_FR=fr_FR}"
+if test $LOCALE_FR != none; then
+  LC_ALL=$LOCALE_FR \
+  ${CHECKER} ./test-c32toupper${EXEEXT} 1 \
+  || exit 1
+fi
+
+# Test in an EUC-JP locale, unless LOCALE_JA is 'none'.
+: "${LOCALE_JA=ja_JP}"
+if test $LOCALE_JA != none; then
+  LC_ALL=$LOCALE_JA \
+  ${CHECKER} ./test-c32toupper${EXEEXT} 2 \
+  || exit 1
+fi
+
+# Test in a UTF-8 locale, unless LOCALE_FR_UTF8 is 'none'.
+: "${LOCALE_FR_UTF8=fr_FR.UTF-8}"
+if test $LOCALE_FR_UTF8 != none; then
+  LC_ALL=$LOCALE_FR_UTF8 \
+  ${CHECKER} ./test-c32toupper${EXEEXT} 3 \
+  || exit 1
+fi
+
+# Test in a GB18030 locale, unless LOCALE_ZH_CN is 'none'.
+: "${LOCALE_ZH_CN=zh_CN.GB18030}"
+if test $LOCALE_ZH_CN != none; then
+  LC_ALL=$LOCALE_ZH_CN \
+  ${CHECKER} ./test-c32toupper${EXEEXT} 4 \
+  || exit 1
+fi
+
+exit 0
-- 
2.34.1

new modules c32tolower, c32toupper

Reply via email to