This module provides a function c32swidth() that is like wcswidth(), except that its string argument is a 32-bit wide string (const char32_t *) instead of a wide string (const wchar_t *).
2023-05-04 Bruno Haible <br...@clisp.org> c32swidth: Add tests. * tests/test-c32swidth.c: New file, based on tests/test-c32width.c. * modules/c32swidth-tests: New file. c32swidth: New module. * lib/uchar.in.h (c32swidth): New declaration. * lib/wcswidth-impl.h: Use macros FUNC, UNIT, CHARACTER_WIDTH. * lib/wcswidth.c: Define FUNC, UNIT, CHARACTER_WIDTH before including wcswidth-impl.h. * lib/c32swidth.c: New file. * modules/c32swidth: New file. * m4/uchar_h.m4 (gl_UCHAR_H_REQUIRE_DEFAULTS): Initialize GNULIB_C32SWIDTH. * modules/uchar (Makefile.am): Substitute GNULIB_C32SWIDTH.
>From 15880a3da91a0403eb287a84db90b54713ac4a09 Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Thu, 4 May 2023 23:27:12 +0200 Subject: [PATCH 1/2] c32swidth: New module. * lib/uchar.in.h (c32swidth): New declaration. * lib/wcswidth-impl.h: Use macros FUNC, UNIT, CHARACTER_WIDTH. * lib/wcswidth.c: Define FUNC, UNIT, CHARACTER_WIDTH before including wcswidth-impl.h. * lib/c32swidth.c: New file. * modules/c32swidth: New file. * m4/uchar_h.m4 (gl_UCHAR_H_REQUIRE_DEFAULTS): Initialize GNULIB_C32SWIDTH. * modules/uchar (Makefile.am): Substitute GNULIB_C32SWIDTH. --- ChangeLog | 13 +++++++++++++ lib/c32swidth.c | 43 +++++++++++++++++++++++++++++++++++++++++++ lib/uchar.in.h | 19 +++++++++++++++++++ lib/wcswidth-impl.h | 14 +++++++------- lib/wcswidth.c | 3 +++ m4/uchar_h.m4 | 3 ++- modules/c32swidth | 36 ++++++++++++++++++++++++++++++++++++ modules/uchar | 1 + 8 files changed, 124 insertions(+), 8 deletions(-) create mode 100644 lib/c32swidth.c create mode 100644 modules/c32swidth diff --git a/ChangeLog b/ChangeLog index af99e65a65..d55b5e5120 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2023-05-04 Bruno Haible <br...@clisp.org> + + c32swidth: New module. + * lib/uchar.in.h (c32swidth): New declaration. + * lib/wcswidth-impl.h: Use macros FUNC, UNIT, CHARACTER_WIDTH. + * lib/wcswidth.c: Define FUNC, UNIT, CHARACTER_WIDTH before including + wcswidth-impl.h. + * lib/c32swidth.c: New file. + * modules/c32swidth: New file. + * m4/uchar_h.m4 (gl_UCHAR_H_REQUIRE_DEFAULTS): Initialize + GNULIB_C32SWIDTH. + * modules/uchar (Makefile.am): Substitute GNULIB_C32SWIDTH. + 2023-05-04 Bruno Haible <br...@clisp.org> wcswidth: Fix result in case of overflow. diff --git a/lib/c32swidth.c b/lib/c32swidth.c new file mode 100644 index 0000000000..2f7adcf74c --- /dev/null +++ b/lib/c32swidth.c @@ -0,0 +1,43 @@ +/* Determine number of screen columns needed for a size-bounded 32-bit wide string. + Copyright (C) 2023 Free Software Foundation, Inc. 
+ + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +#define IN_C32SWIDTH +/* Specification. */ +#include <uchar.h> + +#if _GL_WCHAR_T_IS_UCS4 && !GNULIB_defined_mbstate_t + +# include <wchar.h> + +_GL_EXTERN_INLINE +int +c32swidth (const char32_t *s, size_t n) +{ + return wcswidth ((const wchar_t *) s, n); +} + +#else + +# include <limits.h> + +# define FUNC c32swidth +# define UNIT char32_t +# define CHARACTER_WIDTH c32width +# include "wcswidth-impl.h" + +#endif diff --git a/lib/uchar.in.h b/lib/uchar.in.h index 3815af4c26..8bf6176b8c 100644 --- a/lib/uchar.in.h +++ b/lib/uchar.in.h @@ -494,6 +494,25 @@ _GL_CXXALIASWARN (c32stombs); #endif +/* Number of screen columns needed for a size-bounded 32-bit wide string. */ +#if @GNULIB_C32SWIDTH@ +# if (_GL_WCHAR_T_IS_UCS4 && !GNULIB_defined_mbstate_t) && !defined IN_C32SWIDTH +_GL_BEGIN_C_LINKAGE +_GL_INLINE _GL_ARG_NONNULL ((1)) int +c32swidth (const char32_t *s, size_t n) +{ + return wcswidth ((const wchar_t *) s, n); +} +_GL_END_C_LINKAGE +# else +_GL_FUNCDECL_SYS (c32swidth, int, (const char32_t *s, size_t n) + _GL_ARG_NONNULL ((1))); +# endif +_GL_CXXALIAS_SYS (c32swidth, int, (const char32_t *s, size_t n)); +_GL_CXXALIASWARN (c32swidth); +#endif + + /* Converts a 32-bit wide character to unibyte character. Returns the single-byte representation of WC if it exists, or EOF otherwise. 
*/ diff --git a/lib/wcswidth-impl.h b/lib/wcswidth-impl.h index a879bfdd93..34cb0b9814 100644 --- a/lib/wcswidth-impl.h +++ b/lib/wcswidth-impl.h @@ -16,16 +16,16 @@ along with this program. If not, see <https://www.gnu.org/licenses/>. */ int -wcswidth (const wchar_t *s, size_t n) +FUNC (const UNIT *s, size_t n) { int count = 0; for (; n > 0; s++, n--) { - wchar_t c = *s; - if (c == (wchar_t)'\0') + UNIT c = *s; + if (c == (UNIT)'\0') break; { - int width = wcwidth (c); + int width = CHARACTER_WIDTH (c); if (width < 0) goto found_nonprinting; if (width > INT_MAX - count) @@ -39,11 +39,11 @@ wcswidth (const wchar_t *s, size_t n) Continue searching for a non-printing wide character. */ for (; n > 0; s++, n--) { - wchar_t c = *s; - if (c == (wchar_t)'\0') + UNIT c = *s; + if (c == (UNIT)'\0') break; { - int width = wcwidth (c); + int width = CHARACTER_WIDTH (c); if (width < 0) goto found_nonprinting; } diff --git a/lib/wcswidth.c b/lib/wcswidth.c index 8188e380fd..408b826c62 100644 --- a/lib/wcswidth.c +++ b/lib/wcswidth.c @@ -22,4 +22,7 @@ #include <limits.h> +#define FUNC wcswidth +#define UNIT wchar_t +#define CHARACTER_WIDTH wcwidth #include "wcswidth-impl.h" diff --git a/m4/uchar_h.m4 b/m4/uchar_h.m4 index 5c04e2a6ee..0a48eb20c4 100644 --- a/m4/uchar_h.m4 +++ b/m4/uchar_h.m4 @@ -1,4 +1,4 @@ -# uchar_h.m4 serial 25 +# uchar_h.m4 serial 26 dnl Copyright (C) 2019-2023 Free Software Foundation, Inc. 
dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -221,6 +221,7 @@ AC_DEFUN([gl_UCHAR_H_REQUIRE_DEFAULTS] gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32SNRTOMBS]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32SRTOMBS]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32STOMBS]) + gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32SWIDTH]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32TOB]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MBRTOC32]) gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MBSNRTOC32S]) diff --git a/modules/c32swidth b/modules/c32swidth new file mode 100644 index 0000000000..df60f6b473 --- /dev/null +++ b/modules/c32swidth @@ -0,0 +1,36 @@ +Description: +c32swidth() function: Determine the number of screen columns needed for +a size-bounded 32-bit wide string. + +Files: +lib/c32swidth.c +lib/wcswidth-impl.h + +Depends-on: +uchar +wchar +wcswidth +c32width + +configure.ac: +AC_REQUIRE([gl_UCHAR_H]) +dnl Determine REPLACE_MBSTATE_T, from which GNULIB_defined_mbstate_t is +dnl determined. It describes how mbrtoc32 is implemented. 
+AC_REQUIRE([gl_MBSTATE_T_BROKEN]) +AC_REQUIRE([gl_MBRTOC32_SANITYCHECK]) +gl_UCHAR_MODULE_INDICATOR([c32swidth]) + +Makefile.am: +lib_SOURCES += c32swidth.c + +Include: +<uchar.h> + +Link: +$(LTLIBUNISTRING) when linking with libtool, $(LIBUNISTRING) otherwise + +License: +LGPLv2+ + +Maintainer: +Bruno Haible diff --git a/modules/uchar b/modules/uchar index 3c6f3963b9..948bcd7993 100644 --- a/modules/uchar +++ b/modules/uchar @@ -58,6 +58,7 @@ uchar.h: uchar.in.h $(top_builddir)/config.status $(CXXDEFS_H) -e 's/@''GNULIB_C32SNRTOMBS''@/$(GNULIB_C32SNRTOMBS)/g' \ -e 's/@''GNULIB_C32SRTOMBS''@/$(GNULIB_C32SRTOMBS)/g' \ -e 's/@''GNULIB_C32STOMBS''@/$(GNULIB_C32STOMBS)/g' \ + -e 's/@''GNULIB_C32SWIDTH''@/$(GNULIB_C32SWIDTH)/g' \ -e 's/@''GNULIB_C32TOB''@/$(GNULIB_C32TOB)/g' \ -e 's/@''GNULIB_MBRTOC32''@/$(GNULIB_MBRTOC32)/g' \ -e 's/@''GNULIB_MBSNRTOC32S''@/$(GNULIB_MBSNRTOC32S)/g' \ -- 2.34.1
>From 8ebfab32803787379c8201fb9f650fcb35e33ecc Mon Sep 17 00:00:00 2001 From: Bruno Haible <br...@clisp.org> Date: Thu, 4 May 2023 23:27:16 +0200 Subject: [PATCH 2/2] c32swidth: Add tests. * tests/test-c32swidth.c: New file, based on tests/test-c32width.c. * modules/c32swidth-tests: New file. --- ChangeLog | 4 ++ modules/c32swidth-tests | 15 +++++ tests/test-c32swidth.c | 128 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+) create mode 100644 modules/c32swidth-tests create mode 100644 tests/test-c32swidth.c diff --git a/ChangeLog b/ChangeLog index d55b5e5120..aaffe12fc1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2023-05-04 Bruno Haible <br...@clisp.org> + c32swidth: Add tests. + * tests/test-c32swidth.c: New file, based on tests/test-c32width.c. + * modules/c32swidth-tests: New file. + c32swidth: New module. * lib/uchar.in.h (c32swidth): New declaration. * lib/wcswidth-impl.h: Use macros FUNC, UNIT, CHARACTER_WIDTH. diff --git a/modules/c32swidth-tests b/modules/c32swidth-tests new file mode 100644 index 0000000000..e591b9109f --- /dev/null +++ b/modules/c32swidth-tests @@ -0,0 +1,15 @@ +Files: +tests/test-c32swidth.c +tests/signature.h +tests/macros.h + +Depends-on: +localcharset +setlocale + +configure.ac: + +Makefile.am: +TESTS += test-c32swidth +check_PROGRAMS += test-c32swidth +test_c32swidth_LDADD = $(LDADD) $(SETLOCALE_LIB) $(LIBUNISTRING) diff --git a/tests/test-c32swidth.c b/tests/test-c32swidth.c new file mode 100644 index 0000000000..8bf19afec9 --- /dev/null +++ b/tests/test-c32swidth.c @@ -0,0 +1,128 @@ +/* Test of c32swidth() function. + Copyright (C) 2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <br...@clisp.org>, 2023. */ + +#include <config.h> + +#include <uchar.h> + +#include "signature.h" +SIGNATURE_CHECK (c32swidth, int, (const char32_t *, size_t)); + +#include <locale.h> +#include <string.h> + +#include "localcharset.h" +#include "macros.h" + +int +main () +{ + int w; + + /* Switch to an UTF-8 locale. */ + if (!((setlocale (LC_ALL, "fr_FR.UTF-8") != NULL + || setlocale (LC_ALL, "de_DE.UTF-8") != NULL + || setlocale (LC_ALL, "es_ES.UTF-8") != NULL + || setlocale (LC_ALL, "en_US.UTF-8") != NULL) + /* Check whether it's really an UTF-8 locale. + On native Windows, these setlocale calls succeed but the encoding + of this locale is CP1252, not UTF-8. */ + && strcmp (locale_charset (), "UTF-8") == 0)) + { + fprintf (stderr, "Skipping test: No common Unicode locale is installed\n"); + return 77; + } + + { + char32_t s[] = { 'f', 'p', 0, 'x' }; + w = c32swidth (s, 0); + ASSERT (w == 0); + w = c32swidth (s, 1); + ASSERT (w == 1); + w = c32swidth (s, 2); + ASSERT (w == 2); + w = c32swidth (s, 3); + ASSERT (w == 2); + w = c32swidth (s, 4); + ASSERT (w == 2); + w = c32swidth (s, (size_t)(-1)); + ASSERT (w == 2); + } + + { + char32_t s[] = { 'f', 'p', '\n', 'x' }; + w = c32swidth (s, 0); + ASSERT (w == 0); + w = c32swidth (s, 1); + ASSERT (w == 1); + w = c32swidth (s, 2); + ASSERT (w == 2); + w = c32swidth (s, 3); + ASSERT (w == -1); + w = c32swidth (s, 4); + ASSERT (w == -1); + } + + /* Test width of some non-spacing characters. 
*/ + { + char32_t s[] = { 'a', 0x0301 }; + w = c32swidth (s, 2); + ASSERT (w == 1); + } + + /* Test width of some zero width characters. */ + { + char32_t s[] = { 'i', 0x200B, 'j' }; + w = c32swidth (s, 3); + ASSERT (w == 2); + } + + /* Test width of some math symbols. + U+2202 is marked as having ambiguous width (A) in EastAsianWidth.txt + (see <https://www.unicode.org/Public/12.0.0/ucd/EastAsianWidth.txt>). + The Unicode Standard Annex 11 + <https://www.unicode.org/reports/tr11/tr11-36.html> + says + "Ambiguous characters behave like wide or narrow characters + depending on the context (language tag, script identification, + associated font, source of data, or explicit markup; all can + provide the context). If the context cannot be established + reliably, they should be treated as narrow characters by default." + For c32width(), the only available context information is the locale. + The chosen locale above is a Western locale, not an East Asian locale, + therefore U+2202 should be treated like a narrow character. */ + { + char32_t s[] = { 0x2202 }; + w = c32swidth (s, 1); + ASSERT (w == 1); + } + + /* Test width of some CJK characters. */ + { + char32_t s[] = { 0x4E2D, 0x6587 }; + w = c32swidth (s, 2); + ASSERT (w == 4); + } + { + char32_t s[] = { 0x20369, 0x2F876 }; + w = c32swidth (s, 2); + ASSERT (w == 4); + } + + return 0; +} -- 2.34.1