This module provides a function c32swidth() that is like wcswidth(), except
that it takes a 32-bit wide string instead of a wide string as its argument.


2023-05-04  Bruno Haible  <br...@clisp.org>

        c32swidth: Add tests.
        * tests/test-c32swidth.c: New file, based on tests/test-c32width.c.
        * modules/c32swidth-tests: New file.

        c32swidth: New module.
        * lib/uchar.in.h (c32swidth): New declaration.
        * lib/wcswidth-impl.h: Use macros FUNC, UNIT, CHARACTER_WIDTH.
        * lib/wcswidth.c: Define FUNC, UNIT, CHARACTER_WIDTH before including
        wcswidth-impl.h.
        * lib/c32swidth.c: New file.
        * modules/c32swidth: New file.
        * m4/uchar_h.m4 (gl_UCHAR_H_REQUIRE_DEFAULTS): Initialize
        GNULIB_C32SWIDTH.
        * modules/uchar (Makefile.am): Substitute GNULIB_C32SWIDTH.

From 15880a3da91a0403eb287a84db90b54713ac4a09 Mon Sep 17 00:00:00 2001
From: Bruno Haible <br...@clisp.org>
Date: Thu, 4 May 2023 23:27:12 +0200
Subject: [PATCH 1/2] c32swidth: New module.

* lib/uchar.in.h (c32swidth): New declaration.
* lib/wcswidth-impl.h: Use macros FUNC, UNIT, CHARACTER_WIDTH.
* lib/wcswidth.c: Define FUNC, UNIT, CHARACTER_WIDTH before including
wcswidth-impl.h.
* lib/c32swidth.c: New file.
* modules/c32swidth: New file.
* m4/uchar_h.m4 (gl_UCHAR_H_REQUIRE_DEFAULTS): Initialize
GNULIB_C32SWIDTH.
* modules/uchar (Makefile.am): Substitute GNULIB_C32SWIDTH.
---
 ChangeLog           | 13 +++++++++++++
 lib/c32swidth.c     | 43 +++++++++++++++++++++++++++++++++++++++++++
 lib/uchar.in.h      | 19 +++++++++++++++++++
 lib/wcswidth-impl.h | 14 +++++++-------
 lib/wcswidth.c      |  3 +++
 m4/uchar_h.m4       |  3 ++-
 modules/c32swidth   | 36 ++++++++++++++++++++++++++++++++++++
 modules/uchar       |  1 +
 8 files changed, 124 insertions(+), 8 deletions(-)
 create mode 100644 lib/c32swidth.c
 create mode 100644 modules/c32swidth

diff --git a/ChangeLog b/ChangeLog
index af99e65a65..d55b5e5120 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2023-05-04  Bruno Haible  <br...@clisp.org>
+
+	c32swidth: New module.
+	* lib/uchar.in.h (c32swidth): New declaration.
+	* lib/wcswidth-impl.h: Use macros FUNC, UNIT, CHARACTER_WIDTH.
+	* lib/wcswidth.c: Define FUNC, UNIT, CHARACTER_WIDTH before including
+	wcswidth-impl.h.
+	* lib/c32swidth.c: New file.
+	* modules/c32swidth: New file.
+	* m4/uchar_h.m4 (gl_UCHAR_H_REQUIRE_DEFAULTS): Initialize
+	GNULIB_C32SWIDTH.
+	* modules/uchar (Makefile.am): Substitute GNULIB_C32SWIDTH.
+
 2023-05-04  Bruno Haible  <br...@clisp.org>
 
 	wcswidth: Fix result in case of overflow.
diff --git a/lib/c32swidth.c b/lib/c32swidth.c
new file mode 100644
index 0000000000..2f7adcf74c
--- /dev/null
+++ b/lib/c32swidth.c
@@ -0,0 +1,43 @@
+/* Determine number of screen columns needed for a size-bounded 32-bit wide string.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This file is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   This file is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#define IN_C32SWIDTH
+/* Specification.  */
+#include <uchar.h>
+
+#if _GL_WCHAR_T_IS_UCS4 && !GNULIB_defined_mbstate_t
+
+# include <wchar.h>
+
+_GL_EXTERN_INLINE
+int
+c32swidth (const char32_t *s, size_t n)
+{
+  return wcswidth ((const wchar_t *) s, n);
+}
+
+#else
+
+# include <limits.h>
+
+# define FUNC c32swidth
+# define UNIT char32_t
+# define CHARACTER_WIDTH c32width
+# include "wcswidth-impl.h"
+
+#endif
diff --git a/lib/uchar.in.h b/lib/uchar.in.h
index 3815af4c26..8bf6176b8c 100644
--- a/lib/uchar.in.h
+++ b/lib/uchar.in.h
@@ -494,6 +494,25 @@ _GL_CXXALIASWARN (c32stombs);
 #endif
 
 
+/* Number of screen columns needed for a size-bounded 32-bit wide string.  */
+#if @GNULIB_C32SWIDTH@
+# if (_GL_WCHAR_T_IS_UCS4 && !GNULIB_defined_mbstate_t) && !defined IN_C32SWIDTH
+_GL_BEGIN_C_LINKAGE
+_GL_INLINE _GL_ARG_NONNULL ((1)) int
+c32swidth (const char32_t *s, size_t n)
+{
+  return wcswidth ((const wchar_t *) s, n);
+}
+_GL_END_C_LINKAGE
+# else
+_GL_FUNCDECL_SYS (c32swidth, int, (const char32_t *s, size_t n)
+                                  _GL_ARG_NONNULL ((1)));
+# endif
+_GL_CXXALIAS_SYS (c32swidth, int, (const char32_t *s, size_t n));
+_GL_CXXALIASWARN (c32swidth);
+#endif
+
+
 /* Converts a 32-bit wide character to unibyte character.
    Returns the single-byte representation of WC if it exists,
    or EOF otherwise.  */
diff --git a/lib/wcswidth-impl.h b/lib/wcswidth-impl.h
index a879bfdd93..34cb0b9814 100644
--- a/lib/wcswidth-impl.h
+++ b/lib/wcswidth-impl.h
@@ -16,16 +16,16 @@
    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
 
 int
-wcswidth (const wchar_t *s, size_t n)
+FUNC (const UNIT *s, size_t n)
 {
   int count = 0;
   for (; n > 0; s++, n--)
     {
-      wchar_t c = *s;
-      if (c == (wchar_t)'\0')
+      UNIT c = *s;
+      if (c == (UNIT)'\0')
         break;
       {
-        int width = wcwidth (c);
+        int width = CHARACTER_WIDTH (c);
         if (width < 0)
           goto found_nonprinting;
         if (width > INT_MAX - count)
@@ -39,11 +39,11 @@ wcswidth (const wchar_t *s, size_t n)
      Continue searching for a non-printing wide character.  */
   for (; n > 0; s++, n--)
     {
-      wchar_t c = *s;
-      if (c == (wchar_t)'\0')
+      UNIT c = *s;
+      if (c == (UNIT)'\0')
         break;
       {
-        int width = wcwidth (c);
+        int width = CHARACTER_WIDTH (c);
         if (width < 0)
           goto found_nonprinting;
       }
diff --git a/lib/wcswidth.c b/lib/wcswidth.c
index 8188e380fd..408b826c62 100644
--- a/lib/wcswidth.c
+++ b/lib/wcswidth.c
@@ -22,4 +22,7 @@
 
 #include <limits.h>
 
+#define FUNC wcswidth
+#define UNIT wchar_t
+#define CHARACTER_WIDTH wcwidth
 #include "wcswidth-impl.h"
diff --git a/m4/uchar_h.m4 b/m4/uchar_h.m4
index 5c04e2a6ee..0a48eb20c4 100644
--- a/m4/uchar_h.m4
+++ b/m4/uchar_h.m4
@@ -1,4 +1,4 @@
-# uchar_h.m4 serial 25
+# uchar_h.m4 serial 26
 dnl Copyright (C) 2019-2023 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -221,6 +221,7 @@ AC_DEFUN([gl_UCHAR_H_REQUIRE_DEFAULTS]
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32SNRTOMBS])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32SRTOMBS])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32STOMBS])
+    gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32SWIDTH])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_C32TOB])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MBRTOC32])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MBSNRTOC32S])
diff --git a/modules/c32swidth b/modules/c32swidth
new file mode 100644
index 0000000000..df60f6b473
--- /dev/null
+++ b/modules/c32swidth
@@ -0,0 +1,36 @@
+Description:
+c32swidth() function: Determine the number of screen columns needed for
+a size-bounded 32-bit wide string.
+
+Files:
+lib/c32swidth.c
+lib/wcswidth-impl.h
+
+Depends-on:
+uchar
+wchar
+wcswidth
+c32width
+
+configure.ac:
+AC_REQUIRE([gl_UCHAR_H])
+dnl Determine REPLACE_MBSTATE_T, from which GNULIB_defined_mbstate_t is
+dnl determined.  It describes how mbrtoc32 is implemented.
+AC_REQUIRE([gl_MBSTATE_T_BROKEN])
+AC_REQUIRE([gl_MBRTOC32_SANITYCHECK])
+gl_UCHAR_MODULE_INDICATOR([c32swidth])
+
+Makefile.am:
+lib_SOURCES += c32swidth.c
+
+Include:
+<uchar.h>
+
+Link:
+$(LTLIBUNISTRING) when linking with libtool, $(LIBUNISTRING) otherwise
+
+License:
+LGPLv2+
+
+Maintainer:
+Bruno Haible
diff --git a/modules/uchar b/modules/uchar
index 3c6f3963b9..948bcd7993 100644
--- a/modules/uchar
+++ b/modules/uchar
@@ -58,6 +58,7 @@ uchar.h: uchar.in.h $(top_builddir)/config.status $(CXXDEFS_H)
 	      -e 's/@''GNULIB_C32SNRTOMBS''@/$(GNULIB_C32SNRTOMBS)/g' \
 	      -e 's/@''GNULIB_C32SRTOMBS''@/$(GNULIB_C32SRTOMBS)/g' \
 	      -e 's/@''GNULIB_C32STOMBS''@/$(GNULIB_C32STOMBS)/g' \
+	      -e 's/@''GNULIB_C32SWIDTH''@/$(GNULIB_C32SWIDTH)/g' \
 	      -e 's/@''GNULIB_C32TOB''@/$(GNULIB_C32TOB)/g' \
 	      -e 's/@''GNULIB_MBRTOC32''@/$(GNULIB_MBRTOC32)/g' \
 	      -e 's/@''GNULIB_MBSNRTOC32S''@/$(GNULIB_MBSNRTOC32S)/g' \
-- 
2.34.1

From 8ebfab32803787379c8201fb9f650fcb35e33ecc Mon Sep 17 00:00:00 2001
From: Bruno Haible <br...@clisp.org>
Date: Thu, 4 May 2023 23:27:16 +0200
Subject: [PATCH 2/2] c32swidth: Add tests.

* tests/test-c32swidth.c: New file, based on tests/test-c32width.c.
* modules/c32swidth-tests: New file.
---
 ChangeLog               |   4 ++
 modules/c32swidth-tests |  15 +++++
 tests/test-c32swidth.c  | 128 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 147 insertions(+)
 create mode 100644 modules/c32swidth-tests
 create mode 100644 tests/test-c32swidth.c

diff --git a/ChangeLog b/ChangeLog
index d55b5e5120..aaffe12fc1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2023-05-04  Bruno Haible  <br...@clisp.org>
 
+	c32swidth: Add tests.
+	* tests/test-c32swidth.c: New file, based on tests/test-c32width.c.
+	* modules/c32swidth-tests: New file.
+
 	c32swidth: New module.
 	* lib/uchar.in.h (c32swidth): New declaration.
 	* lib/wcswidth-impl.h: Use macros FUNC, UNIT, CHARACTER_WIDTH.
diff --git a/modules/c32swidth-tests b/modules/c32swidth-tests
new file mode 100644
index 0000000000..e591b9109f
--- /dev/null
+++ b/modules/c32swidth-tests
@@ -0,0 +1,15 @@
+Files:
+tests/test-c32swidth.c
+tests/signature.h
+tests/macros.h
+
+Depends-on:
+localcharset
+setlocale
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-c32swidth
+check_PROGRAMS += test-c32swidth
+test_c32swidth_LDADD = $(LDADD) $(SETLOCALE_LIB) $(LIBUNISTRING)
diff --git a/tests/test-c32swidth.c b/tests/test-c32swidth.c
new file mode 100644
index 0000000000..8bf19afec9
--- /dev/null
+++ b/tests/test-c32swidth.c
@@ -0,0 +1,128 @@
+/* Test of c32swidth() function.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <br...@clisp.org>, 2023.  */
+
+#include <config.h>
+
+#include <uchar.h>
+
+#include "signature.h"
+SIGNATURE_CHECK (c32swidth, int, (const char32_t *, size_t));
+
+#include <locale.h>
+#include <string.h>
+
+#include "localcharset.h"
+#include "macros.h"
+
+int
+main ()
+{
+  int w;
+
+  /* Switch to an UTF-8 locale.  */
+  if (!((setlocale (LC_ALL, "fr_FR.UTF-8") != NULL
+         || setlocale (LC_ALL, "de_DE.UTF-8") != NULL
+         || setlocale (LC_ALL, "es_ES.UTF-8") != NULL
+         || setlocale (LC_ALL, "en_US.UTF-8") != NULL)
+        /* Check whether it's really an UTF-8 locale.
+           On native Windows, these setlocale calls succeed but the encoding
+           of this locale is CP1252, not UTF-8.  */
+        && strcmp (locale_charset (), "UTF-8") == 0))
+    {
+      fprintf (stderr, "Skipping test: No common Unicode locale is installed\n");
+      return 77;
+    }
+
+  {
+    char32_t s[] = { 'f', 'p', 0, 'x' };
+    w = c32swidth (s, 0);
+    ASSERT (w == 0);
+    w = c32swidth (s, 1);
+    ASSERT (w == 1);
+    w = c32swidth (s, 2);
+    ASSERT (w == 2);
+    w = c32swidth (s, 3);
+    ASSERT (w == 2);
+    w = c32swidth (s, 4);
+    ASSERT (w == 2);
+    w = c32swidth (s, (size_t)(-1));
+    ASSERT (w == 2);
+  }
+
+  {
+    char32_t s[] = { 'f', 'p', '\n', 'x' };
+    w = c32swidth (s, 0);
+    ASSERT (w == 0);
+    w = c32swidth (s, 1);
+    ASSERT (w == 1);
+    w = c32swidth (s, 2);
+    ASSERT (w == 2);
+    w = c32swidth (s, 3);
+    ASSERT (w == -1);
+    w = c32swidth (s, 4);
+    ASSERT (w == -1);
+  }
+
+  /* Test width of some non-spacing characters.  */
+  {
+    char32_t s[] = { 'a', 0x0301 };
+    w = c32swidth (s, 2);
+    ASSERT (w == 1);
+  }
+
+  /* Test width of some zero width characters.  */
+  {
+    char32_t s[] = { 'i', 0x200B, 'j' };
+    w = c32swidth (s, 3);
+    ASSERT (w == 2);
+  }
+
+  /* Test width of some math symbols.
+     U+2202 is marked as having ambiguous width (A) in EastAsianWidth.txt
+     (see <https://www.unicode.org/Public/12.0.0/ucd/EastAsianWidth.txt>).
+     The Unicode Standard Annex 11
+     <https://www.unicode.org/reports/tr11/tr11-36.html>
+     says
+       "Ambiguous characters behave like wide or narrow characters
+        depending on the context (language tag, script identification,
+        associated font, source of data, or explicit markup; all can
+        provide the context). If the context cannot be established
+        reliably, they should be treated as narrow characters by default."
+     For c32width(), the only available context information is the locale.
+     The chosen locale above is a Western locale, not an East Asian locale,
+     therefore U+2202 should be treated like a narrow character.  */
+  {
+    char32_t s[] = { 0x2202 };
+    w = c32swidth (s, 1);
+    ASSERT (w == 1);
+  }
+
+  /* Test width of some CJK characters.  */
+  {
+    char32_t s[] = { 0x4E2D, 0x6587 };
+    w = c32swidth (s, 2);
+    ASSERT (w == 4);
+  }
+  {
+    char32_t s[] = { 0x20369, 0x2F876 };
+    w = c32swidth (s, 2);
+    ASSERT (w == 4);
+  }
+
+  return 0;
+}
-- 
2.34.1

Reply via email to