I wrote:
> In Gnulib, a workaround for this issue was added to the 'mbrtowc' module
> in 2016. But this is not the only function affected by this issue.

That's not all. *wprintf also invokes mbrtowc, for the %c directive and
the %s directive. It is affected by the glibc bug:

==================== foo.c ===================
#include <stdio.h>
#include <wchar.h>

/* Reproducer: without ever calling setlocale, format the byte 0x80 into a
   wide-character buffer via the narrow-character %c and %s directives of
   swprintf, and print each return value.  */
int main ()
{
  wchar_t buf[10];
  int ret;
  /* %c directive, given the non-ASCII byte value 0x80.  */
  ret = swprintf (buf, 10, L"%c", 0x80);
  printf ("ret = %d\n", ret);
  /* %s directive, given a one-byte string holding the same byte
     (octal 200 == 0x80).  */
  ret = swprintf (buf, 10, L"%s", "\200");
  printf ("ret = %d\n", ret);
}
==============================================
$ gcc -Wall foo.c && ./a.out
ret = -1
ret = -1

After adding corresponding unit tests to
test-vasnwprintf-posix.c, the %c tests indeed fail.
For the %s directive, vasnprintf.c already contains its own code
that invokes the overridden mbrlen and mbrtowc functions; therefore
the %s tests already pass.


2023-04-01  Bruno Haible  <br...@clisp.org>

        vasnwprintf-posix: Fix behaviour in the C locale.
        * lib/vasnprintf.c (VASNPRINTF): If NEED_WPRINTF_DIRECTIVE_C is set,
        implement the 'c' directive here.
        * m4/vasnprintf.m4 (gl_PREREQ_VASNWPRINTF): Invoke gl_MBRTOWC_C_LOCALE.
        If mbrtowc is buggy in the C locale, define NEED_WPRINTF_DIRECTIVE_C.
        * modules/vasnwprintf (Files): Add m4/mbrtowc.m4.
        * tests/test-vasnwprintf-posix.c (test_function): Add tests of %s and %c
        in the C locale.
        * doc/posix-functions/fwprintf.texi: Mention the C locale behaviour bug.
        * doc/posix-functions/swprintf.texi: Likewise.
        * doc/posix-functions/vfwprintf.texi: Likewise.
        * doc/posix-functions/vswprintf.texi: Likewise.
        * doc/posix-functions/vwprintf.texi: Likewise.
        * doc/posix-functions/wprintf.texi: Likewise.

diff --git a/doc/posix-functions/fwprintf.texi b/doc/posix-functions/fwprintf.texi
index 2b4600de11..db42363cfa 100644
--- a/doc/posix-functions/fwprintf.texi
+++ b/doc/posix-functions/fwprintf.texi
@@ -34,6 +34,10 @@
 argument of @code{strerror(errno)} (or a version of @code{strerror_r})
 instead.
 @item
+In the C or POSIX locales, the @code{%c} and @code{%s} conversions may fail
+on some platforms:
+glibc 2.35.
+@item
 When formatting an integer with grouping flag, this function inserts thousands
 separators even in the "C" locale on some platforms:
 NetBSD 5.1.
diff --git a/doc/posix-functions/swprintf.texi b/doc/posix-functions/swprintf.texi
index b53c96c6d7..cd20d0e702 100644
--- a/doc/posix-functions/swprintf.texi
+++ b/doc/posix-functions/swprintf.texi
@@ -63,6 +63,10 @@
 argument of @code{strerror(errno)} (or a version of @code{strerror_r})
 instead.
 @item
+In the C or POSIX locales, the @code{%c} and @code{%s} conversions may fail
+on some platforms:
+glibc 2.35.
+@item
 When formatting an integer with grouping flag, this function inserts thousands
 separators even in the "C" locale on some platforms:
 NetBSD 5.1.
diff --git a/doc/posix-functions/vfwprintf.texi b/doc/posix-functions/vfwprintf.texi
index 22e4a59212..0712269aa4 100644
--- a/doc/posix-functions/vfwprintf.texi
+++ b/doc/posix-functions/vfwprintf.texi
@@ -34,6 +34,10 @@
 argument of @code{strerror(errno)} (or a version of @code{strerror_r})
 instead.
 @item
+In the C or POSIX locales, the @code{%c} and @code{%s} conversions may fail
+on some platforms:
+glibc 2.35.
+@item
 When formatting an integer with grouping flag, this function inserts thousands
 separators even in the "C" locale on some platforms:
 NetBSD 5.1.
diff --git a/doc/posix-functions/vswprintf.texi b/doc/posix-functions/vswprintf.texi
index 4083cfd2a1..7512a2d023 100644
--- a/doc/posix-functions/vswprintf.texi
+++ b/doc/posix-functions/vswprintf.texi
@@ -36,6 +36,10 @@
 argument of @code{strerror(errno)} (or a version of @code{strerror_r})
 instead.
 @item
+In the C or POSIX locales, the @code{%c} and @code{%s} conversions may fail
+on some platforms:
+glibc 2.35.
+@item
 When formatting an integer with grouping flag, this function inserts thousands
 separators even in the "C" locale on some platforms:
 NetBSD 5.1.
diff --git a/doc/posix-functions/vwprintf.texi b/doc/posix-functions/vwprintf.texi
index 7b029c9ceb..1e364288f6 100644
--- a/doc/posix-functions/vwprintf.texi
+++ b/doc/posix-functions/vwprintf.texi
@@ -37,6 +37,10 @@
 argument of @code{strerror(errno)} (or a version of @code{strerror_r})
 instead.
 @item
+In the C or POSIX locales, the @code{%c} and @code{%s} conversions may fail
+on some platforms:
+glibc 2.35.
+@item
 When formatting an integer with grouping flag, this function inserts thousands
 separators even in the "C" locale on some platforms:
 NetBSD 5.1.
diff --git a/doc/posix-functions/wprintf.texi b/doc/posix-functions/wprintf.texi
index 56106b309e..c4d58fab2e 100644
--- a/doc/posix-functions/wprintf.texi
+++ b/doc/posix-functions/wprintf.texi
@@ -37,6 +37,10 @@
 argument of @code{strerror(errno)} (or a version of @code{strerror_r})
 instead.
 @item
+In the C or POSIX locales, the @code{%c} and @code{%s} conversions may fail
+on some platforms:
+glibc 2.35.
+@item
 When formatting an integer with grouping flag, this function inserts thousands
 separators even in the "C" locale on some platforms:
 NetBSD 5.1.
diff --git a/lib/vasnprintf.c b/lib/vasnprintf.c
index 90e52d8a96..111d898a94 100644
--- a/lib/vasnprintf.c
+++ b/lib/vasnprintf.c
@@ -3543,6 +3543,77 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
                 }
               }
 #endif
+#if NEED_WPRINTF_DIRECTIVE_C && WIDE_CHAR_VERSION
+            else if (dp->conversion == 'c'
+                     && a.arg[dp->arg_index].type != TYPE_WIDE_CHAR)
+              {
+                /* Implement the 'c' directive ourselves, in order to avoid
+                   EILSEQ in the "C" locale.  */
+                int flags = dp->flags;
+                size_t width;
+
+                width = 0;
+                if (dp->width_start != dp->width_end)
+                  {
+                    if (dp->width_arg_index != ARG_NONE)
+                      {
+                        int arg;
+
+                        if (!(a.arg[dp->width_arg_index].type == TYPE_INT))
+                          abort ();
+                        arg = a.arg[dp->width_arg_index].a.a_int;
+                        width = arg;
+                        if (arg < 0)
+                          {
+                            /* "A negative field width is taken as a '-' flag
+                                followed by a positive field width."  */
+                            flags |= FLAG_LEFT;
+                            width = -width;
+                          }
+                      }
+                    else
+                      {
+                        const FCHAR_T *digitp = dp->width_start;
+
+                        do
+                          width = xsum (xtimes (width, 10), *digitp++ - '0');
+                        while (digitp != dp->width_end);
+                      }
+                  }
+
+                /* %c in vasnwprintf.  See the specification of fwprintf.  */
+                {
+                  char arg = (char) a.arg[dp->arg_index].a.a_char;
+                  mbstate_t state;
+                  wchar_t wc;
+
+                  memset (&state, '\0', sizeof (mbstate_t));
+                  int count = mbrtowc (&wc, &arg, 1, &state);
+                  if (count < 0)
+                    /* Invalid or incomplete multibyte character.  */
+                    goto fail_with_EILSEQ;
+
+                  if (1 < width && !(flags & FLAG_LEFT))
+                    {
+                      size_t n = width - 1;
+                      ENSURE_ALLOCATION (xsum (length, n));
+                      DCHAR_SET (result + length, ' ', n);
+                      length += n;
+                    }
+
+                  ENSURE_ALLOCATION (xsum (length, 1));
+                  result[length++] = wc;
+
+                  if (1 < width && (flags & FLAG_LEFT))
+                    {
+                      size_t n = width - 1;
+                      ENSURE_ALLOCATION (xsum (length, n));
+                      DCHAR_SET (result + length, ' ', n);
+                      length += n;
+                    }
+                }
+              }
+#endif
 #if NEED_PRINTF_DIRECTIVE_B || NEED_PRINTF_DIRECTIVE_UPPERCASE_B
             else if (0
 # if NEED_PRINTF_DIRECTIVE_B
@@ -6816,7 +6887,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
     errno = ENOMEM;
     goto fail_with_errno;
 
-#if ENABLE_UNISTDIO || ((!USE_SNPRINTF || WIDE_CHAR_VERSION || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || NEED_PRINTF_DIRECTIVE_LS || ENABLE_WCHAR_FALLBACK) && HAVE_WCHAR_T) || ((NEED_PRINTF_DIRECTIVE_LC || ENABLE_WCHAR_FALLBACK) && HAVE_WINT_T && !WIDE_CHAR_VERSION)
+#if ENABLE_UNISTDIO || ((!USE_SNPRINTF || WIDE_CHAR_VERSION || !HAVE_SNPRINTF_RETVAL_C99 || USE_MSVC__SNPRINTF || NEED_PRINTF_DIRECTIVE_LS || ENABLE_WCHAR_FALLBACK) && HAVE_WCHAR_T) || ((NEED_PRINTF_DIRECTIVE_LC || ENABLE_WCHAR_FALLBACK) && HAVE_WINT_T && !WIDE_CHAR_VERSION) || (NEED_WPRINTF_DIRECTIVE_C && WIDE_CHAR_VERSION)
   fail_with_EILSEQ:
     errno = EILSEQ;
     goto fail_with_errno;
diff --git a/m4/vasnprintf.m4 b/m4/vasnprintf.m4
index 929f3d80af..86133b30b1 100644
--- a/m4/vasnprintf.m4
+++ b/m4/vasnprintf.m4
@@ -1,4 +1,4 @@
-# vasnprintf.m4 serial 47
+# vasnprintf.m4 serial 48
 dnl Copyright (C) 2002-2004, 2006-2023 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -105,6 +105,15 @@ AC_DEFUN_ONCE([gl_PREREQ_VASNWPRINTF]
          that contains null wide characters.])
       ;;
   esac
+  gl_MBRTOWC_C_LOCALE
+  case "$gl_cv_func_mbrtowc_C_locale_sans_EILSEQ" in
+    *yes) ;;
+    *)
+      AC_DEFINE([NEED_WPRINTF_DIRECTIVE_C], [1],
+        [Define if the vasnwprintf implementation needs special code for
+         the 'c' directive.])
+      ;;
+  esac
   gl_MUSL_LIBC
   gl_PREREQ_VASNXPRINTF
 ])
diff --git a/modules/vasnwprintf b/modules/vasnwprintf
index 82d4aa4668..70f3fb2c95 100644
--- a/modules/vasnwprintf
+++ b/modules/vasnwprintf
@@ -19,6 +19,7 @@ m4/stdint_h.m4
 m4/inttypes_h.m4
 m4/vasnprintf.m4
 m4/printf.m4
+m4/mbrtowc.m4
 m4/math_h.m4
 m4/exponentd.m4
 m4/musl.m4
diff --git a/tests/test-vasnwprintf-posix.c b/tests/test-vasnwprintf-posix.c
index 74a26079d3..125b9386a4 100644
--- a/tests/test-vasnwprintf-posix.c
+++ b/tests/test-vasnwprintf-posix.c
@@ -3757,6 +3757,28 @@ test_function (wchar_t * (*my_asnwprintf) (wchar_t *, size_t *, const wchar_t *,
     free (result);
   }
 
+  /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the
+     "C" locale.  Furthermore, when you attempt to set the "C" or "POSIX"
+     locale via setlocale(), what you get is a "C" locale with UTF-8 encoding,
+     that is, effectively the "C.UTF-8" locale.  */
+#ifndef __ANDROID__
+  { /* The conversion of %s to wide characters is done as if through repeated
+       invocations of mbrtowc(), and in the "C" and "POSIX" locales, "an
+       [EILSEQ] error cannot occur since all byte values are valid characters",
+       says POSIX:2018.  */
+    int c;
+
+    for (c = 0; c < 0x100; c++)
+      {
+        char s[2] = { c, '\0' };
+        size_t length;
+        wchar_t *result = my_asnwprintf (NULL, &length, L"%s", s);
+        ASSERT (result != NULL);
+        free (result);
+      }
+  }
+#endif
+
 #if HAVE_WCHAR_T
   static wchar_t L_xyz[4] = { 'x', 'y', 'z', 0 };
 
@@ -3960,6 +3982,26 @@ test_function (wchar_t * (*my_asnwprintf) (wchar_t *, size_t *, const wchar_t *,
     free (result);
   }
 
+  /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the
+     "C" locale.  Furthermore, when you attempt to set the "C" or "POSIX"
+     locale via setlocale(), what you get is a "C" locale with UTF-8 encoding,
+     that is, effectively the "C.UTF-8" locale.  */
+#ifndef __ANDROID__
+  { /* The conversion of %c to wide character is done as if through btowc(),
+       and in the "C" and "POSIX" locales, "btowc() shall not return WEOF if
+       c has a value in the range 0 to 255 inclusive", says POSIX:2018.  */
+    int c;
+
+    for (c = 0; c < 0x100; c++)
+      {
+        size_t length;
+        wchar_t *result = my_asnwprintf (NULL, &length, L"%c", c);
+        ASSERT (result != NULL);
+        free (result);
+      }
+  }
+#endif
+
 #if HAVE_WCHAR_T
   static wint_t L_x = (wchar_t) 'x';
 




Reply via email to