Despite more platform bugs, here's the module for a POSIX-compliant wcsrtombs() function.
2008-12-21 Bruno Haible <br...@clisp.org> New module 'wcsrtombs'. * lib/wchar.in.h (wcsrtombs): New declaration. * lib/wcsrtombs.c: New file. * m4/wcsrtombs.m4: New file. * m4/wchar.m4 (gl_WCHAR_H_DEFAULTS): Initialize GNULIB_WCSRTOMBS, HAVE_WCSRTOMBS, REPLACE_WCSRTOMBS. * modules/wchar (Makefile.am): Substitute GNULIB_WCSRTOMBS, HAVE_WCSRTOMBS, REPLACE_WCSRTOMBS. * modules/wcsrtombs: New file. * doc/posix-functions/wcsrtombs.texi: Mention the new module and the bugs. =============================== lib/wcsrtombs.c ===============================
/* Convert wide string to string.
   Copyright (C) 2008 Free Software Foundation, Inc.
   Written by Bruno Haible <br...@clisp.org>, 2008.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>

/* Specification.  */
#include <wchar.h>

/* Conversion state substituted when the caller passes PS == NULL.  */
static mbstate_t internal_state;

#if HAVE_WCSRTOMBS && !WCSRTOMBS_TERMINATION_BUG
/* Override the system's wcsrtombs() function.  */

# undef wcsrtombs

/* Wrapper around the system's wcsrtombs(), compiled when the system
   function exists and does not have the termination bug.
   - A NULL PS is replaced by the file-local internal_state before the
     system function is called.
   - When WCSRTOMBS_NULL_ARG_BUG is defined and DEST is NULL, the system
     function is given a temporary copy of *SRCP, so the caller's *SRCP
     is left unchanged.
   In all other cases the arguments are forwarded as they are.  The return
   value is whatever the system function returns.  */
size_t
rpl_wcsrtombs (char *dest, const wchar_t **srcp, size_t len, mbstate_t *ps)
{
  if (ps == NULL)
    ps = &internal_state;
# if WCSRTOMBS_NULL_ARG_BUG
  if (dest == NULL)
    {
      /* Work on a copy of the source pointer; only the copy can be
         modified by the call below.  */
      const wchar_t *temp_src = *srcp;

      return wcsrtombs (NULL, &temp_src, len, ps);
    }
  else
# endif
    return wcsrtombs (dest, srcp, len, ps);
}

#else
/* Implement wcsrtombs on top of wcrtomb(). 
*/

# include <errno.h>
# include <stdlib.h>
# include <string.h>

/* Convert the wide string starting at *SRCP to a multibyte string, one
   wide character at a time, using wcrtomb().

   A NULL PS is replaced by the file-local internal_state.

   If DEST is non-NULL, at most LEN bytes are stored at DEST.  Conversion
   stops when LEN reaches 0, when the next character would not fit, or
   after the terminating null wide character has been converted.  In the
   last case *SRCP is set to NULL; otherwise *SRCP is set to the first
   wide character that was not stored.  The return value is the number of
   bytes stored, not counting the bytes produced for the terminating null
   wide character.

   If DEST is NULL, nothing is stored, LEN is ignored, *SRCP is not
   modified, and the conversion runs on a copy of *PS.  The return value
   is the number of bytes the conversion would produce, again not counting
   the bytes for the terminating null wide character.

   If wcrtomb() reports an error, errno is set to EILSEQ and (size_t)(-1)
   is returned; when DEST is non-NULL, *SRCP is additionally set to the
   wide character on which wcrtomb() failed.

   The function calls abort() if MB_CUR_MAX exceeds the size of the
   internal 64-byte buffer, or if wcrtomb() returns more than MB_CUR_MAX.  */
size_t
wcsrtombs (char *dest, const wchar_t **srcp, size_t len, mbstate_t *ps)
{
  if (ps == NULL)
    ps = &internal_state;
  {
    const wchar_t *src = *srcp;
    size_t cur_max = MB_CUR_MAX;
    /* Scratch buffer for a single converted character.  */
    char buf[64];

    if (!(cur_max <= sizeof (buf)))
      abort ();

    if (dest != NULL)
      {
        char *destptr = dest;

        for (; len > 0; src++)
          {
            wchar_t wc = *src;
            /* Convert straight into DEST only while at least cur_max
               bytes remain there; otherwise convert into BUF and copy
               below once the actual length is known to fit.  */
            size_t ret = wcrtomb (len >= cur_max ? destptr : buf, wc, ps);

            if (ret == (size_t)(-1))
              goto bad_input;
            if (!(ret <= cur_max))
              abort ();
            /* Not enough room for this character: stop before it.  Note
               that wcrtomb() has already been applied to *PS for it.  */
            if (len < ret)
              break;
            if (len < cur_max)
              memcpy (destptr, buf, ret);
            if (wc == 0)
              {
                src = NULL;
                /* Here mbsinit (ps).  */
                break;
              }
            destptr += ret;
            len -= ret;
          }
        *srcp = src;
        return destptr - dest;
      }
    else
      {
        /* Ignore dest and len, don't store *srcp at the end, and
           don't clobber *ps.  */
        mbstate_t state = *ps;
        size_t totalcount = 0;

        for (;; src++)
          {
            wchar_t wc = *src;
            size_t ret = wcrtomb (buf, wc, &state);

            if (ret == (size_t)(-1))
              goto bad_input2;
            if (wc == 0)
              {
                /* Here mbsinit (&state).  */
                break;
              }
            totalcount += ret;
          }
        return totalcount;
      }

    /* Error exit for the DEST != NULL case: report the failing position.  */
   bad_input:
    *srcp = src;
    /* Error exit for the DEST == NULL case: *SRCP stays untouched.  */
   bad_input2:
    errno = EILSEQ;
    return (size_t)(-1);
  }
}

#endif
=============================== m4/wcsrtombs.m4 =============================== # wcsrtombs.m4 serial 1 dnl Copyright (C) 2008 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. 
# gl_FUNC_WCSRTOMBS
# Checks whether the wcsrtombs function exists.  If it does not, sets
# HAVE_WCSRTOMBS=0.  If it does, runs gl_WCSRTOMBS_TERMINATION and
# gl_WCSRTOMBS_NULL; for each of the two results that does not end in "yes",
# defines WCSRTOMBS_TERMINATION_BUG or WCSRTOMBS_NULL_ARG_BUG respectively
# and sets REPLACE_WCSRTOMBS=1.  When the function is missing or is to be
# replaced, invokes gl_REPLACE_WCHAR_H, registers wcsrtombs with AC_LIBOBJ
# and invokes gl_PREREQ_WCSRTOMBS.
AC_DEFUN([gl_FUNC_WCSRTOMBS], [ AC_REQUIRE([gl_WCHAR_H_DEFAULTS]) AC_REQUIRE([AC_TYPE_MBSTATE_T]) AC_CHECK_FUNCS_ONCE([wcsrtombs]) if test $ac_cv_func_wcsrtombs = no; then HAVE_WCSRTOMBS=0 else gl_WCSRTOMBS_TERMINATION gl_WCSRTOMBS_NULL case "$gl_cv_func_wcsrtombs_termination" in *yes) ;; *) AC_DEFINE([WCSRTOMBS_TERMINATION_BUG], [1], [Define if the wcsrtombs function may set the source pointer to NULL without NUL-terminating the destination.]) REPLACE_WCSRTOMBS=1 ;; esac case "$gl_cv_func_wcsrtombs_null" in *yes) ;; *) AC_DEFINE([WCSRTOMBS_NULL_ARG_BUG], [1], [Define if the wcsrtombs function has the NULL destination argument bug.]) REPLACE_WCSRTOMBS=1 ;; esac fi if test $HAVE_WCSRTOMBS = 0 || test $REPLACE_WCSRTOMBS = 1; then gl_REPLACE_WCHAR_H AC_LIBOBJ([wcsrtombs]) gl_PREREQ_WCSRTOMBS fi ]) dnl Test whether the wcsrtombs implementation stores a non-NULL source pointer dnl as long as it has not written the final NUL byte to the destination string. dnl The OSF/1 5.1 implementation stores a NULL pointer already after storing dnl the last non-NUL character, even if there is no more room for the dnl terminating NUL character. dnl Result is gl_cv_func_wcsrtombs_termination. AC_DEFUN([gl_WCSRTOMBS_TERMINATION], [ AC_REQUIRE([AC_PROG_CC]) AC_REQUIRE([gt_LOCALE_FR]) AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles AC_CACHE_CHECK([whether wcsrtombs updates the source pointer correctly], [gl_cv_func_wcsrtombs_termination], [ dnl Initial guess, used when cross-compiling or when no suitable locale dnl is present. changequote(,)dnl case "$host_os" in # Guess no on OSF/1. osf*) gl_cv_func_wcsrtombs_termination="guessing no" ;; # Guess yes otherwise. 
*) gl_cv_func_wcsrtombs_termination="guessing yes" ;; esac changequote([,])dnl if test $LOCALE_FR != none; then AC_TRY_RUN([ #include <locale.h> #include <stdlib.h> #include <wchar.h> int main () { if (setlocale (LC_ALL, "$LOCALE_FR") != NULL) { const char original[] = "B\374\337er"; wchar_t input[10]; if (mbstowcs (input, original, 10) == 5) { const wchar_t *src = input; char output[5]; wcsrtombs (output, &src, 5, NULL); if (src != input + 5) return 1; } } return 0; }], [gl_cv_func_wcsrtombs_termination=yes], [gl_cv_func_wcsrtombs_termination=no], []) fi ]) ]) dnl Test whether the wcsrtombs implementation supports a NULL argument dnl correctly. This is not the case on HP-UX 11.11 and OSF/1 5.1: These dnl implementations update the source pointer also if the destination argument dnl is NULL. dnl Result is gl_cv_func_wcsrtombs_null. AC_DEFUN([gl_WCSRTOMBS_NULL], [ AC_REQUIRE([AC_PROG_CC]) AC_REQUIRE([gt_LOCALE_FR]) AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles AC_CACHE_CHECK([whether wcsrtombs supports a NULL argument correctly], [gl_cv_func_wcsrtombs_null], [ dnl Initial guess, used when cross-compiling or when no suitable locale dnl is present. changequote(,)dnl case "$host_os" in # Guess no on HP-UX and OSF/1. hpux* | osf*) gl_cv_func_wcsrtombs_null="guessing no" ;; # Guess yes otherwise. *) gl_cv_func_wcsrtombs_null="guessing yes" ;; esac changequote([,])dnl if test $LOCALE_FR != none; then AC_TRY_RUN([ #include <locale.h> #include <stdlib.h> #include <wchar.h> int main () { if (setlocale (LC_ALL, "$LOCALE_FR") != NULL) { const char original[] = "B\374\337er"; wchar_t input[10]; if (mbstowcs (input, original, 10) == 5) { const wchar_t *src = input; wcsrtombs (NULL, &src, 10, NULL); if (src != input) return 1; } } return 0; }], [gl_cv_func_wcsrtombs_null=yes], [gl_cv_func_wcsrtombs_null=no], []) fi ]) ]) # Prerequisites of lib/wcsrtombs.c. 
AC_DEFUN([gl_PREREQ_WCSRTOMBS], [ : ]) ============================== modules/wcsrtombs ============================== Description: wcsrtombs() function: convert wide string to string. Files: lib/wcsrtombs.c m4/wcsrtombs.m4 m4/mbstate_t.m4 m4/locale-fr.m4 m4/codeset.m4 Depends-on: wchar wcrtomb configure.ac: gl_FUNC_WCSRTOMBS gl_WCHAR_MODULE_INDICATOR([wcsrtombs]) Makefile.am: Include: <wchar.h> License: LGPL Maintainer: Bruno Haible =============================================================================== --- doc/posix-functions/wcsrtombs.texi.orig 2008-12-21 23:42:03.000000000 +0100 +++ doc/posix-functions/wcsrtombs.texi 2008-12-21 23:41:11.000000000 +0100 @@ -4,18 +4,24 @@ POSIX specification: @url{http://www.opengroup.org/onlinepubs/9699919799/functions/wcsrtombs.html} -Gnulib module: --- +Gnulib module: wcsrtombs Portability problems fixed by Gnulib: @itemize +@item +This function is missing on some platforms: +HP-UX 11.00, IRIX 6.5, Solaris 2.6, mingw, Interix 3.5. +@item +This function may set the source pointer to NULL before NUL terminating the destination string on some platforms: +OSF/1 5.1. +@item +This function updates the source pointer also if the destination argument is NULL on some platforms: +HP-UX 11, OSF/1 5.1. @end itemize Portability problems not fixed by Gnulib: @itemize @item -This function is missing on some platforms: -HP-UX 11.00, IRIX 6.5, Solaris 2.6, mingw, Interix 3.5. -@item On AIX and Windows platforms, @code{wchar_t} is a 16-bit type and therefore cannot accommodate all Unicode characters. @end itemize --- lib/wchar.in.h.orig 2008-12-21 23:42:03.000000000 +0100 +++ lib/wchar.in.h 2008-12-21 22:08:51.000000000 +0100 @@ -223,6 +223,24 @@ #endif +/* Convert a wide string to a string. 
*/ +#if @GNULIB_WCSRTOMBS@ +# if @REPLACE_WCSRTOMBS@ +# undef wcsrtombs +# define wcsrtombs rpl_wcsrtombs +# endif +# if !@HAVE_WCSRTOMBS@ || @REPLACE_WCSRTOMBS@ +extern size_t wcsrtombs (char *dest, const wchar_t **srcp, size_t len, mbstate_t *ps); +# endif +#elif defined GNULIB_POSIXCHECK +# undef wcsrtombs +# define wcsrtombs(d,s,l,p) \ + (GL_LINK_WARNING ("wcsrtombs is unportable - " \ + "use gnulib module wcsrtombs for portability"), \ + wcsrtombs (d, s, l, p)) +#endif + + /* Return the number of screen columns needed for WC. */ #if @GNULIB_WCWIDTH@ # if @REPLACE_WCWIDTH@ --- m4/wchar.m4.orig 2008-12-21 23:42:03.000000000 +0100 +++ m4/wchar.m4 2008-12-21 22:09:29.000000000 +0100 @@ -7,7 +7,7 @@ dnl Written by Eric Blake. -# wchar.m4 serial 19 +# wchar.m4 serial 20 AC_DEFUN([gl_WCHAR_H], [ @@ -69,6 +69,7 @@ GNULIB_MBSRTOWCS=0; AC_SUBST([GNULIB_MBSRTOWCS]) GNULIB_MBSNRTOWCS=0; AC_SUBST([GNULIB_MBSNRTOWCS]) GNULIB_WCRTOMB=0; AC_SUBST([GNULIB_WCRTOMB]) + GNULIB_WCSRTOMBS=0; AC_SUBST([GNULIB_WCSRTOMBS]) GNULIB_WCWIDTH=0; AC_SUBST([GNULIB_WCWIDTH]) dnl Assume proper GNU behavior unless another module says otherwise. 
HAVE_BTOWC=1; AC_SUBST([HAVE_BTOWC]) @@ -78,6 +79,7 @@ HAVE_MBSRTOWCS=1; AC_SUBST([HAVE_MBSRTOWCS]) HAVE_MBSNRTOWCS=1; AC_SUBST([HAVE_MBSNRTOWCS]) HAVE_WCRTOMB=1; AC_SUBST([HAVE_WCRTOMB]) + HAVE_WCSRTOMBS=1; AC_SUBST([HAVE_WCSRTOMBS]) HAVE_DECL_WCTOB=1; AC_SUBST([HAVE_DECL_WCTOB]) HAVE_DECL_WCWIDTH=1; AC_SUBST([HAVE_DECL_WCWIDTH]) REPLACE_MBSTATE_T=0; AC_SUBST([REPLACE_MBSTATE_T]) @@ -88,6 +90,7 @@ REPLACE_MBSRTOWCS=0; AC_SUBST([REPLACE_MBSRTOWCS]) REPLACE_MBSNRTOWCS=0;AC_SUBST([REPLACE_MBSNRTOWCS]) REPLACE_WCRTOMB=0; AC_SUBST([REPLACE_WCRTOMB]) + REPLACE_WCSRTOMBS=0; AC_SUBST([REPLACE_WCSRTOMBS]) REPLACE_WCWIDTH=0; AC_SUBST([REPLACE_WCWIDTH]) WCHAR_H=''; AC_SUBST([WCHAR_H]) ]) --- modules/wchar.orig 2008-12-21 23:42:03.000000000 +0100 +++ modules/wchar 2008-12-21 22:09:11.000000000 +0100 @@ -33,6 +33,7 @@ -e 's|@''GNULIB_MBSRTOWCS''@|$(GNULIB_MBSRTOWCS)|g' \ -e 's|@''GNULIB_MBSNRTOWCS''@|$(GNULIB_MBSNRTOWCS)|g' \ -e 's|@''GNULIB_WCRTOMB''@|$(GNULIB_WCRTOMB)|g' \ + -e 's|@''GNULIB_WCSRTOMBS''@|$(GNULIB_WCSRTOMBS)|g' \ -e 's|@''GNULIB_WCWIDTH''@|$(GNULIB_WCWIDTH)|g' \ -e 's|@''HAVE_WINT_T''@|$(HAVE_WINT_T)|g' \ -e 's|@''HAVE_BTOWC''@|$(HAVE_BTOWC)|g' \ @@ -42,6 +43,7 @@ -e 's|@''HAVE_MBSRTOWCS''@|$(HAVE_MBSRTOWCS)|g' \ -e 's|@''HAVE_MBSNRTOWCS''@|$(HAVE_MBSNRTOWCS)|g' \ -e 's|@''HAVE_WCRTOMB''@|$(HAVE_WCRTOMB)|g' \ + -e 's|@''HAVE_WCSRTOMBS''@|$(HAVE_WCSRTOMBS)|g' \ -e 's|@''HAVE_DECL_WCTOB''@|$(HAVE_DECL_WCTOB)|g' \ -e 's|@''HAVE_DECL_WCWIDTH''@|$(HAVE_DECL_WCWIDTH)|g' \ -e 's|@''REPLACE_MBSTATE_T''@|$(REPLACE_MBSTATE_T)|g' \ @@ -52,6 +54,7 @@ -e 's|@''REPLACE_MBSRTOWCS''@|$(REPLACE_MBSRTOWCS)|g' \ -e 's|@''REPLACE_MBSNRTOWCS''@|$(REPLACE_MBSNRTOWCS)|g' \ -e 's|@''REPLACE_WCRTOMB''@|$(REPLACE_WCRTOMB)|g' \ + -e 's|@''REPLACE_WCSRTOMBS''@|$(REPLACE_WCSRTOMBS)|g' \ -e 's|@''REPLACE_WCWIDTH''@|$(REPLACE_WCWIDTH)|g' \ -e '/definition of GL_LINK_WARNING/r $(LINK_WARNING_H)' \ < $(srcdir)/wchar.in.h; \