Here's the replacement for a missing mbsrtowcs function. It is quite straightforward, except for a small hole in the specification: ISO C99 does not specify how the state is to be updated. glibc updates it if and only if the dest argument is not NULL, which is what makes the most sense.
2008-12-19 Bruno Haible <br...@clisp.org> New module 'mbsrtowcs'. * lib/wchar.in.h (mbsrtowcs): New declaration. * lib/mbsrtowcs.c: New file. * m4/mbsrtowcs.m4: New file. * modules/mbsrtowcs: New file. * m4/wchar.m4 (gl_WCHAR_H_DEFAULTS): Initialize GNULIB_MBSRTOWCS and HAVE_MBSRTOWCS. * modules/wchar (Makefile.am): Substitute GNULIB_MBSRTOWCS and HAVE_MBSRTOWCS. * doc/posix-functions/mbsrtowcs.texi: Document the new module. =============================== lib/mbsrtowcs.c =============================== /* Convert string to wide string. Copyright (C) 2008 Free Software Foundation, Inc. Written by Bruno Haible <br...@clisp.org>, 2008. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <config.h> /* Specification. 
*/ #include <wchar.h> #include <errno.h> #include <limits.h> #include <stdlib.h> #include "strnlen1.h" static mbstate_t internal_state; size_t mbsrtowcs (wchar_t *dest, const char **srcp, size_t len, mbstate_t *ps) { if (ps == NULL) ps = &internal_state; { const char *src = *srcp; if (dest != NULL) { wchar_t *destptr = dest; for (; len > 0; destptr++, len--) { size_t src_avail; size_t ret; /* An optimized variant of src_avail = strnlen1 (src, MB_LEN_MAX); */ if (src[0] == '\0') src_avail = 1; else if (src[1] == '\0') src_avail = 2; else if (src[2] == '\0') src_avail = 3; else if (MB_LEN_MAX <= 4 || src[3] == '\0') src_avail = 4; else src_avail = 4 + strnlen1 (src + 4, MB_LEN_MAX - 4); /* Parse the next multibyte character. */ ret = mbrtowc (destptr, src, src_avail, ps); if (ret == (size_t)(-2)) /* Encountered a multibyte character that extends past a '\0' byte or that is longer than MB_LEN_MAX bytes. Cannot happen. */ abort (); if (ret == (size_t)(-1)) goto bad_input; if (ret == 0) { src = NULL; /* Here mbsinit (ps). */ break; } src += ret; } *srcp = src; return destptr - dest; } else { /* Ignore dest and len, don't store *srcp at the end, and don't clobber *ps. */ mbstate_t state = *ps; size_t totalcount = 0; for (;; totalcount++) { size_t src_avail; size_t ret; /* An optimized variant of src_avail = strnlen1 (src, MB_LEN_MAX); */ if (src[0] == '\0') src_avail = 1; else if (src[1] == '\0') src_avail = 2; else if (src[2] == '\0') src_avail = 3; else if (MB_LEN_MAX <= 4 || src[3] == '\0') src_avail = 4; else src_avail = 4 + strnlen1 (src + 4, MB_LEN_MAX - 4); /* Parse the next multibyte character. */ ret = mbrtowc (NULL, src, src_avail, &state); if (ret == (size_t)(-2)) /* Encountered a multibyte character that extends past a '\0' byte or that is longer than MB_LEN_MAX bytes. Cannot happen. */ abort (); if (ret == (size_t)(-1)) goto bad_input2; if (ret == 0) { /* Here mbsinit (&state). 
*/ break; } src += ret; } return totalcount; } bad_input: *srcp = src; bad_input2: errno = EILSEQ; return (size_t)(-1); } } =============================== m4/mbsrtowcs.m4 =============================== # mbsrtowcs.m4 serial 1 dnl Copyright (C) 2008 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. AC_DEFUN([gl_FUNC_MBSRTOWCS], [ AC_REQUIRE([gl_WCHAR_H_DEFAULTS]) AC_REQUIRE([AC_TYPE_MBSTATE_T]) AC_CHECK_FUNCS_ONCE([mbsrtowcs]) if test $ac_cv_func_mbsrtowcs = no; then HAVE_MBSRTOWCS=0 gl_REPLACE_WCHAR_H AC_LIBOBJ([mbsrtowcs]) gl_PREREQ_MBSRTOWCS fi ]) # Prerequisites of lib/mbsrtowcs.c. AC_DEFUN([gl_PREREQ_MBSRTOWCS], [ : ]) ============================== modules/mbsrtowcs ============================== Description: mbsrtowcs() function: convert string to wide string. Files: lib/mbsrtowcs.c m4/mbsrtowcs.m4 m4/mbstate_t.m4 Depends-on: wchar mbrtowc strnlen1 configure.ac: gl_FUNC_MBSRTOWCS gl_WCHAR_MODULE_INDICATOR([mbsrtowcs]) Makefile.am: Include: <wchar.h> License: LGPL Maintainer: Bruno Haible =============================================================================== --- lib/wchar.in.h.orig 2008-12-20 01:55:20.000000000 +0100 +++ lib/wchar.in.h 2008-12-19 23:48:14.000000000 +0100 @@ -151,6 +151,20 @@ #endif +/* Convert a string to a wide string. */ +#if @GNULIB_MBSRTOWCS@ +# if !...@have_mbsrtowcs@ +extern size_t mbsrtowcs (wchar_t *dest, const char **srcp, size_t len, mbstate_t *ps); +# endif +#elif defined GNULIB_POSIXCHECK +# undef mbsrtowcs +# define mbsrtowcs(d,s,l,p) \ + (GL_LINK_WARNING ("mbsrtowcs is unportable - " \ + "use gnulib module mbsrtowcs for portability"), \ + mbsrtowcs (d, s, l, p)) +#endif + + /* Return the number of screen columns needed for WC. 
*/ #if @GNULIB_WCWIDTH@ # if @REPLACE_WCWIDTH@ --- m4/wchar.m4.orig 2008-12-20 01:55:20.000000000 +0100 +++ m4/wchar.m4 2008-12-19 23:50:50.000000000 +0100 @@ -7,7 +7,7 @@ dnl Written by Eric Blake. -# wchar.m4 serial 10 +# wchar.m4 serial 11 AC_DEFUN([gl_WCHAR_H], [ @@ -61,18 +61,20 @@ AC_DEFUN([gl_WCHAR_H_DEFAULTS], [ - GNULIB_BTOWC=0; AC_SUBST([GNULIB_BTOWC]) - GNULIB_WCTOB=0; AC_SUBST([GNULIB_WCTOB]) - GNULIB_MBSINIT=0; AC_SUBST([GNULIB_MBSINIT]) - GNULIB_MBRTOWC=0; AC_SUBST([GNULIB_MBRTOWC]) - GNULIB_MBRLEN=0; AC_SUBST([GNULIB_MBRLEN]) - GNULIB_WCWIDTH=0; AC_SUBST([GNULIB_WCWIDTH]) + GNULIB_BTOWC=0; AC_SUBST([GNULIB_BTOWC]) + GNULIB_WCTOB=0; AC_SUBST([GNULIB_WCTOB]) + GNULIB_MBSINIT=0; AC_SUBST([GNULIB_MBSINIT]) + GNULIB_MBRTOWC=0; AC_SUBST([GNULIB_MBRTOWC]) + GNULIB_MBRLEN=0; AC_SUBST([GNULIB_MBRLEN]) + GNULIB_MBSRTOWCS=0; AC_SUBST([GNULIB_MBSRTOWCS]) + GNULIB_WCWIDTH=0; AC_SUBST([GNULIB_WCWIDTH]) dnl Assume proper GNU behavior unless another module says otherwise. HAVE_BTOWC=1; AC_SUBST([HAVE_BTOWC]) HAVE_WCTOB=1; AC_SUBST([HAVE_WCTOB]) HAVE_MBSINIT=1; AC_SUBST([HAVE_MBSINIT]) HAVE_MBRTOWC=1; AC_SUBST([HAVE_MBRTOWC]) HAVE_MBRLEN=1; AC_SUBST([HAVE_MBRLEN]) + HAVE_MBSRTOWCS=1; AC_SUBST([HAVE_MBSRTOWCS]) HAVE_DECL_WCWIDTH=1; AC_SUBST([HAVE_DECL_WCWIDTH]) REPLACE_WCWIDTH=0; AC_SUBST([REPLACE_WCWIDTH]) WCHAR_H=''; AC_SUBST([WCHAR_H]) --- modules/wchar.orig 2008-12-20 01:55:20.000000000 +0100 +++ modules/wchar 2008-12-19 23:49:38.000000000 +0100 @@ -30,6 +30,7 @@ -e 's|@''GNULIB_MBSINIT''@|$(GNULIB_MBSINIT)|g' \ -e 's|@''GNULIB_MBRTOWC''@|$(GNULIB_MBRTOWC)|g' \ -e 's|@''GNULIB_MBRLEN''@|$(GNULIB_MBRLEN)|g' \ + -e 's|@''GNULIB_MBSRTOWCS''@|$(GNULIB_MBSRTOWCS)|g' \ -e 's|@''GNULIB_WCWIDTH''@|$(GNULIB_WCWIDTH)|g' \ -e 's|@''HAVE_WINT_T''@|$(HAVE_WINT_T)|g' \ -e 's|@''HAVE_BTOWC''@|$(HAVE_BTOWC)|g' \ @@ -37,6 +38,7 @@ -e 's|@''HAVE_MBSINIT''@|$(HAVE_MBSINIT)|g' \ -e 's|@''HAVE_MBRTOWC''@|$(HAVE_MBRTOWC)|g' \ -e 's|@''HAVE_MBRLEN''@|$(HAVE_MBRLEN)|g' \ + -e 
's|@''HAVE_MBSRTOWCS''@|$(HAVE_MBSRTOWCS)|g' \ -e 's|@''HAVE_DECL_WCWIDTH''@|$(HAVE_DECL_WCWIDTH)|g' \ -e 's|@''REPLACE_WCWIDTH''@|$(REPLACE_WCWIDTH)|g' \ -e '/definition of GL_LINK_WARNING/r $(LINK_WARNING_H)' \ --- doc/posix-functions/mbsrtowcs.texi.orig 2008-12-20 01:55:20.000000000 +0100 +++ doc/posix-functions/mbsrtowcs.texi 2008-12-19 23:55:41.000000000 +0100 @@ -4,18 +4,18 @@ POSIX specification: @url{http://www.opengroup.org/onlinepubs/9699919799/functions/mbsrtowcs.html} -Gnulib module: --- +Gnulib module: mbsrtowcs Portability problems fixed by Gnulib: @itemize +@item +This function is missing on some platforms: +HP-UX 11, IRIX 6.5, Solaris 2.6, mingw, Interix 3.5. @end itemize Portability problems not fixed by Gnulib: @itemize @item -This function is missing on some platforms: -HP-UX 11, IRIX 6.5, Solaris 2.6, mingw, Interix 3.5. -@item On Windows platforms, @code{wchar_t} is a 16-bit type and therefore cannot accommodate all Unicode characters. @end itemize