Diff below fixes our wcsto{f,d,ld}() implementations in the following ways:
- It handles "inf", "infinity", "nan", and "nan(whatever)" forms.
- It rejects strings like "-" and "+" by excluding the sign characters when checking that we've actually matched some characters to parse.
- Because "nan(whatever)" allows any characters, the character conversion can no longer assume a 1-wchar_t-to-1-char mapping. Instead, we need to call wcsnrtombs() once to figure out how large a buffer to allocate, wcsnrtombs() again to convert the characters, and finally mbsnrtowcs() afterwards to count how many wide characters strtod() actually consumed.
- It sets *endptr = nptr correctly for all failure cases.

With these changes, libc++'s std::sto{f,d,ld}() unit tests pass for wide character strings.

ok?

Index: locale/_wcstod.h =================================================================== RCS file: /home/matthew/anoncvs/cvs/src/lib/libc/locale/_wcstod.h,v retrieving revision 1.1 diff -u -p -r1.1 _wcstod.h --- locale/_wcstod.h 13 Jan 2009 18:18:31 -0000 1.1 +++ locale/_wcstod.h 2 Jun 2013 06:11:12 -0000 @@ -44,6 +44,7 @@ FUNCNAME(const wchar_t *nptr, wchar_t ** const wchar_t *src; size_t size; const wchar_t *start; + const wchar_t *aftersign; /* * check length of string and call strtod @@ -59,6 +60,24 @@ FUNCNAME(const wchar_t *nptr, wchar_t ** start = src; if (*src && wcschr(L"+-", *src)) src++; + aftersign = src; + if (wcsncasecmp(src, L"inf", 3) == 0) { + src += 3; + if (wcsncasecmp(src, L"inity", 5) == 0) + src += 5; + goto match; + } + if (wcsncasecmp(src, L"nan", 3) == 0) { + src += 3; + if (*src == L'(') { + size = 1; + while (src[size] != L'\0' && src[size] != L')') + size++; + if (src[size] == L')') + src += size + 1; + } + goto match; + } size = wcsspn(src, L"0123456789"); src += size; if (*src == L'.') {/* XXX use localeconv */ @@ -73,56 +92,62 @@ FUNCNAME(const wchar_t *nptr, wchar_t ** size = wcsspn(src, L"0123456789"); src += size; } +match: size = src - start; /* * convert to a char-string and pass it to strtod. 
- * - * since all mb chars used to represent a double-constant - * are in the portable character set, we can assume - * that they are 1-byte chars. */ - if (size) - { + if (src > aftersign) { mbstate_t st; char *buf; char *end; const wchar_t *s; size_t size_converted; float_type result; - - buf = malloc(size + 1); + size_t bufsize; + + s = start; + memset(&st, 0, sizeof(st)); + bufsize = wcsnrtombs(NULL, &s, size, 0, &st); + + buf = malloc(bufsize + 1); if (!buf) { - /* error */ errno = ENOMEM; /* XXX */ - return 0; + goto fail; } - + s = start; memset(&st, 0, sizeof(st)); - size_converted = wcsrtombs(buf, &s, size, &st); - if (size != size_converted) { + size_converted = wcsnrtombs(buf, &s, size, bufsize, &st); + if (size_converted != bufsize) { /* XXX should not happen */ free(buf); errno = EILSEQ; - return 0; + goto fail; } - buf[size] = 0; + buf[bufsize] = 0; result = STRTOD_FUNC(buf, &end); - free(buf); + if (endptr) { + const char *s = buf; + memset(&st, 0, sizeof(st)); + size = mbsnrtowcs(NULL, &s, end - buf, 0, &st); - if (endptr) /* LINTED bad interface */ - *endptr = (wchar_t*)start + (end - buf); + *endptr = (wchar_t*)start + size; + } + + free(buf); return result; } +fail: if (endptr) /* LINTED bad interface */ - *endptr = (wchar_t*)start; + *endptr = (wchar_t*)nptr; return 0; }