ISO C 23 § 7.24.1.7, in combination with § 6.4.4.1, specifies that strtol, strtoll, strtoul, and strtoull must accept binary integer syntax as input when the base is 0 or 2, e.g. "0b101010" for 42.
This patch implements it, and augments the unit tests accordingly. 2023-03-16 Bruno Haible <br...@clisp.org> strtol, strtoll, strtoul, strtoull: Make ISO C 23 compliant. * lib/strtol.c (INTERNAL (strtol)): Treat 'b' and base 2 like 'x' and base 16. Based on glibc commit 64924422a99690d147a166b4de3103f3bf3eaf6c by Joseph Myers. * m4/strtol.m4 (gl_FUNC_STRTOL): Test also whether parsing binary integers works. Update cross-compilation guesses. * m4/strtoll.m4 (gl_FUNC_STRTOLL): Likewise. * m4/strtoul.m4 (gl_FUNC_STRTOUL): Likewise. * m4/strtoull.m4 (gl_FUNC_STRTOULL): Likewise. * tests/test-strtol.c (main): Add tests of parsing binary integers. * tests/test-strtoll.c (main): Likewise. * tests/test-strtoul.c (main): Likewise. * tests/test-strtoull.c (main): Likewise. * doc/posix-functions/strtol.texi: Mention the problem with parsing binary integers. * doc/posix-functions/strtoll.texi: Likewise. * doc/posix-functions/strtoul.texi: Likewise. * doc/posix-functions/strtoull.texi: Likewise. diff --git a/doc/posix-functions/strtol.texi b/doc/posix-functions/strtol.texi index 5ebcb284d4..061deea1b3 100644 --- a/doc/posix-functions/strtol.texi +++ b/doc/posix-functions/strtol.texi @@ -14,6 +14,9 @@ This function does not parse the leading @samp{0} when the input string is @code{"0x"} and the base is 16 or 0 on some platforms: Minix 3.3, mingw, MSVC 14. +@item +This function does not parse binary integers (with a @samp{0b} or @samp{0B} +prefix) when the base is 2 or 0 on many platforms. @end itemize Portability problems not fixed by Gnulib: diff --git a/doc/posix-functions/strtoll.texi b/doc/posix-functions/strtoll.texi index b507fc33c6..42f73d3449 100644 --- a/doc/posix-functions/strtoll.texi +++ b/doc/posix-functions/strtoll.texi @@ -15,6 +15,9 @@ This function does not parse the leading @samp{0} when the input string is @code{"0x"} and the base is 16 or 0 on some platforms: Minix 3.3, mingw, MSVC 14. 
+@item +This function does not parse binary integers (with a @samp{0b} or @samp{0B} +prefix) when the base is 2 or 0 on many platforms. @end itemize Portability problems not fixed by Gnulib: diff --git a/doc/posix-functions/strtoul.texi b/doc/posix-functions/strtoul.texi index 3b1fa7fb5e..c3a97ee39a 100644 --- a/doc/posix-functions/strtoul.texi +++ b/doc/posix-functions/strtoul.texi @@ -14,6 +14,9 @@ This function does not parse the leading @samp{0} when the input string is @code{"0x"} and the base is 16 or 0 on some platforms: Minix 3.3, mingw, MSVC 14. +@item +This function does not parse binary integers (with a @samp{0b} or @samp{0B} +prefix) when the base is 2 or 0 on many platforms. @end itemize Portability problems not fixed by Gnulib: diff --git a/doc/posix-functions/strtoull.texi b/doc/posix-functions/strtoull.texi index 68ede343e1..ba976a77e6 100644 --- a/doc/posix-functions/strtoull.texi +++ b/doc/posix-functions/strtoull.texi @@ -15,6 +15,9 @@ This function does not parse the leading @samp{0} when the input string is @code{"0x"} and the base is 16 or 0 on some platforms: Minix 3.3, mingw, MSVC 14. +@item +This function does not parse binary integers (with a @samp{0b} or @samp{0B} +prefix) when the base is 2 or 0 on many platforms. @end itemize Portability problems not fixed by Gnulib: diff --git a/lib/strtol.c b/lib/strtol.c index d11269b262..b93483dca4 100644 --- a/lib/strtol.c +++ b/lib/strtol.c @@ -288,6 +288,11 @@ INTERNAL (strtol) (const STRING_TYPE *nptr, STRING_TYPE **endptr, s += 2; base = 16; } + else if ((base == 0 || base == 2) && TOUPPER (s[1]) == L_('B')) + { + s += 2; + base = 2; + } else if (base == 0) base = 8; } @@ -378,11 +383,14 @@ INTERNAL (strtol) (const STRING_TYPE *nptr, STRING_TYPE **endptr, noconv: /* We must handle a special case here: the base is 0 or 16 and the first two characters are '0' and 'x', but the rest are no - hexadecimal digits. This is no error case. We return 0 and - ENDPTR points to the 'x'. 
*/ + hexadecimal digits. Likewise when the base is 0 or 2 and the + first two characters are '0' and 'b', but the rest are no binary + digits. This is no error case. We return 0 and ENDPTR points to + the 'x' or 'b'. */ if (endptr != NULL) { - if (save - nptr >= 2 && TOUPPER (save[-1]) == L_('X') + if (save - nptr >= 2 + && (TOUPPER (save[-1]) == L_('X') || TOUPPER (save[-1]) == L_('B')) && save[-2] == L_('0')) *endptr = (STRING_TYPE *) &save[-1]; else diff --git a/m4/strtol.m4 b/m4/strtol.m4 index 7e3b0d0044..5cdd86adc7 100644 --- a/m4/strtol.m4 +++ b/m4/strtol.m4 @@ -1,4 +1,4 @@ -# strtol.m4 serial 7 +# strtol.m4 serial 8 dnl Copyright (C) 2002-2003, 2006, 2009-2023 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -24,15 +24,26 @@ AC_DEFUN([gl_FUNC_STRTOL] if (term != input + 1) result |= 1; } + /* This test fails on pre-C23 platforms. */ + { + const char input[] = "0b1"; + (void) strtol (input, &term, 2); + if (term != input + 3) + result |= 2; + } return result; ]]) ], [gl_cv_func_strtol_works=yes], [gl_cv_func_strtol_works=no], [case "$host_os" in - # Guess no on native Windows. - mingw*) gl_cv_func_strtol_works="guessing no" ;; - *) gl_cv_func_strtol_works="$gl_cross_guess_normal" ;; + # Guess no on native Windows. + mingw*) gl_cv_func_strtol_works="guessing no" ;; + # Guess no on glibc systems. + *-gnu* | gnu*) gl_cv_func_strtol_works="guessing no" ;; + # Guess no on musl systems. + *-musl* | midipix*) gl_cv_func_strtol_works="guessing no" ;; + *) gl_cv_func_strtol_works="$gl_cross_guess_normal" ;; esac ]) ]) diff --git a/m4/strtoll.m4 b/m4/strtoll.m4 index ede630c606..ec09609cd4 100644 --- a/m4/strtoll.m4 +++ b/m4/strtoll.m4 @@ -1,4 +1,4 @@ -# strtoll.m4 serial 9 +# strtoll.m4 serial 10 dnl Copyright (C) 2002, 2004, 2006, 2008-2023 Free Software Foundation, Inc. 
dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -24,15 +24,26 @@ AC_DEFUN([gl_FUNC_STRTOLL] if (term != input + 1) result |= 1; } + /* This test fails on pre-C23 platforms. */ + { + const char input[] = "0b1"; + (void) strtoll (input, &term, 2); + if (term != input + 3) + result |= 2; + } return result; ]]) ], [gl_cv_func_strtoll_works=yes], [gl_cv_func_strtoll_works=no], [case "$host_os" in - # Guess no on native Windows. - mingw*) gl_cv_func_strtoll_works="guessing no" ;; - *) gl_cv_func_strtoll_works="$gl_cross_guess_normal" ;; + # Guess no on native Windows. + mingw*) gl_cv_func_strtoll_works="guessing no" ;; + # Guess no on glibc systems. + *-gnu* | gnu*) gl_cv_func_strtoll_works="guessing no" ;; + # Guess no on musl systems. + *-musl* | midipix*) gl_cv_func_strtoll_works="guessing no" ;; + *) gl_cv_func_strtoll_works="$gl_cross_guess_normal" ;; esac ]) ]) diff --git a/m4/strtoul.m4 b/m4/strtoul.m4 index 30a83d7e19..9baa9729e6 100644 --- a/m4/strtoul.m4 +++ b/m4/strtoul.m4 @@ -1,4 +1,4 @@ -# strtoul.m4 serial 6 +# strtoul.m4 serial 7 dnl Copyright (C) 2002, 2006, 2009-2023 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -24,15 +24,26 @@ AC_DEFUN([gl_FUNC_STRTOUL] if (term != input + 1) result |= 1; } + /* This test fails on pre-C23 platforms. */ + { + const char input[] = "0b1"; + (void) strtoul (input, &term, 2); + if (term != input + 3) + result |= 2; + } return result; ]]) ], [gl_cv_func_strtoul_works=yes], [gl_cv_func_strtoul_works=no], [case "$host_os" in - # Guess no on native Windows. - mingw*) gl_cv_func_strtoul_works="guessing no" ;; - *) gl_cv_func_strtoul_works="$gl_cross_guess_normal" ;; + # Guess no on native Windows. + mingw*) gl_cv_func_strtoul_works="guessing no" ;; + # Guess no on glibc systems. 
+ *-gnu* | gnu*) gl_cv_func_strtoul_works="guessing no" ;; + # Guess no on musl systems. + *-musl* | midipix*) gl_cv_func_strtoul_works="guessing no" ;; + *) gl_cv_func_strtoul_works="$gl_cross_guess_normal" ;; esac ]) ]) diff --git a/m4/strtoull.m4 b/m4/strtoull.m4 index a9b0ddf9f2..4f895c76af 100644 --- a/m4/strtoull.m4 +++ b/m4/strtoull.m4 @@ -1,4 +1,4 @@ -# strtoull.m4 serial 9 +# strtoull.m4 serial 10 dnl Copyright (C) 2002, 2004, 2006, 2008-2023 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -24,15 +24,26 @@ AC_DEFUN([gl_FUNC_STRTOULL] if (term != input + 1) result |= 1; } + /* This test fails on pre-C23 platforms. */ + { + const char input[] = "0b1"; + (void) strtoull (input, &term, 2); + if (term != input + 3) + result |= 2; + } return result; ]]) ], [gl_cv_func_strtoull_works=yes], [gl_cv_func_strtoull_works=no], [case "$host_os" in - # Guess no on native Windows. - mingw*) gl_cv_func_strtoull_works="guessing no" ;; - *) gl_cv_func_strtoull_works="$gl_cross_guess_normal" ;; + # Guess no on native Windows. + mingw*) gl_cv_func_strtoull_works="guessing no" ;; + # Guess no on glibc systems. + *-gnu* | gnu*) gl_cv_func_strtoull_works="guessing no" ;; + # Guess no on musl systems. + *-musl* | midipix*) gl_cv_func_strtoull_works="guessing no" ;; + *) gl_cv_func_strtoull_works="$gl_cross_guess_normal" ;; esac ]) ]) diff --git a/tests/test-strtol.c b/tests/test-strtol.c index ba8a54de4d..065b442ba5 100644 --- a/tests/test-strtol.c +++ b/tests/test-strtol.c @@ -239,5 +239,67 @@ main (void) ASSERT (errno == 0); } + /* Binary integer syntax. 
*/ + { + const char input[] = "0b111010"; + char *ptr; + long result; + errno = 0; + result = strtol (input, &ptr, 10); + ASSERT (result == 0L); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + { + const char input[] = "0b111010"; + char *ptr; + long result; + errno = 0; + result = strtol (input, &ptr, 2); + ASSERT (result == 58L); + ASSERT (ptr == input + 8); + ASSERT (errno == 0); + } + { + const char input[] = "0b111010"; + char *ptr; + long result; + errno = 0; + result = strtol (input, &ptr, 0); + ASSERT (result == 58L); + ASSERT (ptr == input + 8); + ASSERT (errno == 0); + } + { + const char input[] = "0b"; + char *ptr; + long result; + errno = 0; + result = strtol (input, &ptr, 10); + ASSERT (result == 0L); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + { + const char input[] = "0b"; + char *ptr; + long result; + errno = 0; + result = strtol (input, &ptr, 2); + ASSERT (result == 0L); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + { + const char input[] = "0b"; + char *ptr; + long result; + errno = 0; + result = strtol (input, &ptr, 0); + ASSERT (result == 0L); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + return 0; } diff --git a/tests/test-strtoll.c b/tests/test-strtoll.c index ecedbe6460..24cb4eb1e1 100644 --- a/tests/test-strtoll.c +++ b/tests/test-strtoll.c @@ -239,5 +239,67 @@ main (void) ASSERT (errno == 0); } + /* Binary integer syntax. 
*/ + { + const char input[] = "0b111010"; + char *ptr; + long long result; + errno = 0; + result = strtoll (input, &ptr, 10); + ASSERT (result == 0LL); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + { + const char input[] = "0b111010"; + char *ptr; + long long result; + errno = 0; + result = strtoll (input, &ptr, 2); + ASSERT (result == 58LL); + ASSERT (ptr == input + 8); + ASSERT (errno == 0); + } + { + const char input[] = "0b111010"; + char *ptr; + long long result; + errno = 0; + result = strtoll (input, &ptr, 0); + ASSERT (result == 58LL); + ASSERT (ptr == input + 8); + ASSERT (errno == 0); + } + { + const char input[] = "0b"; + char *ptr; + long long result; + errno = 0; + result = strtoll (input, &ptr, 10); + ASSERT (result == 0LL); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + { + const char input[] = "0b"; + char *ptr; + long long result; + errno = 0; + result = strtoll (input, &ptr, 2); + ASSERT (result == 0LL); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + { + const char input[] = "0b"; + char *ptr; + long long result; + errno = 0; + result = strtoll (input, &ptr, 0); + ASSERT (result == 0LL); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + return 0; } diff --git a/tests/test-strtoul.c b/tests/test-strtoul.c index 7e8e5f78c6..434cea49ed 100644 --- a/tests/test-strtoul.c +++ b/tests/test-strtoul.c @@ -238,5 +238,67 @@ main (void) ASSERT (errno == 0); } + /* Binary integer syntax. 
*/ + { + const char input[] = "0b111010"; + char *ptr; + unsigned long result; + errno = 0; + result = strtoul (input, &ptr, 10); + ASSERT (result == 0UL); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + { + const char input[] = "0b111010"; + char *ptr; + unsigned long result; + errno = 0; + result = strtoul (input, &ptr, 2); + ASSERT (result == 58UL); + ASSERT (ptr == input + 8); + ASSERT (errno == 0); + } + { + const char input[] = "0b111010"; + char *ptr; + unsigned long result; + errno = 0; + result = strtoul (input, &ptr, 0); + ASSERT (result == 58UL); + ASSERT (ptr == input + 8); + ASSERT (errno == 0); + } + { + const char input[] = "0b"; + char *ptr; + unsigned long result; + errno = 0; + result = strtoul (input, &ptr, 10); + ASSERT (result == 0UL); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + { + const char input[] = "0b"; + char *ptr; + unsigned long result; + errno = 0; + result = strtoul (input, &ptr, 2); + ASSERT (result == 0UL); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + { + const char input[] = "0b"; + char *ptr; + unsigned long result; + errno = 0; + result = strtoul (input, &ptr, 0); + ASSERT (result == 0UL); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + return 0; } diff --git a/tests/test-strtoull.c b/tests/test-strtoull.c index dd6ec2ad19..7b0027f621 100644 --- a/tests/test-strtoull.c +++ b/tests/test-strtoull.c @@ -238,5 +238,67 @@ main (void) ASSERT (errno == 0); } + /* Binary integer syntax. 
*/ + { + const char input[] = "0b111010"; + char *ptr; + unsigned long long result; + errno = 0; + result = strtoull (input, &ptr, 10); + ASSERT (result == 0ULL); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + { + const char input[] = "0b111010"; + char *ptr; + unsigned long long result; + errno = 0; + result = strtoull (input, &ptr, 2); + ASSERT (result == 58ULL); + ASSERT (ptr == input + 8); + ASSERT (errno == 0); + } + { + const char input[] = "0b111010"; + char *ptr; + unsigned long long result; + errno = 0; + result = strtoull (input, &ptr, 0); + ASSERT (result == 58ULL); + ASSERT (ptr == input + 8); + ASSERT (errno == 0); + } + { + const char input[] = "0b"; + char *ptr; + unsigned long long result; + errno = 0; + result = strtoull (input, &ptr, 10); + ASSERT (result == 0ULL); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + { + const char input[] = "0b"; + char *ptr; + unsigned long long result; + errno = 0; + result = strtoull (input, &ptr, 2); + ASSERT (result == 0ULL); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + { + const char input[] = "0b"; + char *ptr; + unsigned long long result; + errno = 0; + result = strtoull (input, &ptr, 0); + ASSERT (result == 0ULL); + ASSERT (ptr == input + 1); + ASSERT (errno == 0); + } + return 0; }