On Mon, Jun 19, 2023 at 02:03:48PM +0200, Michal Privoznik wrote:
> It's weird that in 2023 there's no reliable and portable way to
> parse strings, but okay.
>
> We started with strtol(). Except, it doesn't work the same on
> Linux and Windows. On Windows it behaves a bit different when it
> comes to parsing strings with base = 0. So we've switched to
> g_ascii_strtoll() which promised to solve this. And it did.
> Except, it introduced another problem. I don't really understand
> why, but I've seen random test failures and all of them claimed
> inability to parse a number (specifically <memory/> from the
> hard coded domain XML from test driver, which IMO is just a
> coincidence because it's the first number we parse).
What platforms are you seeing the failures on ? All platforms or just
on Windows, or just some other specific one?
> The nature of the problem suggests there's a race condition
> somewhere. I suspected glib but I don't know their code well to
> prove it.
The code in question is:
guint64
g_ascii_strtoull (const gchar *nptr,
gchar **endptr,
guint base)
{
#if defined(USE_XLOCALE) && defined(HAVE_STRTOULL_L)
return strtoull_l (nptr, endptr, base, get_C_locale ());
#else
gboolean negative;
guint64 result;
result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
/* Return the result of the appropriate sign. */
return negative ? -result : result;
#endif
}
gint64
g_ascii_strtoll (const gchar *nptr,
gchar **endptr,
guint base)
{
#if defined(USE_XLOCALE) && defined(HAVE_STRTOLL_L)
return strtoll_l (nptr, endptr, base, get_C_locale ());
#else
gboolean negative;
guint64 result;
result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
if (negative && result > (guint64) G_MININT64)
{
errno = ERANGE;
return G_MININT64;
}
else if (!negative && result > (guint64) G_MAXINT64)
{
errno = ERANGE;
return G_MAXINT64;
}
else if (negative)
return - (gint64) result;
else
return (gint64) result;
#endif
}
On Linux, we should take the first branch on the #ifdef, which
is calling strtoll_l just as your patch is switching us to.
I guess Windows would use the g_parse_long_long fallback. I don't
see anything in that code which could be non-thread safe. It is
just a copy of strtol cdoe from glibc.
On the Linux case get_C_locale is
static locale_t
get_C_locale (void)
{
static gsize initialized = FALSE;
static locale_t C_locale = NULL;
if (g_once_init_enter (&initialized))
{
C_locale = newlocale (LC_ALL_MASK, "C", NULL);
g_once_init_leave (&initialized, TRUE);
}
return C_locale;
}
and only way that could fail is if newlocale isn't threadsafe, and
we have other code in libvirt calling newlocale at exact same time
as the *first* call to get_C_locale.
>
> Anyway, using strtoll_l() directly and taking glib out of the
> picture solves the issue.
I'm pretty surprised if it fixes anything ! I'm a little concerned
we might have just masked a bug that's still hiding elsewhere.
>
> Signed-off-by: Michal Privoznik <[email protected]>
> ---
> meson.build | 1 +
> src/util/virstring.c | 82 +++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 79 insertions(+), 4 deletions(-)
>
> diff --git a/meson.build b/meson.build
> index aa391e7178..cd713795f5 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -596,6 +596,7 @@ functions = [
> 'sched_setscheduler',
> 'setgroups',
> 'setrlimit',
> + 'strtoll_l',
> 'symlink',
> 'sysctlbyname',
> ]
> diff --git a/src/util/virstring.c b/src/util/virstring.c
> index 1a13570d30..076e885624 100644
> --- a/src/util/virstring.c
> +++ b/src/util/virstring.c
> @@ -127,8 +127,17 @@ virStrToLong_i(char const *s, char **end_ptr, int base,
> int *result)
> char *p;
> int err;
>
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + if (virLocaleInitialize() < 0)
> + return -1;
> +#endif
> +
> errno = 0;
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + val = strtoll_l(s, &p, base, virLocaleRaw);
> +#else
> val = g_ascii_strtoll(s, &p, base);
> +#endif
> err = (errno || (!end_ptr && *p) || p == s || (int) val != val);
> if (end_ptr)
> *end_ptr = p;
> @@ -148,13 +157,23 @@ virStrToLong_ui(char const *s, char **end_ptr, int
> base, unsigned int *result)
> char *p;
> bool err = false;
>
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + if (virLocaleInitialize() < 0)
> + return -1;
> +#endif
> +
> errno = 0;
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + val = strtoull_l(s, &p, base, virLocaleRaw);
> +#else
> val = g_ascii_strtoull(s, &p, base);
> +#endif
>
> /* This one's tricky. We _want_ to allow "-1" as shorthand for
> * UINT_MAX regardless of whether long is 32-bit or 64-bit. But
> - * g_ascii_strtoull treats "-1" as ULLONG_MAX, and going from ullong back
> - * to uint differs depending on the size of uint. */
> + * strtoull_l/g_ascii_strtoull treats "-1" as ULLONG_MAX, and
> + * going from ullong back to uint differs depending on the size of
> + * uint. */
> if (memchr(s, '-', p - s)) {
> if (-val > UINT_MAX)
> err = true;
> @@ -180,8 +199,17 @@ virStrToLong_uip(char const *s, char **end_ptr, int
> base, unsigned int *result)
> char *p;
> bool err = false;
>
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + if (virLocaleInitialize() < 0)
> + return -1;
> +#endif
> +
> errno = 0;
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + val = strtoull_l(s, &p, base, virLocaleRaw);
> +#else
> val = g_ascii_strtoull(s, &p, base);
> +#endif
> err = (memchr(s, '-', p - s) ||
> errno || (!end_ptr && *p) || p == s || (unsigned int) val != val);
> if (end_ptr)
> @@ -204,13 +232,23 @@ virStrToLong_ul(char const *s, char **end_ptr, int
> base, unsigned long *result)
> char *p;
> bool err = false;
>
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + if (virLocaleInitialize() < 0)
> + return -1;
> +#endif
> +
> errno = 0;
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + val = strtoull_l(s, &p, base, virLocaleRaw);
> +#else
> val = g_ascii_strtoull(s, &p, base);
> +#endif
>
> /* This one's tricky. We _want_ to allow "-1" as shorthand for
> * ULONG_MAX regardless of whether long is 32-bit or 64-bit. But
> - * g_ascii_strtoull treats "-1" as ULLONG_MAX, and going from ullong back
> - * to ulong differs depending on the size of ulong. */
> + * strtoull_l/g_ascii_strtoull treats "-1" as ULLONG_MAX, and
> + * going from ullong back to ulong differs depending on the size
> + * of ulong. */
> if (memchr(s, '-', p - s)) {
> if (-val > ULONG_MAX)
> err = true;
> @@ -237,8 +275,17 @@ virStrToLong_ulp(char const *s, char **end_ptr, int base,
> char *p;
> int err;
>
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + if (virLocaleInitialize() < 0)
> + return -1;
> +#endif
> +
> errno = 0;
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + val = strtoull_l(s, &p, base, virLocaleRaw);
> +#else
> val = g_ascii_strtoull(s, &p, base);
> +#endif
> err = (memchr(s, '-', p - s) ||
> errno || (!end_ptr && *p) || p == s || (unsigned long) val !=
> val);
> if (end_ptr)
> @@ -257,8 +304,17 @@ virStrToLong_ll(char const *s, char **end_ptr, int base,
> long long *result)
> char *p;
> int err;
>
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + if (virLocaleInitialize() < 0)
> + return -1;
> +#endif
> +
> errno = 0;
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + val = strtoll_l(s, &p, base, virLocaleRaw);
> +#else
> val = g_ascii_strtoll(s, &p, base);
> +#endif
> err = (errno || (!end_ptr && *p) || p == s);
> if (end_ptr)
> *end_ptr = p;
> @@ -279,8 +335,17 @@ virStrToLong_ull(char const *s, char **end_ptr, int base,
> char *p;
> int err;
>
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + if (virLocaleInitialize() < 0)
> + return -1;
> +#endif
> +
> errno = 0;
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + val = strtoull_l(s, &p, base, virLocaleRaw);
> +#else
> val = g_ascii_strtoull(s, &p, base);
> +#endif
> err = (errno || (!end_ptr && *p) || p == s);
> if (end_ptr)
> *end_ptr = p;
> @@ -300,8 +365,17 @@ virStrToLong_ullp(char const *s, char **end_ptr, int
> base,
> char *p;
> int err;
>
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + if (virLocaleInitialize() < 0)
> + return -1;
> +#endif
> +
> errno = 0;
> +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE)
> + val = strtoull_l(s, &p, base, virLocaleRaw);
> +#else
> val = g_ascii_strtoull(s, &p, base);
> +#endif
> err = (memchr(s, '-', p - s) ||
> errno || (!end_ptr && *p) || p == s);
> if (end_ptr)
> --
> 2.39.3
>
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|