On 2025-09-05 13:41, Collin Funk wrote:
> Paul Eggert <[email protected]> writes:
>> ...
>> + mbstate_t mbstate = {0,};
>> + if (mbrtoc32 (&w, quote[0], 3, &mbstate) == 3 && w == 0x2018)
>> + return quote[msgid[0] == '\''];
> Any issue with using the following here:
>     mbstate_t mbstate; mbszero (&mbstate);
> Mostly because I find it less ugly
Oh my, tastes differ. The "= {0}" notation is shorter and to me it's
easier to read.
But you're right, mbszero is the Gnulib way to do this, for speed if not
readability. I had forgotten about mbszero; thanks for reminding me. I
installed the attached, which also affects the dfa and propername-lite
modules.
From 012cf99fdab6e82e93715946522a976ab0724e87 Mon Sep 17 00:00:00 2001
From: Paul Eggert <[email protected]>
Date: Fri, 5 Sep 2025 15:18:31 -0700
Subject: [PATCH] dfa, propername-lite, quotearg: prefer mbszero
Problem reported by Collin Funk in:
https://lists.gnu.org/r/bug-gnulib/2025-09/msg00063.html
* lib/dfa.c (mbszero) [GAWK]: Omit unnecessary parens.
* lib/localeinfo.c (mbszero) [GAWK]: New macro, copied from dfa.c.
(is_using_utf8, init_localeinfo): Use it.
* lib/propername-lite.c (proper_name_lite):
* lib/quotearg.c (gettext_quote):
Prefer mbszero to initializing the entire mbstate_t.
---
ChangeLog | 10 ++++++++++
lib/dfa.c | 2 +-
lib/localeinfo.c | 5 +++--
lib/propername-lite.c | 4 ++--
lib/quotearg.c | 11 +++++------
5 files changed, 21 insertions(+), 11 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index d5fde9e134..3ec2fff3ec 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
2025-09-05 Paul Eggert <[email protected]>
+ dfa, propername-lite, quotearg: prefer mbszero
+ Problem reported by Collin Funk in:
+ https://lists.gnu.org/r/bug-gnulib/2025-09/msg00063.html
+ * lib/dfa.c (mbszero) [GAWK]: Omit unnecessary parens.
+ * lib/localeinfo.c (mbszero) [GAWK]: New macro, copied from dfa.c.
+ (is_using_utf8, init_localeinfo): Use it.
+ * lib/propername-lite.c (proper_name_lite):
+ * lib/quotearg.c (gettext_quote):
+ Prefer mbszero to initializing the entire mbstate_t.
+
unlocked-io: also deal with fileno
This shouldn’t affect performance on glibc, where fileno and
fileno_unlocked are aliases under the theory that no locking is needed.
diff --git a/lib/dfa.c b/lib/dfa.c
index 6180c43451..284f10da3f 100644
--- a/lib/dfa.c
+++ b/lib/dfa.c
@@ -50,7 +50,7 @@
# define c32tob wctob
# define c32isprint iswprint
# define c32isspace iswspace
-# define mbszero(p) memset ((p), 0, sizeof (mbstate_t))
+# define mbszero(p) memset (p, 0, sizeof (mbstate_t))
#else
/* Use ISO C 11 + gnulib API. */
# include <uchar.h>
diff --git a/lib/localeinfo.c b/lib/localeinfo.c
index a4396d463c..ff38e525ea 100644
--- a/lib/localeinfo.c
+++ b/lib/localeinfo.c
@@ -32,6 +32,7 @@
# define mbrtoc32 mbrtowc
# define c32tolower towlower
# define c32toupper towupper
+# define mbszero(p) memset (p, 0, sizeof (mbstate_t))
#else
/* Use ISO C 11 + gnulib API. */
# include <uchar.h>
@@ -46,7 +47,7 @@ static bool
is_using_utf8 (void)
{
char32_t wc;
- mbstate_t mbs = {0};
+ mbstate_t mbs; mbszero (&mbs);
return mbrtoc32 (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
}
@@ -102,7 +103,7 @@ init_localeinfo (struct localeinfo *localeinfo)
{
char c = i;
unsigned char uc = i;
- mbstate_t s = {0};
+ mbstate_t s; mbszero (&s);
char32_t wc;
size_t len = mbrtoc32 (&wc, &c, 1, &s);
localeinfo->sbclen[uc] = len <= 1 ? 1 : - (int) - len;
diff --git a/lib/propername-lite.c b/lib/propername-lite.c
index d465548238..0834a2a6ea 100644
--- a/lib/propername-lite.c
+++ b/lib/propername-lite.c
@@ -45,8 +45,8 @@ proper_name_lite (char const *name_ascii, _GL_UNUSED char const *name_utf8)
/* If DF BF decodes to 07FF, assume it is UTF-8. */
static char const utf07FF[] = { 0xDF, 0xBF };
char32_t w;
- mbstate_t mbstate = {0,};
- if (mbrtoc32 (&w, utf07FF, 2, &mbstate) == 2 && w == 0x07FF)
+ mbstate_t mbs; mbszero (&mbs);
+ if (mbrtoc32 (&w, utf07FF, 2, &mbs) == 2 && w == 0x07FF)
return name_utf8;
#endif
diff --git a/lib/quotearg.c b/lib/quotearg.c
index 91a2afd733..24f5e18221 100644
--- a/lib/quotearg.c
+++ b/lib/quotearg.c
@@ -229,8 +229,8 @@ gettext_quote (char const *msgid, enum quoting_style s)
has other dependencies. */
static char const quote[][4] = { "\xe2\x80\x98", "\xe2\x80\x99" };
char32_t w;
- mbstate_t mbstate = {0,};
- if (mbrtoc32 (&w, quote[0], 3, &mbstate) == 3 && w == 0x2018)
+ mbstate_t mbs; mbszero (&mbs);
+ if (mbrtoc32 (&w, quote[0], 3, &mbs) == 3 && w == 0x2018)
return quote[msgid[0] == '\''];
return (s == clocale_quoting_style ? "\"" : "'");
@@ -606,8 +606,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
}
else
{
- mbstate_t mbstate;
- mbszero (&mbstate);
+ mbstate_t mbs; mbszero (&mbs);
m = 0;
printable = true;
@@ -618,7 +617,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
{
char32_t w;
size_t bytes = mbrtoc32 (&w, &arg[i + m],
- argsize - (i + m), &mbstate);
+ argsize - (i + m), &mbs);
if (bytes == 0)
break;
else if (bytes == (size_t) -1)
@@ -661,7 +660,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
m += bytes;
}
#if !GNULIB_MBRTOC32_REGULAR
- if (mbsinit (&mbstate))
+ if (mbsinit (&mbs))
#endif
break;
}
--
2.48.1