* lib/quotearg.c: Do not include localcharset.h. (gettext_quote): Do not use locale_charset; instead, use mbrtoc32 to determine whether the character set uses UTF-8. Treat GB-18030 like similar encodings, as it is not worth the trouble of special-casing: its special-case code has been broken since 2011, nobody cared enough to report the bug, and the idea that someone would use GB-18030 without translations is weird anyway. * modules/quotearg (Depends-on): Remove localcharset. --- ChangeLog | 12 ++++++++++++ lib/quotearg.c | 27 ++++++++++++++++----------- modules/quotearg | 1 - 3 files changed, 28 insertions(+), 12 deletions(-)
diff --git a/ChangeLog b/ChangeLog index 5052d65d21..59665cbd73 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2025-09-04 Paul Eggert <eggert@cs.ucla.edu> + + quotearg: do not depend on localcharset + * lib/quotearg.c: Do not include localcharset.h. + (gettext_quote): Do not use locale_charset; instead, use + mbrtoc32 to determine whether the character set uses UTF-8. + Treat GB-18030 like similar encodings, as it is not worth the + trouble of special-casing: its special-case code has been broken + since 2011, nobody cared enough to report the bug, and the idea + that someone would use GB-18030 without translations is weird anyway. + * modules/quotearg (Depends-on): Remove localcharset. + 2025-09-04 Bruno Haible <bruno@clisp.org> gnulib-tool: Improve changes from yesterday and today. diff --git a/lib/quotearg.c b/lib/quotearg.c index a587cd5b45..74878cbc78 100644 --- a/lib/quotearg.c +++ b/lib/quotearg.c @@ -33,7 +33,6 @@ #include "minmax.h" #include "xalloc.h" #include "c-strcaseeq.h" -#include "localcharset.h" #include <ctype.h> #include <errno.h> @@ -204,7 +203,7 @@ gettext_quote (char const *msgid, enum quoting_style s) if (translation != msgid) return translation; - /* For UTF-8 and GB-18030, use single quotes U+2018 and U+2019. + /* For UTF-8, use single quotes U+2018 and U+2019. Here is a list of other locales that include U+2018 and U+2019: ISO-8859-7 0xA1 KOI8-T 0x91 @@ -218,15 +217,21 @@ gettext_quote (char const *msgid, enum quoting_style s) EUC-JP 0xA1 0xC6 EUC-KR 0xA1 0xAE EUC-TW 0xA1 0xE4 BIG5 0xA1 0xA5 BIG5-HKSCS 0xA1 0xA5 EUC-CN 0xA1 0xAE - GBK 0xA1 0xAE Georgian-PS 0x91 - PT154 0x91 - - None of these is still in wide use; using iconv is overkill. */ - locale_code = locale_charset (); - if (STRCASEEQ (locale_code, "UTF-8", 'U','T','F','-','8',0,0,0,0)) - return msgid[0] == '`' ? "\xe2\x80\x98": "\xe2\x80\x99"; - if (STRCASEEQ (locale_code, "GB18030", 'G','B','1','8','0','3','0',0,0)) - return msgid[0] == '`' ? 
"\xa1\ae": "\xa1\xaf"; + GBK 0xA1 0xAE GB-18030 0xA1 0xAE + Georgian-PS 0x91 PT154 0x91 + + These are not in wide use; using iconv is overkill, + and C-locale quotes might be better for these locales anyway. + + If the current encoding is consistent with UTF-8 for U+2018, + assume that the locale uses UTF-8. This is safe in practice, + and means we need not use a function like locale_charset that + has other dependencies. */ + static char const quote[][4] = { "\xe2\x80\x98", "\xe2\x80\x99" }; + char32_t w; + mbstate_t mbstate = {0,}; + if (mbrtoc32 (&w, quote[0], 3, &mbstate) == 3 && w == 0x2018) + return quote[msgid[0] == '\'']; return (s == clocale_quoting_style ? "\"" : "'"); } diff --git a/modules/quotearg b/modules/quotearg index 371a55a3dd..0ac616bd97 100644 --- a/modules/quotearg +++ b/modules/quotearg @@ -20,7 +20,6 @@ mbsinit memcmp minmax quotearg-simple -localcharset bool stdint-h uchar-h -- 2.48.1
