[PATCH 1/2] quotearg: do not depend on localcharset

Paul Eggert Fri, 05 Sep 2025 00:22:43 -0700

* lib/quotearg.c: Do not include localcharset.h.
(gettext_quote): Do not use locale_charset; instead, use
mbrtoc32 to determine whether the character set uses UTF-8.
Treat GB-18030 like similar encodings, as it is not worth the
trouble of special-casing: its special-case code has been broken
since 2011, nobody cared enough to report the bug, and the idea
that someone would use GB-18030 without translations is weird anyway.
* modules/quotearg (Depends-on): Remove localcharset.
---
 ChangeLog        | 12 ++++++++++++
 lib/quotearg.c   | 27 ++++++++++++++++-----------
 modules/quotearg |  1 -
 3 files changed, 28 insertions(+), 12 deletions(-)


diff --git a/ChangeLog b/ChangeLog
index 5052d65d21..59665cbd73 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2025-09-04  Paul Eggert  <[email protected]>
+
+       quotearg: do not depend on localcharset
+       * lib/quotearg.c: Do not include localcharset.h.
+       (gettext_quote): Do not use locale_charset; instead, use
+       mbrtoc32 to determine whether the character set uses UTF-8.
+       Treat GB-18030 like similar encodings, as it is not worth the
+       trouble of special-casing: its special-case code has been broken
+       since 2011, nobody cared enough to report the bug, and the idea
+       that someone would use GB-18030 without translations is weird anyway.
+       * modules/quotearg (Depends-on): Remove localcharset.
+
 2025-09-04  Bruno Haible  <[email protected]>
 
        gnulib-tool: Improve changes from yesterday and today.
diff --git a/lib/quotearg.c b/lib/quotearg.c
index a587cd5b45..74878cbc78 100644
--- a/lib/quotearg.c
+++ b/lib/quotearg.c
@@ -33,7 +33,6 @@
 #include "minmax.h"
 #include "xalloc.h"
 #include "c-strcaseeq.h"
-#include "localcharset.h"
 
 #include <ctype.h>
 #include <errno.h>
@@ -204,7 +203,7 @@ gettext_quote (char const *msgid, enum quoting_style s)
   if (translation != msgid)
     return translation;
 
-  /* For UTF-8 and GB-18030, use single quotes U+2018 and U+2019.
+  /* For UTF-8, use single quotes U+2018 and U+2019.
      Here is a list of other locales that include U+2018 and U+2019:
 
         ISO-8859-7   0xA1                 KOI8-T       0x91
@@ -218,15 +217,21 @@ gettext_quote (char const *msgid, enum quoting_style s)
         EUC-JP       0xA1 0xC6            EUC-KR       0xA1 0xAE
         EUC-TW       0xA1 0xE4            BIG5         0xA1 0xA5
         BIG5-HKSCS   0xA1 0xA5            EUC-CN       0xA1 0xAE
-        GBK          0xA1 0xAE            Georgian-PS  0x91
-        PT154        0x91
-
-     None of these is still in wide use; using iconv is overkill.  */
-  locale_code = locale_charset ();
-  if (STRCASEEQ (locale_code, "UTF-8", 'U','T','F','-','8',0,0,0,0))
-    return msgid[0] == '`' ? "\xe2\x80\x98": "\xe2\x80\x99";
-  if (STRCASEEQ (locale_code, "GB18030", 'G','B','1','8','0','3','0',0,0))
-    return msgid[0] == '`' ? "\xa1\ae": "\xa1\xaf";
+        GBK          0xA1 0xAE            GB-18030     0xA1 0xAE
+        Georgian-PS  0x91                 PT154        0x91
+
+     These are not in wide use; using iconv is overkill,
+     and C-locale quotes might be better for these locales anyway.
+
+     If the current encoding is consistent with UTF-8 for U+2018,
+     assume that the locale uses UTF-8.  This is safe in practice,
+     and means we need not use a function like locale_charset that
+     has other dependencies.  */
+  static char const quote[][4] = { "\xe2\x80\x98", "\xe2\x80\x99" };
+  char32_t w;
+  mbstate_t mbstate = {0,};
+  if (mbrtoc32 (&w, quote[0], 3, &mbstate) == 3 && w == 0x2018)
+    return quote[msgid[0] == '\''];
 
   return (s == clocale_quoting_style ? "\"" : "'");
 }
diff --git a/modules/quotearg b/modules/quotearg
index 371a55a3dd..0ac616bd97 100644
--- a/modules/quotearg
+++ b/modules/quotearg
@@ -20,7 +20,6 @@ mbsinit
 memcmp
 minmax
 quotearg-simple
-localcharset
 bool
 stdint-h
 uchar-h
-- 
2.48.1

[PATCH 1/2] quotearg: do not depend on localcharset

Reply via email to