localcharset, nl_langinfo: fix return value for UTF-8 locales on MSVC

Bruno Haible Tue, 02 Jul 2019 13:12:02 -0700

On MSVC (on Windows 10), I'm seeing this test failure:

FAIL: test-nl_langinfo.sh
=========================


C:\testdir-posix-msvc\gltests\test-nl_langinfo.c:57: assertion 'c_strcasecmp (codeset, "UTF-8") == 0 || c_strcasecmp (codeset, "UTF8") == 0' failed
FAIL test-nl_langinfo.sh (exit status: 1)

The test uses the return value of setlocale (.., NULL) in the locale named
French_France.65001. Apparently the locale name returned by setlocale is now
"French_France.utf8". This is the first sign of explicit support for a UTF-8
locale in the Microsoft runtime library!

nl_langinfo.c "canonicalizes" this result to "CPutf8", which is nonsense. It
should be "UTF-8".

Likewise in localcharset.c.


2019-07-02  Bruno Haible  <br...@clisp.org>

        localcharset, nl_langinfo: Fix return value for UTF-8 locales on MSVC.
        * lib/localcharset.c (locale_charset): Return "UTF-8" instead of
        "CPutf8".
        * lib/nl_langinfo.c (ctype_codeset): Likewise.

diff --git a/lib/localcharset.c b/lib/localcharset.c
index 80a20b1..173d116 100644
--- a/lib/localcharset.c
+++ b/lib/localcharset.c
@@ -787,7 +787,12 @@ locale_charset (void)
         encoding is the best bet.  */
       sprintf (buf, "CP%u", GetACP ());
     }
-  codeset = buf;
+  /* For a locale name such as "French_France.65001", in Windows 10,
+     setlocale now returns "French_France.utf8" instead.  */
+  if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
+    codeset = "UTF-8";
+  else
+    codeset = buf;
 
 # elif defined OS2
 
diff --git a/lib/nl_langinfo.c b/lib/nl_langinfo.c
index e8a5595..76579a8 100644
--- a/lib/nl_langinfo.c
+++ b/lib/nl_langinfo.c
@@ -76,9 +76,15 @@ ctype_codeset (void)
     memmove (buf + 2, codeset, codesetlen + 1);
   else
     sprintf (buf + 2, "%u", GetACP ());
-  codeset = memcpy (buf, "CP", 2);
-# endif
+  /* For a locale name such as "French_France.65001", in Windows 10,
+     setlocale now returns "French_France.utf8" instead.  */
+  if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
+    return "UTF-8";
+  else
+    return memcpy (buf, "CP", 2);
+# else
   return codeset;
+#endif
 }
 #endif

localcharset, nl_langinfo: fix return value for UTF-8 locales on MSVC

Reply via email to