Sorry for the delay. KO Myung-Hun <kom...@gmail.com> writes:
>> I'm attaching two patches: the first one is an update of the patch we >> are currently working on (I added mappings from the "unusable" codepages >> to their equivalents), and the second one is an alternative >> implementation following the kLIBC implementation. > > I prefer second one. It was my first idea and more simple. ^^ Okay, if nobody objects, I'll push the attached patch (I did a bit more research on OS/2 codepages and added mappings from them to the GNU canonical charset names where possible). Regards, -- Daiki Ueno
>From 0d7bfa1a8177175adb04f12d163cd606602d4285 Mon Sep 17 00:00:00 2001 From: KO Myung-Hun <k...@chollian.net> Date: Thu, 23 Feb 2012 22:37:21 +0900 Subject: [PATCH] localcharset: improve charset detection on OS/2 Use system codepage if appropriate. Map the OS/2 codepages to the GNU canonical charset names. * lib/config.charset: Remove os2* from case "$os" in * lib/localcharset.c (get_charset_aliases) [OS2]: Embed the mapping from the OS/2 codepages to the GNU canonical charset names. (locale_charset) [OS2]: Use system codepage if codeset is omitted from the locale name and the locale name is not "C" nor "POSIX". --- ChangeLog | 12 ++++++++++++ lib/config.charset | 4 +--- lib/localcharset.c | 42 ++++++++++++++++++++++++++++++++++++++---- 3 files changed, 51 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9f5611d..ec8a19f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2015-01-05 KO Myung-Hun <k...@chollian.net> + + localcharset: improve charset detection on OS/2 + Use system codepage if appropriate. Map the OS/2 codepages to the + GNU canonical charset names. + * lib/config.charset: Remove os2* from case "$os" in + * lib/localcharset.c (get_charset_aliases) [OS2]: Embed the + mapping from the OS/2 codepages to the GNU canonical charset + names. + (locale_charset) [OS2]: Use system codepage if codeset is omitted + from the locale name and the locale name is not "C" nor "POSIX". + 2015-01-04 Paul Eggert <egg...@cs.ucla.edu> lib-symbol-versions: cache script check diff --git a/lib/config.charset b/lib/config.charset index 1b8a3f7..ac10e4b 100644 --- a/lib/config.charset +++ b/lib/config.charset @@ -348,12 +348,10 @@ case "$os" in #echo "sun_eu_greek ?" # what is this? echo "UTF-8 UTF-8" ;; - freebsd* | os2*) + freebsd*) # FreeBSD 4.2 doesn't have nl_langinfo(CODESET); therefore # localcharset.c falls back to using the full locale name # from the environment variables. - # Likewise for OS/2. OS/2 has XFree86 just like FreeBSD. 
Just - # reuse FreeBSD's locale data for OS/2. echo "C ASCII" echo "US-ASCII ASCII" for l in la_LN lt_LN; do diff --git a/lib/localcharset.c b/lib/localcharset.c index d54dbfb..fa25d4d 100644 --- a/lib/localcharset.c +++ b/lib/localcharset.c @@ -128,7 +128,7 @@ get_charset_aliases (void) cp = charset_aliases; if (cp == NULL) { -#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__) +#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__ || defined OS2) const char *dir; const char *base = "charset.alias"; char *file_name; @@ -342,6 +342,36 @@ get_charset_aliases (void) "CP54936" "\0" "GB18030" "\0" "CP65001" "\0" "UTF-8" "\0"; # endif +# if defined OS2 + /* To avoid the troubles of installing a separate file in the same + directory as the DLL and of retrieving the DLL's directory at + runtime, simply inline the aliases here. */ + + /* The list of encodings is based on "List of OS/2 Codepages" + assembled by Alex Taylor: + <http://altsan.org/os2/toolkits/uls/index.html#codepages> + and "IBM Globalization - Code page identifiers": + <http://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>. 
*/ + cp = "CP813" "\0" "ISO-8859-7" "\0" + "CP878" "\0" "KOI8-R" "\0" + "CP819" "\0" "ISO-8859-1" "\0" + "CP912" "\0" "ISO-8859-2" "\0" + "CP913" "\0" "ISO-8859-3" "\0" + "CP914" "\0" "ISO-8859-4" "\0" + "CP915" "\0" "ISO-8859-5" "\0" + "CP916" "\0" "ISO-8859-8" "\0" + "CP920" "\0" "ISO-8859-9" "\0" + "CP921" "\0" "ISO-8859-13" "\0" + "CP923" "\0" "ISO-8859-15" "\0" + "CP954" "\0" "EUC-JP" "\0" + "CP964" "\0" "EUC-TW" "\0" + "CP970" "\0" "EUC-KR" "\0" + "CP1089" "\0" "ISO-8859-6" "\0" + "CP1208" "\0" "UTF-8" "\0" + "CP1381" "\0" "GB2312" "\0" + "CP1386" "\0" "GBK" "\0" + "CP3372" "\0" "EUC-JP" "\0"; +# endif #endif charset_aliases = cp; @@ -499,6 +529,8 @@ locale_charset (void) ULONG cp[3]; ULONG cplen; + codeset = NULL; + /* Allow user to override the codeset, as set in the operating system, with standard language environment variables. */ locale = getenv ("LC_ALL"); @@ -530,10 +562,12 @@ locale_charset (void) } } - /* Resolve through the charset.alias file. */ - codeset = locale; + /* For the POSIX locale, don't use the system's codepage. */ + if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0) + codeset = ""; } - else + + if (codeset == NULL) { /* OS/2 has a function returning the locale's codepage as a number. */ if (DosQueryCp (sizeof (cp), cp, &cplen)) -- 2.1.0