Source: ibus Severity: normal Tags: patch Dear Team,
IBus parses the iso-codes iso_639.xml file (the ISO 639-2 list) to get the names of the languages that IBus engines support. That file has under 500 languages. The iso_639-3.xml file has codes and names for all the languages known at its time of publication. Keyman (www.keyman.com) already has support for over 1000 languages, many of which are only named in ISO 639-3. At the moment these are all grouped under "Other" in IBus. Other engines such as m17n may support some of these languages too. I'm attaching a patch to use ISO 639-3 (iso_639-3.xml) instead of ISO 639-2 (iso_639.xml). I've also made a PR for it upstream at https://github.com/ibus/ibus/pull/2061 Regards, Daniel -- System Information: Debian Release: buster/sid APT prefers testing APT policy: (500, 'testing') Architecture: amd64 (x86_64) Kernel: Linux 4.18.0-2-amd64 (SMP w/2 CPU cores) Locale: LANG=en_GB.UTF-8, LC_CTYPE=en_GB.UTF-8 (charmap=UTF-8), LANGUAGE=en_GB:en (charmap=UTF-8) Shell: /bin/sh linked to /bin/dash Init: systemd (via /run/systemd/system) LSM: AppArmor: enabled
--- a/ibus/lang.py +++ b/ibus/lang.py @@ -36,7 +36,7 @@ lang = lang.lower() if lang in __languages_dict: lang = __languages_dict[lang] - lang = gettext.dgettext("iso_639", lang) + lang = gettext.dgettext("iso_639-3", lang) else: lang = _(u"Other") lang = gettext.dgettext("ibus", lang) @@ -46,7 +46,7 @@ global __languages_dict try: name = attrs[u"name"] - for attr_name in (u"iso_639_2B_code", u"iso_639_2T_code", u"iso_639_1_code"): + for attr_name in (u"id", u"part1_code", u"part2_code"): if attr_name in attrs: attr_value = attrs[attr_name] __languages_dict[attr_value] = name @@ -62,12 +62,12 @@ def __load_lang(): import os import _config - iso_639_xml = os.path.join(_config.ISOCODES_PREFIX, "share/xml/iso-codes/iso_639.xml") + iso_639_3_xml = os.path.join(_config.ISOCODES_PREFIX, "share/xml/iso-codes/iso_639-3.xml") p = xml.parsers.expat.ParserCreate() p.StartElementHandler = __start_element p.EndElementHandler = __end_element p.CharacterDataHandler = __char_data - p.ParseFile(file(iso_639_xml)) + p.ParseFile(file(iso_639_3_xml)) __load_lang() --- a/engine/iso639converter.py +++ b/engine/iso639converter.py @@ -43,7 +43,7 @@ else: # io.StringIO does not work with XMLGenerator from cStringIO import StringIO - # iso_639.xml includes UTF-8 + # iso_639-3.xml includes UTF-8 reload(sys) sys.setdefaultencoding('utf-8') @@ -63,27 +63,27 @@ class ISO639XML(XMLFilterBase): def __init__(self, parser=None): - self.__code2to1 = {} + self.__code2to3 = {} self.__codetoname = {} XMLFilterBase.__init__(self, parser) def startElement(self, name, attrs): - if name != 'iso_639_entry': + if name != 'iso_639_3_entry': return n = attrs.get('name') - iso639_1 = attrs.get('iso_639_1_code') - iso639_2b = attrs.get('iso_639_2B_code') - iso639_2t = attrs.get('iso_639_2T_code') - if iso639_1 != None: - self.__codetoname[iso639_1] = n + iso639_3 = attrs.get('id') + iso639_2b = attrs.get('part1_code') + iso639_2t = attrs.get('part2_code') + if iso639_3 != None: + self.__codetoname[iso639_3] = n 
if iso639_2b != None: - self.__code2to1[iso639_2b] = iso639_1 + self.__code2to3[iso639_2b] = iso639_3 self.__codetoname[iso639_2b] = n - if iso639_2t != None and iso639_2b != iso639_2t: - self.__code2to1[iso639_2t] = iso639_1 + if iso639_2t != None: + self.__code2to3[iso639_2t] = iso639_3 self.__codetoname[iso639_2t] = n - def code2to1(self, iso639_2): + def code2to3(self, iso639_2): try: - return self.__code2to1[iso639_2] + return self.__code2to3[iso639_2] except KeyError: return None @@ -113,9 +113,9 @@ def characters(self, text): if self.__is_language: if self.__iso639: - iso639_1 = self.__iso639.code2to1(text) - if iso639_1 != None: - text = iso639_1 + iso639_3 = self.__iso639.code2to3(text) + if iso639_3 != None: + text = iso639_3 if self.__downstream: self.__downstream.characters(text) @@ -192,6 +192,6 @@ elif opt in ('-o', '--output'): output = arg - iso639 = parse_iso639('/usr/share/xml/iso-codes/iso_639.xml') + iso639 = parse_iso639('/usr/share/xml/iso-codes/iso_639-3.xml') xml = ConvertEngineXML(input, iso639) xml.write(output) --- a/src/ibusutil.c +++ b/src/ibusutil.c @@ -45,7 +45,7 @@ GList *p; g_assert (node); - if (G_UNLIKELY (g_strcmp0 (node->name, "iso_639_entries") != 0)) { + if (G_UNLIKELY (g_strcmp0 (node->name, "iso_639_3_entries") != 0)) { return FALSE; } @@ -57,9 +57,9 @@ const gchar *key; gchar *value; } entries[] = { - { "iso_639_2B_code", NULL }, - { "iso_639_2T_code", NULL }, - { "iso_639_1_code", NULL }, + { "id", NULL }, + { "part1_code", NULL }, + { "part2_code", NULL }, }; if (sub_node->attributes == NULL) { @@ -99,14 +99,14 @@ struct stat buf; #ifdef ENABLE_NLS - bindtextdomain ("iso_639", GLIB_LOCALE_DIR); - bind_textdomain_codeset ("iso_639", "UTF-8"); + bindtextdomain ("iso_639-3", GLIB_LOCALE_DIR); + bind_textdomain_codeset ("iso_639-3", "UTF-8"); #endif __languages_dict = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free); filename = g_build_filename (ISOCODES_PREFIX, - "share/xml/iso-codes/iso_639.xml", + 
"share/xml/iso-codes/iso_639-3.xml", NULL); if (g_stat (filename, &buf) != 0) { g_warning ("Can not get stat of file %s", filename); @@ -157,7 +157,7 @@ if (g_strcmp0 (retval, "Other") == 0) return dgettext (GETTEXT_PACKAGE, N_("Other")); else - return dgettext ("iso_639", retval); + return dgettext ("iso_639-3", retval); #else return retval; #endif