Package: iso-codes Version: 0.48-1 The .pot files in iso-codes sometimes use msgids that are not ASCII. However, the GNU gettext manual says:
Note that the MSGID argument to `gettext' is not subject to character set conversion. Also, when `gettext' does not find a translation for MSGID, it returns MSGID unchanged - independently of the current output character set. It is therefore recommended that all MSGIDs be US-ASCII strings. An example problem scenario is if gettext("Åland Islands") returns its argument (which is UTF-8) in an EUC-JIS locale: the result will be a corrupted string being sent to the user. Here is a proposed patch, which causes the msgid to be the ASCII approximation "Aaland Islands" instead. 2005-09-30 Paul Eggert <[EMAIL PROTECTED]> * iso2pot.py: Convert msgids to an ASCII approximation if they are not already ASCII. diff -pru -I iso-codes- --exclude='*.po' iso-codes-0.48-tmp/iso2pot.py iso-codes-0.48-fix/iso2pot.py --- iso-codes-0.48-tmp/iso2pot.py 2004-06-15 13:36:43.000000000 -0700 +++ iso-codes-0.48-fix/iso2pot.py 2005-09-30 14:36:26.000000000 -0700 @@ -8,7 +8,7 @@ from xml.sax import saxutils, make_parser, saxlib, saxexts, ContentHandler from xml.sax.handler import feature_namespaces -import sys, os, getopt, urllib2, locale, time +import sys, os, getopt, urllib2, locale, time, codecs class printPot(saxutils.DefaultHandler): def __init__(self, nameslist,comment, ofile): @@ -21,6 +21,57 @@ class printPot(saxutils.DefaultHandler): self.ofile = ofile self.done = {} + def asciiize(exc): + """Convert to an ASCII approximation of the input. 
+ """ + codepoint2ascii = { + 0x00C5 : 'Aa', + 0x00DA : 'U', + 0x00E1 : 'a', + 0x00E2 : 'a', + 0x00E4 : 'a', + 0x00E5 : 'aa', + 0x00E7 : 'c', + 0x00E8 : 'e', + 0x00E9 : 'e', + 0x00EB : 'e', + 0x00ED : 'i', + 0x00EF : 'i', + 0x00C1 : 'A', + 0x00F1 : 'n', + 0x00F3 : 'o', + 0x00F4 : 'o', + 0x00F6 : 'o', + 0x00F8 : 'o', + 0x00FA : 'u', + 0x00FD : 'y', + 0x0101 : 'a', + 0x010D : 'c', + 0x011B : 'e', + 0x012B : 'i', + 0x0141 : 'L', + 0x0142 : 'l', + 0x0148 : 'n', + 0x0151 : 'o', + 0x0159 : 'r', + 0x015A : 'S', + 0x015B : 's', + 0x015E : 'S', + 0x015F : 's', + 0x0161 : 's', + 0x0162 : 'T', + 0x0163 : 't', + 0x016B : 'u', + 0x017D : 'Z', + } + if not isinstance(exc, UnicodeEncodeError): + raise TypeError("don't know how to handle %r" % exc) + l = [] + for c in exc.object[exc.start:exc.end]: + l.append(codepoint2ascii[ord(c)]) + return (u''.join(l), exc.end) + + codecs.register_error('iso2pot.asciiize', asciiize) def startElement(self, name, attrs): # Get the name attributes @@ -28,7 +79,7 @@ class printPot(saxutils.DefaultHandler): n = attrs.get(aname, None) c = attrs.get(self.comment, None) if type(n) == unicode: - n = n.encode('UTF-8') + n = n.encode('ascii', 'iso2pot.asciiize') if type(c) == unicode: c = c.encode('UTF-8') if n != None and not self.done.has_key(n):