Package: python-docutils Version: 0.3.7-2 Severity: normal Tags: patch The rst parser does not take the display width of CJK characters into account. This causes problems in CJK environments.
For example, one Chinese character is as wide as 2 ASCII characters, but docutils just counts the number of characters. All markup except tables works. In table markup, this problem makes marking up tables very difficult. For example, if 'CC' is one Chinese character and 'a' is one ASCII character, a reST table has to look like this: +----+----+ |CCCC |CCCCCC | |CCaa |aaCC | |CCCC |CCaCC | |aaaa|CCCC | +----+----+ Here is a patch for this problem. I got it from http://city.plala.jp/download/rst/ . This patch has been in use for about one year and appears to have no problems. Its license is: > Copyright (C) 2004 by Matsumoto,Tadashi > (E-Mail Address: [EMAIL PROTECTED]) > > Everyone is permitted to do anything on this program > including copying, modifying, improving, > as long as you don't try to pretend that you wrote it. > i.e., the above copyright notice has to appear in all copies. > Binary distribution requires original version messages. > You don't have to ask before copying, redistribution or publishing. > THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE. Regards, -- System Information: Debian Release: 3.1 APT prefers testing APT policy: (101, 'testing') Architecture: i386 (i686) Kernel: Linux 2.6.8-1-686 Locale: LANG=ja_JP.eucJP, LC_CTYPE=ja_JP.eucJP (charmap=EUC-JP) Versions of packages python-docutils depends on: ii python 2.3.5-1 An interactive high-level object-o ii python2.3-docutils 0.3.7-2 Dependency package for python-docu -- no debconf information
diff -urP /usr/lib/site-python/docutils/parsers/rst/adjusttable.py docutils/parsers/rst/adjusttable.py --- /usr/lib/site-python/docutils/parsers/rst/adjusttable.py 1970-01-01 09:00:00.000000000 +0900 +++ docutils/parsers/rst/adjusttable.py 2005-03-28 16:25:20.276759120 +0900 @@ -0,0 +1,85 @@ +""" +adjusttable.py + +Copyright (C) 2004 by Matsumoto,Tadashi +(E-Mail Address: [EMAIL PROTECTED]) + +Everyone is permitted to do anything on this program +including copying, modifying, improving, +as long as you don't try to pretend that you wrote it. +i.e., the above copyright notice has to appear in all copies. +Binary distribution requires original version messages. +You don't have to ask before copying, redistribution or publishing. +THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE. +""" +from cStringIO import StringIO + +jplib_exists = 1 +try: + import jplib +except: + jplib_exists = 0 + +if jplib_exists: + is7bit = jplib.is7bit + kanjiwidth = jplib.width +else: + import re + is7bit = re.compile('^[\000-\177]*$').match + def kanjiwidth(u): + w = 0 + for c in u: + if ord(c) > 255: + w += 2 + else: + w += 1 + return w + +def adjustgridtable(lines): + rlines = [] + for line in lines: + rline = [] + for col in line.split('|'): + if is7bit(col): + rline.append(col) + else: + diff = kanjiwidth(col) - len(col) + if diff: + rline.append(col+' '*diff) + rline = '|'.join(rline) + rlines.append(rline) + return rlines + +def adjustsimpletable(lines, columns): + rlines = [] + limit = len(columns) + for line in lines: + if is7bit(line): + rlines.append(line) + continue + f = StringIO() + i = 0 + start, end = columns[0] + width = 0 + kanji = 0 + for c in line: + f.write(c.encode('utf-8')) + if ord(c) > 255: + width += 2 + kanji += 1 + else: + width +=1 + if width <= end: + continue + else: + i += 1 + if i < limit: + if kanji: + f.write(' '*kanji) + kanji = 0 + start, end = columns[i] + else: + end = len(line)*2 + 1 + rline = f.getvalue() + 
rlines.append(rline.decode('utf-8')) + return rlines diff -urP /usr/lib/site-python/docutils/parsers/rst/states.py docutils/parsers/rst/states.py --- /usr/lib/site-python/docutils/parsers/rst/states.py 2004-11-26 17:27:55.000000000 +0900 +++ docutils/parsers/rst/states.py 2005-03-28 16:25:20.275759272 +0900 @@ -1,7 +1,7 @@ # Author: David Goodger # Contact: [EMAIL PROTECTED] -# Revision: $Revision: 1.86 $ -# Date: $Date: 2004/11/06 19:52:19 $ +# Revision: $Revision: 1.2.10.7 $ +# Date: $Date: 2005/01/07 13:26:03 $ # Copyright: This module has been placed in the public domain. """ @@ -117,6 +117,7 @@ from docutils.utils import escape2null, unescape from docutils.parsers.rst import directives, languages, tableparser, roles from docutils.parsers.rst.languages import en as _fallback_language_module +from docutils.parsers.rst import adjusttable class MarkupError(DataError): pass @@ -1551,6 +1552,7 @@ blank_finish = 1 try: block = self.state_machine.get_text_block(flush_left=1) + block.data = adjusttable.adjustgridtable(block) except statemachine.UnexpectedIndentationError, instance: block, source, lineno = instance.args messages.append(self.reporter.error('Unexpected indentation.', diff -urP /usr/lib/site-python/docutils/parsers/rst/tableparser.py docutils/parsers/rst/tableparser.py --- /usr/lib/site-python/docutils/parsers/rst/tableparser.py 2003-07-06 05:38:46.000000000 +0900 +++ docutils/parsers/rst/tableparser.py 2005-03-28 16:25:20.276759120 +0900 @@ -1,7 +1,7 @@ # Author: David Goodger # Contact: [EMAIL PROTECTED] -# Revision: $Revision: 1.9 $ -# Date: $Date: 2003/07/05 22:38:28 $ +# Revision: $Revision: 1.2.10.6 $ +# Date: $Date: 2005/01/07 13:26:04 $ # Copyright: This module has been placed in the public domain. 
""" @@ -25,6 +25,7 @@ import re import sys from docutils import DataError +from docutils.parsers.rst import adjusttable class TableMarkupError(DataError): pass @@ -463,6 +464,7 @@ else: columns = self.columns[:] span_offset = start + lines.data = adjusttable.adjustsimpletable(lines, columns) self.check_columns(lines, start, columns) row = self.init_row(columns, start) for i in range(len(columns)): diff -urP /usr/lib/site-python/docutils/utils.py docutils/utils.py --- /usr/lib/site-python/docutils/utils.py 2004-11-07 03:52:35.000000000 +0900 +++ docutils/utils.py 2005-03-28 16:25:20.278758816 +0900 @@ -1,7 +1,7 @@ # Author: David Goodger # Contact: [EMAIL PROTECTED] -# Revision: $Revision: 1.40 $ -# Date: $Date: 2004/09/30 13:47:58 $ +# Revision: $Revision: 1.2.10.7 $ +# Date: $Date: 2005/01/07 13:26:02 $ # Copyright: This module has been placed in the public domain. """ @@ -494,12 +494,12 @@ parts = [] start = 0 while 1: - found = text.find('\\', start) + found = text.find(u'\\', start) if found == -1: parts.append(text[start:]) return ''.join(parts) parts.append(text[start:found]) - parts.append('\x00' + text[found+1:found+2]) + parts.append(u'\x00' + text[found+1:found+2]) start = found + 2 # skip character after escape def unescape(text, restore_backslashes=0): @@ -508,9 +508,9 @@ Backslash-escaped spaces are also removed. """ if restore_backslashes: - return text.replace('\x00', '\\') + return text.replace(u'\x00', u'\\') else: - for sep in ['\x00 ', '\x00\n', '\x00']: + for sep in [u'\x00 ', u'\x00\n', u'\x00']: text = ''.join(text.split(sep)) return text