Package: man-db Version: 2.4.3-3 Severity: important for Chinese users who do not use a UTF-8 locale
The current version of man-db cannot handle Chinese manual pages under a GBK locale, because there is no such nroff device. But we can use the utf8 device instead. Here is the patch. With it, man-db supports the GB2312, GBK, BIG5 and EUC-TW encodings well; it supports the locales zh_CN.GB2312, zh_CN.GBK, zh_TW.BIG5 and zh_TW.EUCTW, and should also support zh_SG.GBK. The patch assumes that pages in the zh_CN and zh_SG directories are GBK-encoded, pages in zh_TW are BIG5-encoded, and pages in the zh directory are UTF-8-encoded. Another problem is that the less utility cannot correctly display bold-style Chinese text. We can use w3m instead, or use less with the -u option, without bold style. diff -bNru man-db-2.4.3/src/encodings.c man-db-2.4.3-new/src/encodings.c --- man-db-2.4.3/src/encodings.c 2005-01-05 23:11:54.000000000 +0800 +++ man-db-2.4.3-new/src/encodings.c 2006-02-25 14:31:50.000000000 +0800 @@ -98,6 +98,10 @@ { "no", "ISO-8859-1", "ISO-8859-1" }, /* Norwegian */ { "pt", "ISO-8859-1", "ISO-8859-1" }, /* Portuguese */ { "sv", "ISO-8859-1", "ISO-8859-1" }, /* Swedish */ + { "zh_CN", "GBK", "UTF-8" }, /* Chinese */ + { "zh_SG", "GBK", "UTF-8" }, /* Chinese */ + { "zh_TW", "BIG5", "UTF-8" }, /* Chinese */ + { "zh", "UTF-8", "UTF-8" }, /* Chinese */ #ifdef MULTIBYTE_GROFF /* These languages require a patched version of groff with the @@ -133,6 +137,10 @@ { "ISO-8859-1", "latin1" }, { "ISO-8859-15", "latin1" }, { "UTF-8", "utf8" }, + { "GBK", "gb" }, + { "GB2312", "gb" }, + { "BIG5", "big5" }, + { "EUC-TW", "euc" }, #ifdef MULTIBYTE_GROFF { "EUC-JP", "nippon" }, @@ -160,20 +168,24 @@ struct device_entry { const char *roff_device; const char *roff_encoding; + const char *virtual_device; const char *output_encoding; }; static struct device_entry device_table[] = { - { "ascii", "ISO-8859-1", "ANSI_X3.4-1968" }, - { "latin1", "ISO-8859-1", "ISO-8859-1" }, - { "utf8", "ISO-8859-1", "UTF-8" }, + { "ascii", "ISO-8859-1", "ascii", "ANSI_X3.4-1968" }, + { "latin1", "ISO-8859-1", "latin1", "ISO-8859-1" }, + { "utf8", "UTF-8", "utf8", "UTF-8" }, + { 
"gb", "GBK", "utf8", "UTF-8" }, + { "big5", "BIG5", "utf8", "UTF-8" }, + { "euc", "EUC-TW", "utf8", "UTF-8" }, #ifdef MULTIBYTE_GROFF - { "ascii8", NULL, NULL }, - { "nippon", "EUC-JP", "EUC-JP" }, + { "ascii8", NULL, "ascii8", NULL }, + { "nippon", "EUC-JP", "nippon", "EUC-JP" }, #endif /* MULTIBYTE_GROFF */ - { NULL, NULL, NULL } + { NULL, NULL, NULL, NULL } }; static const char *fallback_roff_encoding = "ISO-8859-1"; @@ -382,9 +394,11 @@ * we want or else it probably won't work at all no matter what we * do. We might as well try it for now. */ - if (STREQ (input, "UTF-8")) + if (STREQ (input, "UTF-8")||STREQ (output, "UTF-8")) return 1; + if (STREQ (input, "BIG5") && STREQ (output, "EUC-TW")) + return 1; #ifdef MULTIBYTE_GROFF /* Special case for ja_JP.UTF-8, which takes UTF-8 input recoded * from EUC-JP and produces UTF-8 output. This is rather filthy. @@ -463,6 +477,19 @@ return roff_encoding ? roff_encoding : source_encoding; } +/* Find the virtual output device will be use. + */ +const char *get_virtual_device (const char *device) +{ + const struct device_entry *entry; + + for (entry = device_table; entry->roff_device; ++entry) + if (STREQ (entry->roff_device, device)) + return entry->virtual_device; + + return NULL; +} + /* Find the output encoding that this device will produce, or NULL if it * will simply pass through the input encoding. 
*/ diff -bNru man-db-2.4.3/src/encodings.h man-db-2.4.3-new/src/encodings.h --- man-db-2.4.3/src/encodings.h 2004-04-01 11:57:37.000000000 +0800 +++ man-db-2.4.3-new/src/encodings.h 2006-02-25 14:24:20.000000000 +0800 @@ -28,5 +28,6 @@ const char *source_encoding); const char *get_roff_encoding (const char *device, const char *source_encoding); +const char *get_virtual_device (const char *device); const char *get_output_encoding (const char *device); const char *get_less_charset (const char *locale_charset); diff -bNru man-db-2.4.3/src/man.c man-db-2.4.3-new/src/man.c --- man-db-2.4.3/src/man.c 2006-02-25 14:23:01.000000000 +0800 +++ man-db-2.4.3-new/src/man.c 2006-02-25 14:24:20.000000000 +0800 @@ -1726,7 +1726,7 @@ if (roff_device) { char *tmpdev = strappend (NULL, "-T", - roff_device, + get_virtual_device(roff_device), NULL); command_arg (cmd, tmpdev); free (tmpdev); Best regards, Wu Songhai
patch-man-db_2.4.3-zh
Description: Binary data