FWIW, here is a patch that a) tries to detect whether the user is running in a UTF-8 locale, using a heuristic similar to the one used in the autoconf test, and b) allows setting the Unicode locale at runtime (changes the define to a variable).
The autoconf script is not modified; removing the define should be trivial for anybody who knows autoconf, though. Thanks, Michal
diff -ur vilistextum-2.6.9/debian/changelog vilistextum-2.6.9.mod/debian/changelog --- vilistextum-2.6.9/debian/changelog 2010-02-04 23:41:53.000000000 +0100 +++ vilistextum-2.6.9.mod/debian/changelog 2010-02-04 23:48:05.000000000 +0100 @@ -1,3 +1,9 @@ +vilistextum (2.6.9-1.m) unstable; urgency=low + + * add multibyte support + + -- hramrach <hramr...@heretic.chello.upc.cz> Thu, 04 Feb 2010 23:47:41 +0100 + vilistextum (2.6.9-1) unstable; urgency=low * Initial release (Closes: #516972). Only in vilistextum-2.6.9.mod/debian: files diff -ur vilistextum-2.6.9/debian/rules vilistextum-2.6.9.mod/debian/rules --- vilistextum-2.6.9/debian/rules 2010-02-04 23:41:53.000000000 +0100 +++ vilistextum-2.6.9.mod/debian/rules 2010-02-04 23:50:27.000000000 +0100 @@ -4,3 +4,4 @@ include /usr/share/cdbs/1/class/autotools.mk DEB_INSTAL_DOCS_ALL = CHANGES +DEB_CONFIGURE_EXTRA_FLAGS = --enable-multibyte Only in vilistextum-2.6.9.mod/debian: stamp-autotools-files Only in vilistextum-2.6.9.mod/debian: stamp-makefile-build Only in vilistextum-2.6.9.mod/debian: vilistextum Only in vilistextum-2.6.9.mod/debian: vilistextum.debhelper.log Only in vilistextum-2.6.9.mod/debian: vilistextum.substvars Only in vilistextum-2.6.9.mod/doc: Makefile Only in vilistextum-2.6.9.mod: Makefile diff -ur vilistextum-2.6.9/src/charset.c vilistextum-2.6.9.mod/src/charset.c --- vilistextum-2.6.9/src/charset.c 2006-01-22 20:30:26.000000000 +0100 +++ vilistextum-2.6.9.mod/src/charset.c 2010-02-04 22:07:49.000000000 +0100 @@ -42,9 +42,9 @@ void init_multibyte() { char *ret; - ret = setlocale(LC_CTYPE, INTERNAL_LOCALE); + ret = setlocale(LC_CTYPE, internal_locale); if (ret==NULL) { - fprintf(stderr, "setlocale failed with: %s\n\n", INTERNAL_LOCALE); + fprintf(stderr, "setlocale failed with: %s\n\n", internal_locale); exit(1); } /*else { fprintf(stderr, "%s\n", ret); } */ Only in vilistextum-2.6.9.mod/src: charset.o diff -ur vilistextum-2.6.9/src/fileio.c vilistextum-2.6.9.mod/src/fileio.c --- 
vilistextum-2.6.9/src/fileio.c 2006-04-22 18:42:26.000000000 +0200 +++ vilistextum-2.6.9.mod/src/fileio.c 2010-02-04 22:07:14.000000000 +0100 @@ -100,9 +100,9 @@ output[strlen(output)] = '\0'; - ret = setlocale(LC_CTYPE, INTERNAL_LOCALE); + ret = setlocale(LC_CTYPE, internal_locale); if (ret==NULL) { - fprintf(stderr, "setlocale failed with: %s\n\n", INTERNAL_LOCALE); + fprintf(stderr, "setlocale failed with: %s\n\n", internal_locale); exit(1); } mbstowcs(converted_string, output, strlen(output)); Only in vilistextum-2.6.9.mod/src: fileio.o Only in vilistextum-2.6.9.mod/src: html.o Only in vilistextum-2.6.9.mod/src: html_tag.o Only in vilistextum-2.6.9.mod/src: latin1.o Only in vilistextum-2.6.9.mod/src: lists.o diff -ur vilistextum-2.6.9/src/main.c vilistextum-2.6.9.mod/src/main.c --- vilistextum-2.6.9/src/main.c 2006-05-01 07:37:43.000000000 +0200 +++ vilistextum-2.6.9.mod/src/main.c 2010-02-05 00:30:17.000000000 +0100 @@ -104,12 +104,35 @@ #include <stdio.h> #include <stdlib.h> +#ifdef MULTIBYTE +#include <locale.h> +#endif #include "html.h" #include "fileio.h" #include "charset.h" #include "debug.h" +#ifdef MULTIBYTE + +static int suffix(const char * str, const char * suffix) +{ + if ( strlen(str) < strlen(suffix) ) return 0; + if ( ! 
strcmp(suffix, str + ( strlen(str) - strlen(suffix) ) ) ) return 1; + return 0; +} + +static int utf_8_locale(const char * locale) +{ + if(!locale) return 0; + return suffix(locale,".utf8") || suffix(locale, ".UTF-8"); +} + +char * internal_locale = ""; + +#endif + + /* commandline options */ int palm = 0, /* if true, enable pda specific restrictions */ convert_tags = 0, /* if true, convert some tags (eg B,I) to ASCII characters */ @@ -151,9 +174,11 @@ " -s, --shrink-lines [NUMBER] \n" " -t, --no-title \n" " -w, --width NUMBER \n" +" -8, --no-utf-8 \n" #ifdef MULTIBYTE "\n" " -u, --output-utf-8 \n" +" -L, --utf-8-locale LOCALE \n" " -x, --translit \n" " -y, --charset CHARSET \n" #endif @@ -202,6 +227,8 @@ {"charset", 1, 0, 'y'}, {"translit", 0, 0, 'x'}, + {"no-utf-8", 0, 0, '8'}, + {"utf-8-locale", 1, 0, 'L'}, {"output-utf-8", 0, 0, 'u'}, {0, 0, 0, 0} @@ -216,11 +243,17 @@ } printf("\n"); #endif - + +#ifdef MULTIBYTE +#define OPTSTRING "pmsi:ce:hkltrnavy:xuL:8w:" +#else +#define OPTSTRING "pmsi:ce:hkltrnav8w:" +#endif + #ifdef HAVE_GETOPT_H - c = getopt_long (argc, argv, "pmsi:ce:hkltrnavy:xuw:", long_options, &option_index); + c = getopt_long (argc, argv, OPTSTRING, long_options, &option_index); #else - c = getopt(argc, argv, "pmsi:ce:hkltrnavy:xuw:"); + c = getopt(argc, argv, OPTSTRING); #endif #ifdef DEBUG fprintf(stderr, "c: %d c: %c \n\n", c,c); @@ -241,12 +274,20 @@ printf("Vilistextum %s (%s)\n", VERSION, RELEASEDATE); #endif exit(0); + break; #ifdef MULTIBYTE case 'y': set_iconv_charset(argument); break; + case 'L': internal_locale = argument; + option_output_utf8 = 1; + break; case 'x': transliteration = 1; break; case 'u': option_output_utf8 = 1; break; #endif - break; + case '8': +#ifdef MULTIBYTE + option_output_utf8 = 0; +#endif + break; case 'p': /* settings for PDAs */ hr_breite = 30; @@ -305,7 +346,10 @@ int main(int argc, char *argv[]) { + #ifdef MULTIBYTE + internal_locale = setlocale(LC_CTYPE, ""); + option_output_utf8 = 
utf_8_locale(internal_locale); init_multibyte(); use_default_charset(); #endif Only in vilistextum-2.6.9.mod/src: main.c~ diff -ur vilistextum-2.6.9/src/main.h vilistextum-2.6.9.mod/src/main.h --- vilistextum-2.6.9/src/main.h 2005-07-23 18:47:44.000000000 +0200 +++ vilistextum-2.6.9.mod/src/main.h 2010-02-04 23:24:19.000000000 +0100 @@ -19,6 +19,8 @@ int option_no_alt; int option_output_utf8; +char * internal_locale; + CHAR *default_image; #endif Only in vilistextum-2.6.9.mod/src: main.o Only in vilistextum-2.6.9.mod/src: Makefile Only in vilistextum-2.6.9.mod/src: microsoft.o Only in vilistextum-2.6.9.mod/src: text.o Only in vilistextum-2.6.9.mod/src: unicode_entities.o Only in vilistextum-2.6.9.mod/src: util.o Only in vilistextum-2.6.9.mod/src: vilistextum Only in vilistextum-2.6.9.mod: stamp-h1 diff -ur vilistextum-2.6.9/tests/check_entities vilistextum-2.6.9.mod/tests/check_entities --- vilistextum-2.6.9/tests/check_entities 2006-04-25 13:42:45.000000000 +0200 +++ vilistextum-2.6.9.mod/tests/check_entities 2010-02-05 00:18:25.000000000 +0100 @@ -3,7 +3,7 @@ echo "" echo "TEST: $0" rm -f entities.output.test -../src/vilistextum 2>/dev/null -m entities.html entities.output.test +../src/vilistextum 2>/dev/null -m8 entities.html entities.output.test diff >/dev/null entities.output entities.output.test diff -ur vilistextum-2.6.9/tests/check_latin2 vilistextum-2.6.9.mod/tests/check_latin2 --- vilistextum-2.6.9/tests/check_latin2 2006-04-25 13:42:51.000000000 +0200 +++ vilistextum-2.6.9.mod/tests/check_latin2 2010-02-05 00:17:51.000000000 +0100 @@ -7,7 +7,7 @@ ../src/vilistextum -v | grep >/dev/null multibyte || exit 0 rm -f latin2.output.test -../src/vilistextum latin2.html latin2.output.test +../src/vilistextum -8 latin2.html latin2.output.test diff >/dev/null latin2.output latin2.output.test Only in vilistextum-2.6.9.mod/tests: entities.output.test Only in vilistextum-2.6.9.mod/tests: latin2.output.test Only in vilistextum-2.6.9.mod/tests: Makefile Only in 
vilistextum-2.6.9.mod/tests: tags.output.test Only in vilistextum-2.6.9.mod/tests: utf-8-sampler.output.test