Hi! OK, I attached the wrong patch. This one should be better.
-- Kind regards, +--------------------------------------------------------------------+ | Bas Zoetekouw | GPG key: 0644fab7 | |----------------------------| Fingerprint: c1f5 f24c d514 3fec 8bf6 | | [EMAIL PROTECTED], [EMAIL PROTECTED] | a2b1 2bae e41f 0644 fab7 | +--------------------------------------------------------------------+
diff -Naur slrn-0.9.8.1pl1.eerst/debian/changelog slrn-0.9.8.1pl1/debian/changelog --- slrn-0.9.8.1pl1.eerst/debian/changelog 2006-03-31 18:03:56.000000000 +0200 +++ slrn-0.9.8.1pl1/debian/changelog 2006-03-31 19:14:46.000000000 +0200 @@ -1,3 +1,9 @@ +slrn (0.9.8.1pl1-17) experimental; urgency=low + + * Fixes to the charset patch + + -- Bas Zoetekouw <[EMAIL PROTECTED]> Fri, 31 Mar 2006 18:05:14 +0200 + slrn (0.9.8.1pl1-16) experimental; urgency=low * Added charset patch from Bas Zoetekouw. (closes: #316919) diff -Naur slrn-0.9.8.1pl1.eerst/debian/patches/00list slrn-0.9.8.1pl1/debian/patches/00list --- slrn-0.9.8.1pl1.eerst/debian/patches/00list 2006-03-31 18:03:56.000000000 +0200 +++ slrn-0.9.8.1pl1/debian/patches/00list 2006-03-31 19:14:38.000000000 +0200 @@ -11,4 +11,3 @@ 208_slrnrc-conv.diff 209_optic.diff 210_doc-fix.diff -300_iconv.diff diff -Naur slrn-0.9.8.1pl1.eerst/configure.in slrn-0.9.8.1pl1/configure.in --- slrn-0.9.8.1pl1.eerst/configure.in 2006-03-31 18:03:55.000000000 +0200 +++ slrn-0.9.8.1pl1/configure.in 2006-04-03 23:08:05.000000000 +0200 @@ -171,6 +171,7 @@ fi + dnl enforce the use of inews AH_VERBATIM([SLRN_FORCE_INEWS], [/* define if you want to force the use of inews */ @@ -353,6 +354,35 @@ AC_DEFINE(SLRN_HAS_SPOOL_SUPPORT, 1) fi + +dnl iconv +AM_ICONV +AH_VERBATIM([SLRN_USE_ICONV], +[/* define this if you want to use iconv */ +#undef SLRN_USE_ICONV]) +AC_ARG_ENABLE([iconv], + [ --enable-iconv Enable use of iconv library], + AC_DEFINE([SLRN_USE_ICONV]) + use_iconv=yes, + use_iconv=no) + +AH_VERBATIM([USE_ICONV], +[/* This is defined if use of iconv is requested _and_ libiconv is available */ +#undef USE_ICONV]) + +if test "x${use_iconv}" = "xyes"; +then + if test "x${am_cv_func_iconv}" != "xyes" + then + AC_MSG_FAILURE([Use of libiconv was requested, but the iconv library was not found. 
Maybe you need to specify --with-libiconv-prefix?]); + fi + AC_MSG_NOTICE([We're using iconv]) + AC_DEFINE([USE_ICONV]) +else + AC_MSG_NOTICE([We're NOT using iconv]) +fi + + AH_BOTTOM( [/* misc settings copied from the original config.hin file */ diff -Naur slrn-0.9.8.1pl1.eerst/src/art.c slrn-0.9.8.1pl1/src/art.c --- slrn-0.9.8.1pl1.eerst/src/art.c 2006-03-31 18:03:55.000000000 +0200 +++ slrn-0.9.8.1pl1/src/art.c 2006-04-03 23:08:05.000000000 +0200 @@ -79,6 +79,13 @@ # include "grplens.h" #endif +/* don't use recode for slrnpull */ +#if defined(SLRNPULL_CODE) && defined(USE_ICONV) +# undef USE_ICONV +#endif + + + /*}}}*/ /*{{{ extern Global variables */ @@ -616,6 +623,9 @@ Slrn_Current_Article = NULL; free_article_lines (a); +#ifdef USE_ICONV + slrn_free (a->charset); +#endif slrn_free ((char *) a); } @@ -2544,8 +2554,8 @@ #if SLRN_HAS_MIME if ((do_mime == 0) && (Slrn_Use_Mime & MIME_DISPLAY)) { - slrn_rfc1522_decode_string (tmp); - slrn_rfc1522_decode_string (h->from); + slrn_rfc1522_decode_string (&tmp); + slrn_rfc1522_decode_string (&(h->from)); } #endif slrn_free (h->realname); @@ -5509,22 +5519,44 @@ static Slrn_Header_Type *process_xover (Slrn_XOver_Type *xov) { Slrn_Header_Type *h; + unsigned char *c; h = (Slrn_Header_Type *) slrn_safe_malloc (sizeof (Slrn_Header_Type)); slrn_map_xover_to_header (xov, h); Number_Total++; +#ifdef USE_ICONV + /* ok, some news client (Outlook Express *sigh*) just put unencoded + * latin1/9 chars in their headers. As we don't know any charset at + * this time, replace those chars by '?' 
chars */ + c = h->subject; + while (*c!='\0' && *c!=0x0a && *c!=0x0d) + { + if (*c>=0x7f) *c = '?'; c++; + } + c = h->from; + while (*c!='\0' && *c!=0x0a && *c!=0x0d) + { + if (*c>=0x7f) *c = '?'; c++; + } +#endif /* USE_ICONV */ + #if SLRN_HAS_MIME if (Slrn_Use_Mime & MIME_DISPLAY) { - slrn_rfc1522_decode_string (h->subject); - slrn_rfc1522_decode_string (h->from); + slrn_rfc1522_decode_string (&(h->subject)); + slrn_rfc1522_decode_string (&(h->from)); } #endif get_header_real_name (h); +#ifdef USE_ICONV + /* TODO: do we translate here, or do we do it + * in slrn_rfc1522_decode_string? */ +#else /* USE_ICONV */ slrn_chmap_fix_header (h); +#endif /* USE_ICONV */ #if SLRN_HAS_GROUPLENS if (Slrn_Use_Group_Lens) diff -Naur slrn-0.9.8.1pl1.eerst/src/art.h slrn-0.9.8.1pl1/src/art.h --- slrn-0.9.8.1pl1.eerst/src/art.h 2006-03-31 18:03:56.000000000 +0200 +++ slrn-0.9.8.1pl1/src/art.h 2006-03-31 19:14:46.000000000 +0200 @@ -195,6 +195,9 @@ int mime_needs_metamail; #endif int needs_sync; /* non-zero if line number/current line needs updated */ +#ifdef USE_ICONV + char * charset; /* charset from content-type header */ +#endif } Slrn_Article_Type; diff -Naur slrn-0.9.8.1pl1.eerst/src/chmap.c slrn-0.9.8.1pl1/src/chmap.c --- slrn-0.9.8.1pl1.eerst/src/chmap.c 2006-03-31 18:03:56.000000000 +0200 +++ slrn-0.9.8.1pl1/src/chmap.c 2006-03-31 21:12:27.000000000 +0200 @@ -1,4 +1,6 @@ /* -*- mode: C; mode: fold; -*- */ +/* vim:ts=8:sw=2:expandtab + */ /* This file is part of SLRN. 
@@ -47,15 +49,149 @@ #include "art.h" #include "chmap.h" -#if SLRN_HAS_CHARACTER_MAP -char *Slrn_Charset; +#ifdef USE_ICONV +#include <locale.h> +#include <langinfo.h> +#include <iconv.h> +#include <errno.h> +#endif -static unsigned char *ChMap_To_Iso_Map; -static unsigned char *ChMap_From_Iso_Map; +/* don't use recode for slrnpull */ +#if defined(SLRNPULL_CODE) && defined(USE_ICONV) +# undef USE_ICONV +#endif + + +#if SLRN_HAS_CHARACTER_MAP || defined(USE_ICONV) + +/* if we use recode, this is set from the environment locale, otherwise it is + * specified by the user in the config file */ +char *Slrn_Charset; /* This include file contains static globals */ # include "charmaps.h" +#endif /* SLRN_HAS_CHARACTER_MAP || defined(USE_ICONV) */ + +#ifdef USE_ICONV + +const iconv_t ICONV_FAIL = (iconv_t) -1; + +/* translate the string *str_ptr from charset cs_from to charset cs_to */ +/* str_ptr will be freed and reallocated */ +char * slrn_chmap_translate_string ( + char *cs_from, char *cs_to, char **str_ptr) + { + iconv_t cd; + char *retval; + char *cs_to_translit; + char *str = *str_ptr; + size_t in_len, in_left, out_len, out_left; + char *in_start, *in_cursor, *out_start, *out_cursor; + size_t num; + + + /* make sure the charsets are initialized */ + if (cs_from == NULL) cs_from = ICONV_DEFAULT_CHARSET; + if (cs_to == NULL) return *str_ptr; + + /* don't translate if from and to charsets are equal */ + if (strcasecmp (cs_from, cs_to) == 0) + return 0; + + /* concat "//translit" to cs_to */ + cs_to_translit = slrn_safe_malloc( strlen (cs_to) + 10 + 1); + sprintf(cs_to_translit, "%s%s", cs_to, "//translit"); + + /* Initialize new translation description */ + /* TODO: cache this and check cs_from and cs_to every time */ + cd = iconv_open(cs_to_translit, cs_from); + if (cd == ICONV_FAIL) + { + slrn_error (_("Unsupported translation: %s->%s"), cs_from, cs_to); + return NULL; + } + + /* number of bytes left in input/output buffers */ + in_left = in_len = strlen (str); + 
out_left = out_len = 2 * strlen(str); + in_cursor = in_start = str; + out_cursor = out_start = slrn_safe_malloc( out_left + 1 ); + + /* iterate until the entire line is translated */ + while (1) + { + /* stop if there is nothing left to translate */ + if (in_left == 0) + { + break; + } + + /* do the conversion */ + num = iconv(cd, &in_cursor, &in_left, &out_cursor, &out_left); + + /* the entire line was translated, we're done */ + if (num>=0) + { + break; + } + + /* otherwise, an error occured */ + switch (errno) /* these are the only error that can occur */ + { + case EILSEQ: /* invalid byte sequence at pos in_cursor */ + /* skip the invalid byte and continue */ + if (in_left>0) /* otherwise we're done anyway */ + { + in_left--; + in_cursor++; + } + break; + case E2BIG: /* output buff is full */ + /* realloc the output buffer (make it 2 times as large) */ + slrn_realloc(out_cursor, 2*out_len, 1); + out_left = out_len; + out_len *= 2; + break; + case EINVAL: /* incomplete byte sequence at end of string*/ + /* just skip the rest of the line */ + in_left = 0; + break; + case EBADF: /* cd is invalid */ + slrn_error(_("Internal error while translating string")); + in_left = 0; + break; + default: /* never reached */ + slrn_error(_("A unknown error occurred. 
This should not happen.")); + in_left = 0; + break; + } + } + + /* make sure string ends in a \0 */ + *out_cursor = '\0'; + + /* now copy the output buffer to a newly allocated string */ + retval = slrn_safe_strmalloc (out_start); + + /* free the old input string, and set it to the new result */ + slrn_free (str); + *str_ptr = retval; + + /* free variables */ + slrn_free (out_start); + slrn_free (cs_to_translit); + iconv_close (cd); + + return retval; + } + +#else /* USE_ICONV */ +# if SLRN_HAS_CHARACTER_MAP + +static unsigned char *ChMap_To_Iso_Map; +static unsigned char *ChMap_From_Iso_Map; + static void chmap_map_string (char *str, unsigned char *map) { unsigned char ch; @@ -69,11 +205,11 @@ static void chmap_map_string_from_iso (char *str) { -# if SLANG_VERSION >= 20000 +# if SLANG_VERSION >= 20000 /* fixme */ if (Slrn_UTF8_Mode) return; -#endif +# endif chmap_map_string (str, ChMap_From_Iso_Map); } @@ -87,13 +223,17 @@ chmap_map_string (str, ChMap_To_Iso_Map); } -#endif +# endif /* SLRN_HAS_CHARACTER_MAP */ + +#endif /* USE_ICONV */ /* Fix a single header; the rest of the header lines are dealt with * later in hide_art_headers() */ +#ifndef USE_ICONV /* recode handles the translation directly while + decoding the rfc1522 */ void slrn_chmap_fix_header (Slrn_Header_Type *h) { -#if SLRN_HAS_CHARACTER_MAP +#if SLRN_HAS_CHARACTER_MAP if ((h->flags & HEADER_CHMAP_PROCESSED) == 0) { chmap_map_string_from_iso (h->subject); @@ -101,32 +241,62 @@ chmap_map_string_from_iso (h->realname); h->flags |= HEADER_CHMAP_PROCESSED; } -#endif +#endif /* SLRN_HAS_CHARACTER_MAP */ } +#endif + void slrn_chmap_fix_body (Slrn_Article_Type *a, int revert) { -#if SLRN_HAS_CHARACTER_MAP +#if SLRN_HAS_CHARACTER_MAP || USE_ICONV Slrn_Article_Line_Type *l; + char * charset; if (a == NULL) return; + +#ifdef USE_ICONV + /* check if we need to translate */ + if (a->charset == NULL) + charset = ICONV_DEFAULT_CHARSET; + else + charset = a->charset; + + if (strcasecmp (Slrn_Charset, charset) == 0) 
+ return; +#endif + l = a->lines; while (l != NULL) { - if (revert) - chmap_map_string_to_iso (l->buf); - else - chmap_map_string_from_iso (l->buf); - l = l->next; +# ifdef USE_ICONV + /* don't process headers */ + if (l->flags & HEADER_LINE) + { + l = l->next; + continue; + } + + if (revert) + slrn_chmap_translate_string (Slrn_Charset, charset, &(l->buf)); + else + slrn_chmap_translate_string (charset, Slrn_Charset, &(l->buf)); +# else /* USE_ICONV */ + if (revert) + chmap_map_string_to_iso (l->buf); + else + chmap_map_string_from_iso (l->buf); +# endif /* USE_ICONV */ + + l = l->next; } #endif } int slrn_chmap_fix_file (char *file, int reverse) { -#if SLRN_HAS_CHARACTER_MAP +#if SLRN_HAS_CHARACTER_MAP FILE *fp, *tmpfp; char buf [4096]; char tmp_file [SLRN_MAX_PATH_LEN]; @@ -170,8 +340,11 @@ ret = 0; while (NULL != fgets (buf, sizeof (buf), fp)) { +#ifdef USE_ICONV +#else if (reverse) chmap_map_string_from_iso (buf); else chmap_map_string_to_iso (buf); +#endif if (EOF == fputs (buf, tmpfp)) { slrn_error (_("Write Error. Disk Full? --- message not posted.")); @@ -219,8 +392,39 @@ } #endif + + int slrn_set_charset (char *name) { +#if USE_ICONV + iconv_t cd; + + /* use environenment for locale */ + setlocale (LC_ALL, ""); + + /* get charset of current locale */ + Slrn_Charset = slrn_safe_strmalloc (nl_langinfo (CODESET)); + + /* TODO: check that we don't have any UCS2 and UCS4 charsets, as those + * can't be handled at all. Slrn works with 0-terminated strings, which + * is totally incompatible with these charsets. Luckily those are extremely + * uncommon anyway on usenet. 
*/ + + /* initialize recode engine to check if Slrn_Charset is valid */ + cd = iconv_open ("UTF-8", Slrn_Charset); + if (cd == ICONV_FAIL) + { + slrn_error (_("Unsupport character set: %s"), Slrn_Charset); + return -1; + } + + /* free variables */ + iconv_close (cd); + + return 0; + +#else /* USE_ICONV */ + #if SLRN_HAS_CHARACTER_MAP CharMap_Type *map; unsigned int i; @@ -254,4 +458,5 @@ (void) name; return -1; #endif +#endif /* USE_ICONV */ } diff -Naur slrn-0.9.8.1pl1.eerst/src/chmap.h slrn-0.9.8.1pl1/src/chmap.h --- slrn-0.9.8.1pl1.eerst/src/chmap.h 2006-03-31 18:03:56.000000000 +0200 +++ slrn-0.9.8.1pl1/src/chmap.h 2006-03-31 19:14:46.000000000 +0200 @@ -20,8 +20,20 @@ */ #ifndef _SLRN_CHMAP_H #define _SLRN_CHMAP_H + +/* don't use recode for slrnpull */ +#if defined(SLRNPULL_CODE) && defined(USE_ICONV) +# undef USE_ICONV +#endif + +#define ICONV_DEFAULT_CHARSET "iso-8859-15" + extern int slrn_set_charset (char *); extern int slrn_chmap_fix_file (char *, int); +#ifdef USE_ICONV +extern char * slrn_chmap_translate_string ( + char *, char *, char **); +#endif extern void slrn_chmap_fix_body (Slrn_Article_Type *, int); extern void slrn_chmap_fix_header (Slrn_Header_Type *); diff -Naur slrn-0.9.8.1pl1.eerst/src/config.h.in slrn-0.9.8.1pl1/src/config.h.in --- slrn-0.9.8.1pl1.eerst/src/config.h.in 2006-03-31 18:03:55.000000000 +0200 +++ slrn-0.9.8.1pl1/src/config.h.in 2006-03-31 21:32:31.000000000 +0200 @@ -167,6 +167,9 @@ /* Define to 1 if you have the `vsnprintf' function. */ #undef HAVE_VSNPRINTF +/* Define as const if the declaration of iconv() needs const. */ +#undef ICONV_CONST + /* Define the directory where your locales are */ #undef LOCALEDIR @@ -263,9 +266,15 @@ /* sendmail command */ #undef SLRN_SENDMAIL_COMMAND +/* define this if you want to use iconv */ +#undef SLRN_USE_ICONV + /* Define to 1 if you have the ANSI C header files. 
*/ #undef STDC_HEADERS +/* This is defined if use of iconv is requested _and_ libiconv is available */ +#undef USE_ICONV + /* define if you have va_copy() in stdarg.h */ #undef VA_COPY diff -Naur slrn-0.9.8.1pl1.eerst/src/mime.c slrn-0.9.8.1pl1/src/mime.c --- slrn-0.9.8.1pl1.eerst/src/mime.c 2006-03-31 18:03:56.000000000 +0200 +++ slrn-0.9.8.1pl1/src/mime.c 2006-03-31 20:29:43.000000000 +0200 @@ -1,4 +1,6 @@ /* -*- mode: C; mode: fold -*- */ +/* vim:ts=8:expandtab: + */ /* MIME handling routines. * * Author: Michael Elkins <[EMAIL PROTECTED]> @@ -46,8 +48,15 @@ #include "util.h" #include "server.h" #include "snprintf.h" +#include "chmap.h" #include "mime.h" +/* don't use recode for slrnpull */ +#if defined(SLRNPULL_CODE) && defined(USE_ICONV) +# undef USE_ICONV +#endif + + #if ! SLRN_HAS_MIME int Slrn_Use_Mime = 0; #else /* rest of file in this ifdef */ @@ -60,6 +69,12 @@ char *Slrn_Mime_Display_Charset; +#ifdef USE_ICONV +static char *Compatible_Charsets[] = +{ + "US-ASCII", /* This MUST be zeroth element */ +}; +#else /* USE_ICONV */ /* These are all supersets of US-ASCII. Only the first N characters are * matched, where N is the length of the table entry. 
*/ @@ -73,6 +88,7 @@ "utf-8", /* we now have a function to decode this */ NULL }; +#endif /* USE_ICONV */ #ifndef SLRNPULL_CODE static char *Char_Set; @@ -120,6 +136,7 @@ int slrn_set_compatible_charsets (char *charsets) { +#ifndef USE_ICONV static char* buf; char *p; char **pp; @@ -166,10 +183,12 @@ *pp = NULL; +#endif /* USE_ICONV */ return 0; } #endif /* NOT SLRNPULL_CODE */ +#ifndef USE_ICONV static char *_find_compatible_charset (char **compat_charset, char *cs, unsigned int len) { @@ -187,9 +206,14 @@ } return NULL; } +#endif /* USE_RECOEE */ static char *find_compatible_charset (char *cs, unsigned int len) { +#ifdef USE_ICONV + return slrn_strnmalloc (cs, len, 1); +#else /* USE_ICONV */ + char *retval; if ((NULL == (retval = _find_compatible_charset (Compatible_Charsets, cs, @@ -198,6 +222,7 @@ retval = _find_compatible_charset (Custom_Compatible_Charsets, cs, len); return retval; +#endif /* USE_ICONV */ } #ifndef SLRNPULL_CODE @@ -291,13 +316,18 @@ len = b - charset; Char_Set = find_compatible_charset (charset, len); - return 0; + goto END; /* EVIL */ } line = line->next; } while ((line != NULL) && (line->flags & HEADER_LINE) && ((*(b = line->buf) == ' ') || (*b == '\t'))); + +END: +#ifdef USE_ICONV + a->charset = slrn_strmalloc (Char_Set, 1); +#endif return 0; } @@ -413,6 +443,7 @@ return dest; } +#ifndef USE_ICONV static char *utf_to_unicode (int *out, char *in, char *srcmax) { int mask = 0; @@ -495,15 +526,21 @@ return dest; } +#endif /* USE_ICONV */ -int slrn_rfc1522_decode_string (char *s) +int slrn_rfc1522_decode_string (char **str_ptr) { + char *s = *str_ptr; char *s1, *s2, ch; char *charset, method, *txt; char *after_last_encoded_word; char *after_whitespace; unsigned int count; unsigned int len; +#ifdef USE_ICONV + char *buff, *begin_of_encoded_part, *new_str; + char *output; +#endif count = 0; after_whitespace = NULL; @@ -512,10 +549,14 @@ /* Even if some user agents still send raw 8bit, it is safe to call * decode_utf8() -- if it finds 8bit chars 
that are not valid UTF-8, it * will set ch to 1 and we can leave the line untouched. */ -#if SLANG_VERSION >= 20000 +#ifdef USE_ICONV + output = slrn_safe_strmalloc ( *str_ptr ); + s = output; +#else +# if SLANG_VERSION >= 20000 if (Slrn_UTF8_Mode == 0) { -#endif +# endif len = strlen (s); s1 = slrn_safe_malloc(len + 1); @@ -525,9 +566,10 @@ if (ch == 0) strcpy (s, s1); /* safe */ slrn_free (s1); -#if SLANG_VERSION >= 20000 +# if SLANG_VERSION >= 20000 } -#endif +# endif +#endif /* USE_ICONV */ while (1) { @@ -536,6 +578,9 @@ if (s == NULL) break; s1 = s; +#ifdef USE_ICONV + begin_of_encoded_part = s1; +#endif charset = s = s1 + 2; while (((ch = *s) != 0) && (ch != '?') && (ch != ' ') && (ch != '\t') && (ch != '\n')) @@ -592,17 +637,67 @@ /* Note: these functions return a pointer to the END of the decoded * text. */ + +#ifdef USE_ICONV + /* decoded stuff is always shorter than encoded, so this size + * is safe */ + /* TODO: I don't think this is always true, specifically consider + * UTF8 encoded 4-byte characters */ + //buff = (char *) slrn_safe_malloc( strlen(txt) + 10 ); + len = strlen(txt) + 10; + buff = (char *) calloc( len, sizeof(char) ); + s1 = buff; +#endif + s2 = s1; if (method == 'B') s1 = decode_base64 (s1, txt, s); - else s1 = decode_quoted_printable (s1, txt, s, 1, 0); - + else + s1 = decode_quoted_printable (s1, txt, s, 1, 0); + +#ifdef USE_ICONV + /* ok, now the decoded string is in buff, now translate it */ + slrn_chmap_translate_string( charset, Slrn_Charset, &buff ); + + /* now we have to improvise a bit, because the translated string + * could, in principle be longer than the space that's available in + * the original string. 
+ * So, we allocate a _new_ string to replace it, and copy everything + * in there */ + /* skip the final '?=', if the string doesn't end prematurely */ + if (*s!='\0' && *(s+1)!='\0') s+=2; + len = begin_of_encoded_part - output /* part that is done already */ + + strlen (buff) /* part we just translated */ + + strlen (s) /* part that's yet to be translated */ + + 10; /* to be on the save side */ + new_str = (char *) slrn_safe_malloc( len * sizeof(char) ); + s1 = new_str; + memcpy (s1, output, begin_of_encoded_part - output); /* old part */ + s1 += begin_of_encoded_part - output; + memcpy (s1, buff, strlen (buff)); /* current part */ + s1 += strlen (buff); + memcpy (s1, s, strlen (s) + 1 ); /* todo part, including final \0 */ + + /* now free the old string and set it to the new one */ + memset(output, 'x', strlen(output)); + slrn_free (output); + output = new_str; + + /* and free the buffer */ + slrn_free(buff); + buff = NULL; + + /* set s to the position where we need to continue on the next + * iteration */ + s = s1; + +#else /* USE_ICONV */ if ((slrn_case_strncmp((unsigned char *)"utf-8", (unsigned char *)charset, 5) == 0) -#if SLANG_VERSION >= 20000 +# if SLANG_VERSION >= 20000 && (Slrn_UTF8_Mode == 0) -#endif +# endif ) s1 = decode_utf8 (s2, s2, s1, NULL); @@ -611,6 +706,7 @@ s = s1; /* start from here next loop */ while ((ch = *s2++) != 0) *s1++ = ch; *s1 = 0; +#endif count++; @@ -618,6 +714,12 @@ s = slrn_skip_whitespace (s); after_whitespace = s; } + +#ifdef USE_ICONV + // slrn_free( *str_ptr ); + *str_ptr = output; +#endif + return count; } @@ -627,6 +729,14 @@ static void rfc1522_decode_headers (Slrn_Article_Type *a) { Slrn_Article_Line_Type *line; + +#ifdef USE_ICONV + /* make sure we have a charset available */ + if (a->charset==NULL) + { + a->charset = slrn_safe_strmalloc(ICONV_DEFAULT_CHARSET); + } +#endif /* USE_ICONV */ if (a == NULL) return; @@ -638,12 +748,26 @@ if (slrn_case_strncmp ((unsigned char *)line->buf, (unsigned char *)"Newsgroups:", 
11) && slrn_case_strncmp ((unsigned char *)line->buf, - (unsigned char *)"Followup-To:", 12) && - slrn_rfc1522_decode_string (line->buf)) - { - a->is_modified = 1; - a->mime_was_modified = 1; - } + (unsigned char *)"Followup-To:", 12) + ) + { + /* ok, some news client (Outlook Express *sigh*) just put + * unencoded latin1/9/window1252 chars in their headers. Try to + * decode this */ + if (slrn_chmap_translate_string(a->charset , Slrn_Charset, + &(line->buf))) + { + a->is_modified = 1; + a->mime_was_modified = 1; + } + + /* now do the usual rfc 1522 decoding */ + if (slrn_rfc1522_decode_string (&(line->buf))) + { + a->is_modified = 1; + a->mime_was_modified = 1; + } + } line = line->next; } } @@ -845,6 +969,7 @@ } } +#ifndef USE_ICONV static void decode_mime_utf8 (Slrn_Article_Type *a) { Slrn_Article_Line_Type *line; @@ -879,6 +1004,7 @@ line = line->next; } } +#endif /* USE_ICONV */ void slrn_mime_article_init (Slrn_Article_Type *a) { @@ -894,8 +1020,6 @@ a->mime_was_parsed = 1; /* or will be */ - rfc1522_decode_headers (a); - /* Is there a reason to use the following line? 
*/ /* if (NULL == find_header_line (a, "Mime-Version:")) return;*/ if ((-1 == parse_content_type_line (a)) @@ -904,6 +1028,8 @@ a->mime_needs_metamail = 1; return; } + + rfc1522_decode_headers (a); switch (Encoding_Method) { @@ -926,14 +1052,16 @@ return; } +#ifndef USE_ICONV if ((a->mime_needs_metamail == 0) && -#if SLANG_VERSION >= 20000 +# if SLANG_VERSION >= 20000 (Slrn_UTF8_Mode == 0) && -#endif +# endif (Char_Set != NULL) && (slrn_case_strncmp((unsigned char *)"utf-8", (unsigned char *)Char_Set, 5) == 0)) decode_mime_utf8 (a); +#endif /* USE_ICONV */ } #ifndef MAXPATHLEN @@ -1525,6 +1653,10 @@ if (Mime_Posting_Charset == NULL) Mime_Posting_Charset = "us-ascii"; + /* TODO: check that a charset header isn't already present */ + /* so: (1) extract possible charset header and use it to override + * Mime_Posting_Charset here; (2) remove possible duplicate headers + * that we are going to add; (3) add headers as below */ switch (Mime_Posting_Encoding) { default: diff -Naur slrn-0.9.8.1pl1.eerst/src/mime.h slrn-0.9.8.1pl1/src/mime.h --- slrn-0.9.8.1pl1.eerst/src/mime.h 2006-03-31 18:03:56.000000000 +0200 +++ slrn-0.9.8.1pl1/src/mime.h 2006-03-31 19:14:46.000000000 +0200 @@ -1,6 +1,11 @@ #ifndef _SLRN_MIME_H #define _SLRN_MIME_H +/* don't use recode for slrnpull */ +#if defined(SLRNPULL_CODE) && defined(USE_ICONV) +# undef USE_ICONV +#endif + #include "vfile.h" extern int Slrn_Use_Mime; @@ -11,7 +16,7 @@ # if SLRN_HAS_MIME /* rest of file in this if */ extern int slrn_set_compatible_charsets (char *); -extern int slrn_rfc1522_decode_string (char *); +extern int slrn_rfc1522_decode_string (char **); extern VFILE *slrn_mime_encode (VFILE *); extern void slrn_mime_header_encode (char *, unsigned int); diff -Naur slrn-0.9.8.1pl1.eerst/src/slrn.c slrn-0.9.8.1pl1/src/slrn.c --- slrn-0.9.8.1pl1.eerst/src/slrn.c 2006-03-31 18:03:56.000000000 +0200 +++ slrn-0.9.8.1pl1/src/slrn.c 2006-04-03 23:08:05.000000000 +0200 @@ -107,8 +107,15 @@ # include <windows.h> #endif +/* don't 
use recode for slrnpull */ +#if defined(SLRNPULL_CODE) && defined(USE_ICONV) +# undef USE_ICONV +#endif + + /*}}}*/ + /*{{{ Global Variables */ #if SLANG_VERSION >= 20000 @@ -1219,7 +1226,7 @@ if (Slrn_Post_Id == 0) Slrn_Post_Id = Slrn_Default_Post_Obj; if (no_new_groups) Slrn_Check_New_Groups = 0; -#if SLRN_HAS_CHARACTER_MAP +#if SLRN_HAS_CHARACTER_MAP || defined(USE_ICONV) if (-1 == slrn_set_charset (Slrn_Charset)) { slrn_chmap_show_supported (); @@ -1227,10 +1234,13 @@ } #endif -#if SLRN_HAS_MIME +#ifdef USE_ICONV + Slrn_Mime_Display_Charset = slrn_safe_strmalloc (Slrn_Charset); +#else /* USE_ICONV */ +# if SLRN_HAS_MIME if (NULL == Slrn_Mime_Display_Charset) - { -# if SLRN_HAS_CHARACTER_MAP + { +# if SLRN_HAS_CHARACTER_MAP if (NULL != Slrn_Charset) { if (0 == slrn_case_strcmp ((unsigned char *)Slrn_Charset, @@ -1247,10 +1257,11 @@ Slrn_Mime_Display_Charset = slrn_safe_strmalloc ("iso-8859-7"); } if (NULL == Slrn_Mime_Display_Charset) -# endif +# endif Slrn_Mime_Display_Charset = slrn_safe_strmalloc ("iso-8859-1"); +# endif } -#endif +#endif /* USE_ICONV */ #ifdef SIGINT if (Slrn_TT_Initialized == 0) diff -Naur slrn-0.9.8.1pl1.eerst/src/slrnpull.c slrn-0.9.8.1pl1/src/slrnpull.c --- slrn-0.9.8.1pl1.eerst/src/slrnpull.c 2006-03-31 18:03:56.000000000 +0200 +++ slrn-0.9.8.1pl1/src/slrnpull.c 2006-03-31 19:14:46.000000000 +0200 @@ -1335,8 +1335,8 @@ h.from = slrn_safe_strmalloc (xov->from); if (Slrn_Use_Mime) { - slrn_rfc1522_decode_string (h.subject); - slrn_rfc1522_decode_string (h.from); + slrn_rfc1522_decode_string (&(h.subject)); + slrn_rfc1522_decode_string (&(h.from)); } #endif diff -Naur slrn-0.9.8.1pl1.eerst/src/startup.c slrn-0.9.8.1pl1/src/startup.c --- slrn-0.9.8.1pl1.eerst/src/startup.c 2006-03-31 18:03:56.000000000 +0200 +++ slrn-0.9.8.1pl1/src/startup.c 2006-03-31 19:14:46.000000000 +0200 @@ -61,6 +61,13 @@ #ifdef VMS # include "vms.h" #endif + +/* don't use recode for slrnpull */ +#if defined(SLRNPULL_CODE) && defined(USE_ICONV) +# undef USE_ICONV 
+#endif + + /*}}}*/ /*{{{ Forward Function Declarations */ @@ -692,13 +699,15 @@ }, #if SLRN_HAS_MIME +# ifndef USE_ICONV /* this is fetched from the current locale */ {"mime_charset", &Slrn_Mime_Display_Charset}, +# endif /* USE_ICONV */ {"metamail_command", &Slrn_MetaMail_Cmd}, #else {"mime_charset", NULL}, {"metamail_command", NULL}, #endif -#if SLRN_HAS_CHARACTER_MAP +#if SLRN_HAS_CHARACTER_MAP && !defined(USE_ICONV) {"charset", &Slrn_Charset}, #else {"charset", NULL}, diff -Naur slrn-0.9.8.1pl1.eerst/src/xover.c slrn-0.9.8.1pl1/src/xover.c --- slrn-0.9.8.1pl1.eerst/src/xover.c 2006-03-31 18:03:56.000000000 +0200 +++ slrn-0.9.8.1pl1/src/xover.c 2006-03-31 19:41:04.000000000 +0200 @@ -155,7 +155,7 @@ (unsigned char *)"Newsgroups") && slrn_case_strcmp ((unsigned char *)h, (unsigned char *)"Followup-To")) - slrn_rfc1522_decode_string (colon); + slrn_rfc1522_decode_string (&colon); #endif addh->value = colon; break;