Hi Norbert! You wrote:
> * Bas Zoetekouw wrote: > > Would you rather consider adding a patch as I suggested above, that > > adds a new config option "default_header_charset" or so, which > > controls the charset to fall back to for non-encoded headers? I can > > see if I can hack on that later today... > > Any news here already? Yeah, it's done \o/ I made the first version of the patch last week already, but it had some bugs, and I haven't had any time to fix it until today. The patch is attached. It adds an option "fallback_charset" for slrnrc, which selects the default charset that is to be used if the header or the article doesn't specify anything. Ideally, this fallback charset could be specified on a per-group basis, but that would make the patch a lot more complicated. The current patch, with only one global fallback, at least restores slrn's functionality from before the utf8 patch. The patch also fixes a small bug in the previous utf8 patch. Have fun! Bas. -- Kind regards, +--------------------------------------------------------------------+ | Bas Zoetekouw | GPG key: 0644fab7 | |----------------------------| Fingerprint: c1f5 f24c d514 3fec 8bf6 | | [EMAIL PROTECTED], [EMAIL PROTECTED] | a2b1 2bae e41f 0644 fab7 | +--------------------------------------------------------------------+
diff -Naur eerst/slrn-0.9.8.1pl1/debian/patches/00list slrn-0.9.8.1pl1/debian/patches/00list --- eerst/slrn-0.9.8.1pl1/debian/patches/00list 2007-01-12 22:14:32.000000000 +0100 +++ slrn-0.9.8.1pl1/debian/patches/00list 2007-01-15 20:33:20.827864867 +0100 @@ -14,3 +14,4 @@ 211_query-cutoff.diff 300_iconv.diff 301_warning.diff +302_fallback_charset.diff diff -Naur eerst/slrn-0.9.8.1pl1/debian/patches/300_iconv.diff slrn-0.9.8.1pl1/debian/patches/300_iconv.diff --- eerst/slrn-0.9.8.1pl1/debian/patches/300_iconv.diff 2007-01-12 22:14:32.000000000 +0100 +++ slrn-0.9.8.1pl1/debian/patches/300_iconv.diff 2007-01-15 20:34:04.012711486 +0100 @@ -259,7 +259,7 @@ + num = iconv(cd, &in_cursor, &in_left, &out_cursor, &out_left); + + /* the entire line was translated, we're done */ -+ if (num>=0) ++ if ( in_left == 0 ) + { + break; + } diff -Naur eerst/slrn-0.9.8.1pl1/debian/patches/302_fallback_charset.diff slrn-0.9.8.1pl1/debian/patches/302_fallback_charset.diff --- eerst/slrn-0.9.8.1pl1/debian/patches/302_fallback_charset.diff 1970-01-01 01:00:00.000000000 +0100 +++ slrn-0.9.8.1pl1/debian/patches/302_fallback_charset.diff 2007-01-15 20:33:01.614068679 +0100 @@ -0,0 +1,212 @@ +#! 
/bin/sh -e +if [ $# -ne 1 ]; then + echo >&2 "`basename $0`: script expects -patch|-unpatch as argument" + exit 1 +fi +case "$1" in + -patch) patch -f --no-backup-if-mismatch -p1 < $0;; + -unpatch) patch -f --no-backup-if-mismatch -R -p1 < $0;; + *) + echo >&2 "`basename $0`: script expects -patch|-unpatch as argument" + exit 1;; +esac + +exit 0 + [EMAIL PROTECTED]@ +diff -Naur eerst/slrn-0.9.8.1pl1/src/art.c slrn-0.9.8.1pl1/src/art.c +--- eerst/slrn-0.9.8.1pl1/src/art.c 2007-01-15 18:54:23.795361911 +0100 ++++ slrn-0.9.8.1pl1/src/art.c 2007-01-15 18:51:24.808845511 +0100 +@@ -473,6 +473,9 @@ + remove_from_hash_table (h); + slrn_free (h->tree_ptr); + slrn_free (h->subject); ++#ifdef USE_ICONV /* we've copied this string */ ++ slrn_free (h->from); ++#endif + slrn_free (h->date); + slrn_free (h->realname); + slrn_free_additional_headers (h->add_hdrs); +@@ -5519,7 +5522,7 @@ + static Slrn_Header_Type *process_xover (Slrn_XOver_Type *xov) + { + Slrn_Header_Type *h; +- unsigned char *c; ++ char *c; + + h = (Slrn_Header_Type *) slrn_safe_malloc (sizeof (Slrn_Header_Type)); + +@@ -5527,19 +5530,44 @@ + Number_Total++; + + #ifdef USE_ICONV +- /* ok, some news client (Outlook Express *sigh*) just put unencoded ++ ++ /* Annoyingly, h->from wasn't malloced separately, but is part of the same ++ * buffer as h->subject (ie free()ing h->subject will also delete the memory ++ * space used by h->from(). That sucks, so make a copy of from, to make ++ * sure we can freely mess with the strings as we see fit. ++ * NOTE: h->from is free()ed in free_header() ++ */ ++ h->from = slrn_safe_strmalloc( h->from ); ++ ++ /* ok, some news clients (Outlook Express *sigh*) just put unencoded + * latin1/9 chars in their headers. As we don't know any charset at +- * this time, replace those chars by '?' 
chars */ +- c = h->subject; +- while (*c!='\0' && *c!=0x0a && *c!=0x0d) +- { +- if (*c>=0x7f) *c = '?'; c++; ++ * this time, try to translating them using the (user-specified) default ++ * charset, or if the user didn't specify any, replace all non-ASCII ++ * chars by question marks ++ */ ++ ++ /* if the user specified a fallback charset */ ++ if ( Slrn_Fallback_Input_Charset_Default == 0 ) ++ { ++ slrn_chmap_translate_string(Slrn_Fallback_Input_Charset, ++ Slrn_Charset, &(h->subject)); ++ slrn_chmap_translate_string(Slrn_Fallback_Input_Charset, ++ Slrn_Charset, &(h->from)); + } +- c = h->from; +- while (*c!='\0' && *c!=0x0a && *c!=0x0d) +- { ++ else /* no fallback charset specified, so replace all high chars by ? */ ++ { ++ c = h->subject; ++ while (*c!='\0' && *c!=0x0a && *c!=0x0d) ++ { + if (*c>=0x7f) *c = '?'; c++; ++ } ++ c = h->from; ++ while (*c!='\0' && *c!=0x0a && *c!=0x0d) ++ { ++ if (*c>=0x7f) *c = '?'; c++; ++ } + } ++ + #endif /* USE_ICONV */ + + #if SLRN_HAS_MIME +diff -Naur eerst/slrn-0.9.8.1pl1/src/art.h slrn-0.9.8.1pl1/src/art.h +--- eerst/slrn-0.9.8.1pl1/src/art.h 2007-01-15 18:54:23.796361774 +0100 ++++ slrn-0.9.8.1pl1/src/art.h 2007-01-12 22:40:58.470216033 +0100 +@@ -84,6 +84,11 @@ + + #endif /* NOT SLRNPULL_CODE */ + ++#ifdef USE_ICONV ++extern char *Slrn_Fallback_Input_Charset; ++extern short int Slrn_Fallback_Input_Charset_Default; ++#endif ++ + typedef struct Slrn_Header_Line_Type + { + char *name; +diff -Naur eerst/slrn-0.9.8.1pl1/src/chmap.c slrn-0.9.8.1pl1/src/chmap.c +--- eerst/slrn-0.9.8.1pl1/src/chmap.c 2007-01-15 18:54:23.798361500 +0100 ++++ slrn-0.9.8.1pl1/src/chmap.c 2007-01-15 19:01:51.939060247 +0100 +@@ -75,6 +75,11 @@ + + #ifdef USE_ICONV + ++/* the user-specified fallback charset */ ++char *Slrn_Fallback_Input_Charset = NULL; ++/* is set to a true value if the user didn't specify an override fallback */ ++short int Slrn_Fallback_Input_Charset_Default = 0; ++ + const iconv_t ICONV_FAIL = (iconv_t) -1; + + /* translate the 
string *str_ptr from charset cs_from to charset cs_to */ +@@ -92,7 +97,7 @@ + + + /* make sure the charsets are initialized */ +- if (cs_from == NULL) cs_from = ICONV_DEFAULT_CHARSET; ++ if (cs_from == NULL) cs_from = Slrn_Fallback_Input_Charset; + if (cs_to == NULL) return *str_ptr; + + /* don't translate if from and to charsets are equal */ +@@ -258,7 +263,7 @@ + #ifdef USE_ICONV + /* check if we need to translate */ + if (a->charset == NULL) +- charset = ICONV_DEFAULT_CHARSET; ++ charset = Slrn_Fallback_Input_Charset; + else + charset = a->charset; + +@@ -399,6 +404,22 @@ + #if USE_ICONV + iconv_t cd; + ++ /* first set the fallback input charset to ASCII if the user hasn't ++ * specified it */ ++ if ( Slrn_Fallback_Input_Charset != NULL ++ && *Slrn_Fallback_Input_Charset == '\0' ) ++ { ++ free( Slrn_Fallback_Input_Charset ); ++ } ++ if ( Slrn_Fallback_Input_Charset == NULL ) ++ { ++ Slrn_Fallback_Input_Charset = malloc( 9 ); ++ strncpy( Slrn_Fallback_Input_Charset, "US-ASCII", 9 ); ++ Slrn_Fallback_Input_Charset_Default = 1; ++ } ++ ++ /* the rest of this function deals with the _output_ charset */ ++ + /* use environenment for locale */ + setlocale (LC_ALL, ""); + +diff -Naur eerst/slrn-0.9.8.1pl1/src/chmap.h slrn-0.9.8.1pl1/src/chmap.h +--- eerst/slrn-0.9.8.1pl1/src/chmap.h 2007-01-15 18:54:23.798361500 +0100 ++++ slrn-0.9.8.1pl1/src/chmap.h 2007-01-15 18:50:00.857329255 +0100 +@@ -26,13 +26,12 @@ + # undef USE_ICONV + #endif + +-#define ICONV_DEFAULT_CHARSET "iso-8859-15" +- + extern int slrn_set_charset (char *); + extern int slrn_chmap_fix_file (char *, int); + #ifdef USE_ICONV + extern char * slrn_chmap_translate_string ( + char *, char *, char **); ++extern char *Slrn_Fallback_Input_Charset; + #endif + extern void slrn_chmap_fix_body (Slrn_Article_Type *, int); + extern void slrn_chmap_fix_header (Slrn_Header_Type *); +diff -Naur eerst/slrn-0.9.8.1pl1/src/mime.c slrn-0.9.8.1pl1/src/mime.c +--- eerst/slrn-0.9.8.1pl1/src/mime.c 2007-01-15 18:54:23.831356987 
+0100 ++++ slrn-0.9.8.1pl1/src/mime.c 2007-01-15 18:49:38.805345750 +0100 +@@ -734,7 +734,7 @@ + /* make sure we have a charset available */ + if (a->charset==NULL) + { +- a->charset = slrn_safe_strmalloc(ICONV_DEFAULT_CHARSET); ++ a->charset = slrn_safe_strmalloc(Slrn_Fallback_Input_Charset); + } + #endif /* USE_ICONV */ + +diff -Naur eerst/slrn-0.9.8.1pl1/src/stamp-h1 slrn-0.9.8.1pl1/src/stamp-h1 +--- eerst/slrn-0.9.8.1pl1/src/stamp-h1 1970-01-01 01:00:00.000000000 +0100 ++++ slrn-0.9.8.1pl1/src/stamp-h1 2007-01-15 18:51:41.310588234 +0100 +@@ -0,0 +1 @@ ++timestamp for src/config.h +diff -Naur eerst/slrn-0.9.8.1pl1/src/startup.c slrn-0.9.8.1pl1/src/startup.c +--- eerst/slrn-0.9.8.1pl1/src/startup.c 2007-01-15 18:54:23.864352473 +0100 ++++ slrn-0.9.8.1pl1/src/startup.c 2007-01-12 22:16:05.556850947 +0100 +@@ -699,7 +699,9 @@ + }, + + #if SLRN_HAS_MIME +-# ifndef USE_ICONV /* this is fetched from the current locale */ ++# ifdef USE_ICONV ++ {"fallback_charset", &Slrn_Fallback_Input_Charset}, ++#else + {"mime_charset", &Slrn_Mime_Display_Charset}, + # endif /* USE_ICONV */ + {"metamail_command", &Slrn_MetaMail_Cmd},