Hi Norbert!

You wrote:

> * Bas Zoetekouw wrote:
> > Would you rather consider adding a patch as I suggested above, that
> > adds a new config option "default_header_charset" or so, which
> > controls the charset to fall back to for non-encoded headers? I can
> > see if I can hack on that later today...
> 
> Any news here already?

Yeah, it's done \o/
I made the first version of the patch last week already, but it had some
bugs, and I haven't had any time to fix it until today.

The patch is attached.  It adds an option "fallback_charset" for slrnrc,
which selects the default charset to be used if neither the header nor
the article specifies one.  Ideally, this fallback charset could be
specified on a per-group basis, but that would make the patch a lot more
complicated.  The current patch, with only one global fallback, at least
restores the functionality slrn had before the utf8 patch.

The patch also fixes a small bug in the previous utf8 patch.

Have fun!

Bas.

-- 
Kind regards,
+--------------------------------------------------------------------+
| Bas Zoetekouw              | GPG key: 0644fab7                     |
|----------------------------| Fingerprint: c1f5 f24c d514 3fec 8bf6 |
| [EMAIL PROTECTED], [EMAIL PROTECTED] |              a2b1 2bae e41f 0644 fab7 |
+--------------------------------------------------------------------+ 
diff -Naur eerst/slrn-0.9.8.1pl1/debian/patches/00list 
slrn-0.9.8.1pl1/debian/patches/00list
--- eerst/slrn-0.9.8.1pl1/debian/patches/00list 2007-01-12 22:14:32.000000000 
+0100
+++ slrn-0.9.8.1pl1/debian/patches/00list       2007-01-15 20:33:20.827864867 
+0100
@@ -14,3 +14,4 @@
 211_query-cutoff.diff
 300_iconv.diff
 301_warning.diff
+302_fallback_charset.diff
diff -Naur eerst/slrn-0.9.8.1pl1/debian/patches/300_iconv.diff 
slrn-0.9.8.1pl1/debian/patches/300_iconv.diff
--- eerst/slrn-0.9.8.1pl1/debian/patches/300_iconv.diff 2007-01-12 
22:14:32.000000000 +0100
+++ slrn-0.9.8.1pl1/debian/patches/300_iconv.diff       2007-01-15 
20:34:04.012711486 +0100
@@ -259,7 +259,7 @@
 +      num = iconv(cd, &in_cursor, &in_left, &out_cursor, &out_left);
 +
 +      /* the entire line was translated, we're done */
-+      if (num>=0) 
++      if ( in_left == 0 ) 
 +      {
 +        break;
 +      }
diff -Naur eerst/slrn-0.9.8.1pl1/debian/patches/302_fallback_charset.diff 
slrn-0.9.8.1pl1/debian/patches/302_fallback_charset.diff
--- eerst/slrn-0.9.8.1pl1/debian/patches/302_fallback_charset.diff      
1970-01-01 01:00:00.000000000 +0100
+++ slrn-0.9.8.1pl1/debian/patches/302_fallback_charset.diff    2007-01-15 
20:33:01.614068679 +0100
@@ -0,0 +1,212 @@
+#! /bin/sh -e
+if [ $# -ne 1 ]; then
+    echo >&2 "`basename $0`: script expects -patch|-unpatch as argument"
+    exit 1
+fi
+case "$1" in
+    -patch) patch -f --no-backup-if-mismatch -p1 < $0;;
+    -unpatch) patch -f --no-backup-if-mismatch -R -p1 < $0;;
+    *)
+        echo >&2 "`basename $0`: script expects -patch|-unpatch as argument"
+        exit 1;;
+esac
+
+exit 0
+
+@DPATCH@
+diff -Naur eerst/slrn-0.9.8.1pl1/src/art.c slrn-0.9.8.1pl1/src/art.c
+--- eerst/slrn-0.9.8.1pl1/src/art.c    2007-01-15 18:54:23.795361911 +0100
++++ slrn-0.9.8.1pl1/src/art.c  2007-01-15 18:51:24.808845511 +0100
+@@ -473,6 +473,9 @@
+    remove_from_hash_table (h);
+    slrn_free (h->tree_ptr);
+    slrn_free (h->subject);
++#ifdef USE_ICONV /* we've copied this string */
++   slrn_free (h->from);
++#endif
+    slrn_free (h->date);
+    slrn_free (h->realname);
+    slrn_free_additional_headers (h->add_hdrs);
+@@ -5519,7 +5522,7 @@
+ static Slrn_Header_Type *process_xover (Slrn_XOver_Type *xov)
+ {
+    Slrn_Header_Type *h;
+-   unsigned char *c;
++   char *c;
+    
+    h = (Slrn_Header_Type *) slrn_safe_malloc (sizeof (Slrn_Header_Type));
+    
+@@ -5527,19 +5530,44 @@
+    Number_Total++;
+    
+ #ifdef USE_ICONV
+-   /* ok, some news client (Outlook Express *sigh*) just put unencoded
++
++   /* Annoyingly, h->from wasn't malloced separately, but is part of the same
++    * buffer as h->subject (i.e. free()ing h->subject will also delete the memory
++    * space used by h->from).  That sucks, so make a copy of from, to make
++    * sure we can freely mess with the strings as we see fit.  
++    * NOTE: h->from is free()ed in free_header()
++    */
++   h->from = slrn_safe_strmalloc( h->from );
++
++   /* ok, some news clients (Outlook Express *sigh*) just put unencoded
+     * latin1/9 chars in their headers.  As we don't know any charset at 
+-    * this time, replace those chars by '?' chars */
+-   c = h->subject;
+-   while (*c!='\0' && *c!=0x0a && *c!=0x0d) 
+-   { 
+-         if (*c>=0x7f) *c = '?'; c++; 
++    * this time, try translating them using the (user-specified) default 
++    * charset, or if the user didn't specify any, replace all non-ASCII
++    * chars by question marks
++    */
++
++   /* if the user specified a fallback charset */
++   if ( Slrn_Fallback_Input_Charset_Default == 0 )
++   {
++       slrn_chmap_translate_string(Slrn_Fallback_Input_Charset, 
++             Slrn_Charset, &(h->subject));
++       slrn_chmap_translate_string(Slrn_Fallback_Input_Charset, 
++             Slrn_Charset, &(h->from));
+    }
+-   c = h->from;
+-   while (*c!='\0' && *c!=0x0a && *c!=0x0d) 
+-   { 
++   else /* no fallback charset specified, so replace all high chars by ? */
++   {
++       c = h->subject;
++       while (*c!='\0' && *c!=0x0a && *c!=0x0d) 
++       { 
+          if (*c>=0x7f) *c = '?'; c++; 
++       }
++       c = h->from;
++       while (*c!='\0' && *c!=0x0a && *c!=0x0d) 
++       { 
++         if (*c>=0x7f) *c = '?'; c++; 
++       }
+    }
++
+ #endif /* USE_ICONV */
+ 
+ #if SLRN_HAS_MIME
+diff -Naur eerst/slrn-0.9.8.1pl1/src/art.h slrn-0.9.8.1pl1/src/art.h
+--- eerst/slrn-0.9.8.1pl1/src/art.h    2007-01-15 18:54:23.796361774 +0100
++++ slrn-0.9.8.1pl1/src/art.h  2007-01-12 22:40:58.470216033 +0100
+@@ -84,6 +84,11 @@
+ 
+ #endif                                       /* NOT SLRNPULL_CODE */
+ 
++#ifdef USE_ICONV
++extern char *Slrn_Fallback_Input_Charset;
++extern short int Slrn_Fallback_Input_Charset_Default;
++#endif
++
+ typedef struct Slrn_Header_Line_Type
+ {
+    char *name;
+diff -Naur eerst/slrn-0.9.8.1pl1/src/chmap.c slrn-0.9.8.1pl1/src/chmap.c
+--- eerst/slrn-0.9.8.1pl1/src/chmap.c  2007-01-15 18:54:23.798361500 +0100
++++ slrn-0.9.8.1pl1/src/chmap.c        2007-01-15 19:01:51.939060247 +0100
+@@ -75,6 +75,11 @@
+ 
+ #ifdef USE_ICONV
+ 
++/* the user-specified fallback charset */
++char *Slrn_Fallback_Input_Charset = NULL;
++/* is set to a true value if the user didn't specify an override fallback */
++short int Slrn_Fallback_Input_Charset_Default = 0;
++
+ const iconv_t ICONV_FAIL = (iconv_t) -1;
+ 
+ /* translate the string *str_ptr from charset cs_from to charset cs_to */
+@@ -92,7 +97,7 @@
+ 
+ 
+     /* make sure the charsets are initialized */
+-    if (cs_from == NULL) cs_from = ICONV_DEFAULT_CHARSET;
++    if (cs_from == NULL) cs_from = Slrn_Fallback_Input_Charset;
+     if (cs_to   == NULL) return *str_ptr;
+ 
+     /* don't translate if from and to charsets are equal */
+@@ -258,7 +263,7 @@
+ #ifdef USE_ICONV
+    /* check if we need to translate */
+    if (a->charset == NULL)
+-     charset = ICONV_DEFAULT_CHARSET;
++     charset = Slrn_Fallback_Input_Charset;
+    else 
+      charset = a->charset;
+        
+@@ -399,6 +404,22 @@
+ #if USE_ICONV
+   iconv_t cd;
+ 
++  /* first set the fallback input charset to ASCII if the user hasn't 
++   * specified it */
++  if ( Slrn_Fallback_Input_Charset != NULL 
++      && *Slrn_Fallback_Input_Charset == '\0' )
++  {
++    free( Slrn_Fallback_Input_Charset );
++  }
++  if ( Slrn_Fallback_Input_Charset == NULL )
++  {
++    Slrn_Fallback_Input_Charset = malloc( 9 );
++    strncpy( Slrn_Fallback_Input_Charset, "US-ASCII", 9 );
++    Slrn_Fallback_Input_Charset_Default = 1;
++  }
++
++  /* the rest of this function deals with the _output_ charset */
++
+   /* use environenment for locale */
+   setlocale (LC_ALL, "");
+   
+diff -Naur eerst/slrn-0.9.8.1pl1/src/chmap.h slrn-0.9.8.1pl1/src/chmap.h
+--- eerst/slrn-0.9.8.1pl1/src/chmap.h  2007-01-15 18:54:23.798361500 +0100
++++ slrn-0.9.8.1pl1/src/chmap.h        2007-01-15 18:50:00.857329255 +0100
+@@ -26,13 +26,12 @@
+ # undef USE_ICONV
+ #endif
+ 
+-#define ICONV_DEFAULT_CHARSET "iso-8859-15"
+-
+ extern int slrn_set_charset (char *);
+ extern int slrn_chmap_fix_file (char *, int);
+ #ifdef USE_ICONV
+ extern char * slrn_chmap_translate_string (
+       char *, char *, char **);
++extern char *Slrn_Fallback_Input_Charset;
+ #endif
+ extern void slrn_chmap_fix_body (Slrn_Article_Type *, int);
+ extern void slrn_chmap_fix_header (Slrn_Header_Type *);
+diff -Naur eerst/slrn-0.9.8.1pl1/src/mime.c slrn-0.9.8.1pl1/src/mime.c
+--- eerst/slrn-0.9.8.1pl1/src/mime.c   2007-01-15 18:54:23.831356987 +0100
++++ slrn-0.9.8.1pl1/src/mime.c 2007-01-15 18:49:38.805345750 +0100
+@@ -734,7 +734,7 @@
+    /* make sure we have a charset available */
+    if (a->charset==NULL)
+    {
+-       a->charset = slrn_safe_strmalloc(ICONV_DEFAULT_CHARSET);
++       a->charset = slrn_safe_strmalloc(Slrn_Fallback_Input_Charset);
+    }
+ #endif /* USE_ICONV */
+    
+diff -Naur eerst/slrn-0.9.8.1pl1/src/stamp-h1 slrn-0.9.8.1pl1/src/stamp-h1
+--- eerst/slrn-0.9.8.1pl1/src/stamp-h1 1970-01-01 01:00:00.000000000 +0100
++++ slrn-0.9.8.1pl1/src/stamp-h1       2007-01-15 18:51:41.310588234 +0100
+@@ -0,0 +1 @@
++timestamp for src/config.h
+diff -Naur eerst/slrn-0.9.8.1pl1/src/startup.c slrn-0.9.8.1pl1/src/startup.c
+--- eerst/slrn-0.9.8.1pl1/src/startup.c        2007-01-15 18:54:23.864352473 
+0100
++++ slrn-0.9.8.1pl1/src/startup.c      2007-01-12 22:16:05.556850947 +0100
+@@ -699,7 +699,9 @@
+      },
+      
+ #if SLRN_HAS_MIME
+-# ifndef USE_ICONV /* this is fetched from the current locale */
++# ifdef USE_ICONV
++     {"fallback_charset", &Slrn_Fallback_Input_Charset},
++#else
+      {"mime_charset", &Slrn_Mime_Display_Charset},
+ # endif /* USE_ICONV */
+      {"metamail_command", &Slrn_MetaMail_Cmd},

Reply via email to