On Mon, 5 Apr 2021 06:04:49 +0000 "Surla, Sai Kalyan" wrote: > Is there any update on the issues?
I finally found time to work on the first issue (header detection), for which we already had a workaround. I created proper patches (attached) for the issue and sent them to the upstream maintainer. -- bye, pabs https://wiki.debian.org/PaulWise
From a4aa24ae5675b09385d0c88add48c3ab046e699d Mon Sep 17 00:00:00 2001 From: Paul Wise <pa...@bonedaddy.net> Date: Sun, 30 May 2021 10:02:14 +0800 Subject: [PATCH 1/3] Add debugging for header detection --- src/readpst.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/readpst.c b/src/readpst.c index 6d94f15..b5910e9 100644 --- a/src/readpst.c +++ b/src/readpst.c @@ -1591,6 +1591,8 @@ void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode, DEBUG_ENT("write_normal_email"); pst_convert_utf8_null(item, &item->email->header); + DEBUG_INFO(("PST headers\n%s\n", *item->email->header.str)); + DEBUG_INFO(("Extra MIME headers\n%s\n", *extra_mime_headers)); headers = valid_headers(item->email->header.str) ? item->email->header.str : valid_headers(*extra_mime_headers) ? *extra_mime_headers : NULL; -- 2.30.2
From bade93dcdb435bc7bec50cf4b54481731beea45c Mon Sep 17 00:00:00 2001 From: Paul Wise <pa...@bonedaddy.net> Date: Sun, 30 May 2021 09:49:57 +0800 Subject: [PATCH 2/3] Also detect email headers wrapped with space instead of tab Spaces are commonly used for email header wrapping. --- src/readpst.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/readpst.c b/src/readpst.c index b5910e9..6663771 100644 --- a/src/readpst.c +++ b/src/readpst.c @@ -1275,8 +1275,10 @@ int header_match(char *header, char*field) { if (strncasecmp(header, field, n) == 0) return 1; // tag:{space} if ((field[n-1] == ' ') && (strncasecmp(header, field, n-1) == 0)) { char *crlftab = "\r\n\t"; + char *crlfspc = "\r\n "; DEBUG_INFO(("Possible wrapped header = %s\n", header)); if (strncasecmp(header+n-1, crlftab, 3) == 0) return 1; // tag:{cr}{lf}{tab} + if (strncasecmp(header+n-1, crlfspc, 3) == 0) return 1; // tag:{cr}{lf}{space} } return 0; } -- 2.30.2
From da5f159caa66db380b793f9062a36888c9b12467 Mon Sep 17 00:00:00 2001 From: Paul Wise <pa...@bonedaddy.net> Date: Sun, 30 May 2021 09:51:26 +0800 Subject: [PATCH 3/3] Detect reasonable email headers too RFC 5322 specifies the syntax of email headers, most header fields are more restricted though so use a restricted check in case the headers are bogus parts of the body that happen to match RFC 5322. Fixes: https://bugs.debian.org/984581 --- src/readpst.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/src/readpst.c b/src/readpst.c index 6663771..97ba127 100644 --- a/src/readpst.c +++ b/src/readpst.c @@ -1283,6 +1283,65 @@ int header_match(char *header, char*field) { return 0; } +// https://en.wikipedia.org/wiki/Email#Message_header +// https://www.rfc-editor.org/rfc/rfc5322.html +// https://www.iana.org/assignments/message-headers/message-headers.xhtml +int header_is_reasonable(char *header) +{ + char *c; +#define C *c + + // The header must not be NULL + if (header) c = header; + else return 0; + + // usually the header field name starts with upper-case: A-Z + if (C >= 'A' && C <= 'Z') c++; + else return 0; + + while(1) { + // most header field names use a limited set of characters: - 0-9 A-Z a-z + if ( + (C >= 'A' && C <= 'Z') || + (C >= 'a' && C <= 'z') || + (C >= '0' && C <= '9') || + (C == '-') + ) { + c++; + // the header field name is then terminated with a colon + } else if (C == ':') { + c++; + goto parse_header_field_value; + // other characters are an indicator of an invalid header + } else { + return 0; + } + } + +parse_header_field_value: + while(1) { + // header field values are printable US-ASCII plus space/tab + if ( + (C >= 33 && C <= 126) || + (C == ' ' || C == '\t') + ) { + c++; + // the header field value is then terminated with CRLF + } else if (C == '\r' && *(c+1) == '\n') { + c += 2; + // the value could continue to the next line though + if (C == ' ' || C == '\t') c++; + else return 1; + 
// other characters are an indicator of an invalid header + } else { + return 0; + } + } + +#undef C + +} + int valid_headers(char *header) { // headers are sometimes really bogus - they seem to be fragments of the @@ -1303,6 +1362,7 @@ int valid_headers(char *header) if (header_match(header, "X-ASG-Debug-ID: " )) return 1; if (header_match(header, "X-Barracuda-URL: " )) return 1; if (header_match(header, "X-x: " )) return 1; + if (header_is_reasonable(header)) return 1; if (strlen(header) > 2) { DEBUG_INFO(("Ignore bogus headers = %s\n", header)); } -- 2.30.2
signature.asc
Description: This is a digitally signed message part