On Mon, 5 Apr 2021 06:04:49 +0000 "Surla, Sai Kalyan" wrote:

> Is there any update on the issues.

I finally found time to work on the first issue (header detection),
for which we already had a workaround. I have created proper patches
(attached) for it and sent them to the upstream maintainer.

-- 
bye,
pabs

https://wiki.debian.org/PaulWise
From a4aa24ae5675b09385d0c88add48c3ab046e699d Mon Sep 17 00:00:00 2001
From: Paul Wise <pa...@bonedaddy.net>
Date: Sun, 30 May 2021 10:02:14 +0800
Subject: [PATCH 1/3] Add debugging for header detection

---
 src/readpst.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/readpst.c b/src/readpst.c
index 6d94f15..b5910e9 100644
--- a/src/readpst.c
+++ b/src/readpst.c
@@ -1591,6 +1591,8 @@ void write_normal_email(FILE* f_output, char f_name[], pst_item* item, int mode,
     DEBUG_ENT("write_normal_email");
 
     pst_convert_utf8_null(item, &item->email->header);
+    DEBUG_INFO(("PST headers\n%s\n", item->email->header.str));
+    DEBUG_INFO(("Extra MIME headers\n%s\n", *extra_mime_headers));
     headers = valid_headers(item->email->header.str) ? item->email->header.str :
               valid_headers(*extra_mime_headers)     ? *extra_mime_headers     :
               NULL;
-- 
2.30.2

From bade93dcdb435bc7bec50cf4b54481731beea45c Mon Sep 17 00:00:00 2001
From: Paul Wise <pa...@bonedaddy.net>
Date: Sun, 30 May 2021 09:49:57 +0800
Subject: [PATCH 2/3] Also detect email headers wrapped with space instead of
 tab

Spaces are commonly used for email header wrapping.
---
 src/readpst.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/readpst.c b/src/readpst.c
index b5910e9..6663771 100644
--- a/src/readpst.c
+++ b/src/readpst.c
@@ -1275,8 +1275,10 @@ int  header_match(char *header, char*field) {
     if (strncasecmp(header, field, n) == 0) return 1;   // tag:{space}
     if ((field[n-1] == ' ') && (strncasecmp(header, field, n-1) == 0)) {
         char *crlftab = "\r\n\t";
+        char *crlfspc = "\r\n ";
         DEBUG_INFO(("Possible wrapped header = %s\n", header));
         if (strncasecmp(header+n-1, crlftab, 3) == 0) return 1; // tag:{cr}{lf}{tab}
+        if (strncasecmp(header+n-1, crlfspc, 3) == 0) return 1; // tag:{cr}{lf}{space}
     }
     return 0;
 }
-- 
2.30.2

From da5f159caa66db380b793f9062a36888c9b12467 Mon Sep 17 00:00:00 2001
From: Paul Wise <pa...@bonedaddy.net>
Date: Sun, 30 May 2021 09:51:26 +0800
Subject: [PATCH 3/3] Detect reasonable email headers too

RFC 5322 specifies the syntax of email headers. Most header fields are more
restricted, though, so use a stricter check in case the headers are bogus
parts of the body that merely happen to match RFC 5322.

Fixes: https://bugs.debian.org/984581
---
 src/readpst.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/src/readpst.c b/src/readpst.c
index 6663771..97ba127 100644
--- a/src/readpst.c
+++ b/src/readpst.c
@@ -1283,6 +1283,65 @@ int  header_match(char *header, char*field) {
     return 0;
 }
 
+// https://en.wikipedia.org/wiki/Email#Message_header
+// https://www.rfc-editor.org/rfc/rfc5322.html
+// https://www.iana.org/assignments/message-headers/message-headers.xhtml
+int  header_is_reasonable(char *header) // returns 1 if header starts with one plausible header field, else 0
+{
+    char *c;
+#define C *c
+    // C reads the character at the scan position c; it is #undef'd at the end of the function
+    // The header must not be NULL
+    if (header) c = header;
+    else return 0;
+
+    // usually the header field name starts with upper-case: A-Z
+    if (C >= 'A' && C <= 'Z') c++;
+    else return 0;
+
+    while(1) {
+        // most header field names use a limited set of characters: - 0-9 A-Z a-z
+        if (
+            (C >= 'A' && C <= 'Z') ||
+            (C >= 'a' && C <= 'z') ||
+            (C >= '0' && C <= '9') ||
+            (C == '-')
+           ) {
+            c++;
+        // the header field name is then terminated with a colon
+        } else if (C == ':') {
+          c++;
+          goto parse_header_field_value;
+        // other characters (including the terminating NUL) indicate an invalid header
+        } else {
+          return 0;
+        }
+    }
+
+parse_header_field_value:
+    while(1) {
+        // header field values are printable US-ASCII plus space/tab
+        if (
+            (C >= 33 && C <= 126) || // '!' through '~'
+            (C == ' ' || C == '\t')
+           ) {
+            c++;
+        // the header field value is then terminated with CRLF
+        } else if (C == '\r' && *(c+1) == '\n') { // *c is '\r', so c+1 is at worst the NUL (assuming a NUL-terminated string)
+            c += 2;
+            // a space/tab after CRLF marks a folded (continued) value, so keep scanning
+            if (C == ' ' || C == '\t') c++;
+            else return 1; // first field is well-formed; any later lines are not examined
+        // other characters (including a NUL before any CRLF) indicate an invalid header
+        } else {
+          return 0;
+        }
+    }
+
+#undef C
+
+}
+
 int  valid_headers(char *header)
 {
     // headers are sometimes really bogus - they seem to be fragments of the
@@ -1303,6 +1362,7 @@ int  valid_headers(char *header)
         if (header_match(header, "X-ASG-Debug-ID: "               )) return 1;
         if (header_match(header, "X-Barracuda-URL: "              )) return 1;
         if (header_match(header, "X-x: "                          )) return 1;
+        if (header_is_reasonable(header)) return 1;
         if (strlen(header) > 2) {
             DEBUG_INFO(("Ignore bogus headers = %s\n", header));
         }
-- 
2.30.2

Attachment: signature.asc
Description: This is a digitally signed message part

Reply via email to