On Mon, 31 Jul 2017, Axel Beckert wrote:

> Hi Mikulas,
> 
> not sure if you received that bug report already, too.
> 
>               Regards, Axel

Hi

Here is a patch for this bug.

Mikulas



commit fee5dca79a93a37024e494b985386a5fe60bc1b7
Author: Mikulas Patocka <miku...@twibright.com>
Date:   Wed Aug 2 20:13:29 2017 +0200

    Fix read out of memory in case of corrupted UTF-8 data

---
 charsets.c |   37 +------------------------------------
 links.h    |    9 ++++-----
 2 files changed, 5 insertions(+), 41 deletions(-)

Index: links-2.14/charsets.c
===================================================================
--- links-2.14.orig/charsets.c
+++ links-2.14/charsets.c
@@ -215,41 +215,6 @@ static struct conv_table *get_translatio
        return utf_table;
 }
 
-unsigned short int utf8_2_uni_table[0x200] = {
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 128,   0, 0, 0, 192,   0,
-       0, 0, 256,      0, 0, 0, 320,   0, 0, 0, 384,   0, 0, 0, 448,   0,
-       0, 0, 512,      0, 0, 0, 576,   0, 0, 0, 640,   0, 0, 0, 704,   0,
-       0, 0, 768,      0, 0, 0, 832,   0, 0, 0, 896,   0, 0, 0, 960,   0,
-       0, 0, 1024,     0, 0, 0, 1088,  0, 0, 0, 1152,  0, 0, 0, 1216,  0,
-       0, 0, 1280,     0, 0, 0, 1344,  0, 0, 0, 1408,  0, 0, 0, 1472,  0,
-       0, 0, 1536,     0, 0, 0, 1600,  0, 0, 0, 1664,  0, 0, 0, 1728,  0,
-       0, 0, 1792,     0, 0, 0, 1856,  0, 0, 0, 1920,  0, 0, 0, 1984,  0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-       0, 0, 0,        0, 0, 0, 0,     0, 0, 0, 0,     0, 0, 0, 0,     0,
-};
-
 unsigned char utf_8_1[256] = {
        6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
@@ -269,7 +234,7 @@ unsigned char utf_8_1[256] = {
        3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 6, 6,
 };
 
-static_const unsigned min_utf_8[9] = {
+static_const unsigned min_utf_8[8] = {
        0, 0x4000000, 0x200000, 0x10000, 0x800, 0x80, 0x100, 0x1,
 };
 
Index: links-2.14/links.h
===================================================================
--- links-2.14.orig/links.h
+++ links-2.14/links.h
@@ -3906,15 +3906,14 @@ unsigned char *cp_strchr(int charset, un
 void init_charset(void);
 
 unsigned get_utf_8(unsigned char **p);
-extern unsigned short int utf8_2_uni_table[0x200];
 #define GET_UTF_8(s, c)                                                \
 do {                                                                   \
        if ((unsigned char)(s)[0] < 0x80)                               \
                (c) = (s)++[0];                                         \
-       else if (((c) = utf8_2_uni_table[((unsigned char)(s)[0] << 2) + \
-                               ((unsigned char)(s)[1] >> 6) - 0x200])) \
-               (c) += (unsigned char)(s)[1] & 0x3f, (s) += 2;          \
-       else                                                            \
+       else if ((unsigned char)(s)[0] >= 0xc2 && (unsigned char)(s)[0] < 0xe0 &&\
+                ((unsigned char)(s)[1] & 0xc0) == 0x80) {              \
+               (c) = (unsigned char)(s)[0] * 0x40 + (unsigned char)(s)[1], (c) -= 0x3080, (s) += 2;\
+       } else                                                          \
                (c) = get_utf_8(&(s));                                  \
 } while (0)
 #define FWD_UTF_8(s)                                                   \

Reply via email to