control: retitle -1 html2text: segfault because of broken utf8 support control: tags -1 +patch
Dear Maintainers, I've found that the segfaults reported here are caused by integer overflows and other problems in the utf8_* functions in the file Area.C. I rewrote them, and it no longer segfaults. Here is the patch: --- a/Area.C +++ b/Area.C @@ -86,49 +86,29 @@ /* utf_length() and utf_width() * - * Very simplified algorithm of calculating length of UTF-8 - * string. No check for errors. Counting only ASCII bytes and - * leading bytes of UTF-8 multibyte sequences. All bytes like - * 10xxxxxx are dropped. If USE_UTF8 is false then returns - * usual length. --YS + * Easy and secure way: count "continuation" + * (10xxxxxx) bytes and subtract that number. + * If USE_UTF8 is false then returns usual length. */ -size_t utf8_aux_count(char ch) -{ - if((ch & 0xe0) == 0xc0) - { - return 1; - } - else if((ch & 0xf0) == 0xe0) - { - return 2; - } - else if ((ch & 0xf8) == 0xf0) - { - return 3; - } - else - { - return 0; - } +int utf8_is_continuation(char ch) { + return (ch & 0xc0) == 0x80; } unsigned int Line::utf_length(size_type f, size_type t) const { - size_type m = (t < length_ ? t : length_); - size_type r = m - f; - if(USE_UTF8) - { - for (int i = f; i < m; i++) - { - char& ch = cells_[i].character; - size_type aux_count = utf8_aux_count(ch); - r -= aux_count; - i += aux_count; - } - } - return r; + size_type m = (t < length_ ? t : length_); + size_type i = (f < m ? f : m); //sanity check + size_type r = m - i; + if(USE_UTF8) { + for (; i < m; i++) { + if (utf8_is_continuation(cells_[i].character)) { + r--; + } + } + } + return r; } void @@ -290,19 +270,20 @@ Area::utf_width() { size_type r = width_; - if(USE_UTF8) { r = 0; + if (USE_UTF8) { + r = 0; for (size_type yy = 0; yy < height_; yy++) { - int i = width_ - 1; - while((i >= 0) && isspace(cells_[yy][i].character)) - { - --i; - } - size_type aux_count_sum = 0; + int i = width_ - 1; // what if width_ > maxint ? 
;) + while ((i >= 0) && isspace(cells_[yy][i].character)) { + i--; + } + size_type r1 = width_; for (; i >= 0; i--) { - aux_count_sum += utf8_aux_count(cells_[yy][i].character); + if (utf8_is_continuation(cells_[yy][i].character)) { + r1--; + } } - size_type r1 = width_ - aux_count_sum; - if(r < r1) r = r1; + if (r < r1) r = r1; } } return r; Also, I've noticed a place where possible errors from malloc and realloc aren't caught. Please consider patching it, e.g. this way: --- a/Area.C +++ b/Area.C @@ -30,7 +30,7 @@ /***************************************************************************/ - +#include <stdio.h> #include <stdlib.h> #include <string.h> #include <iostream> @@ -45,10 +45,14 @@ /* ------------------------------------------------------------------------- */ +static void* alloc_error() { + perror("html2text: error"); abort(); +} + #define malloc_array(type, size)\ -((type *) malloc(sizeof(type) * (size))) +((type *) (malloc(sizeof(type) * (size)) ? : alloc_error())) #define realloc_array(array, type, size) \ -((array) = (type *) realloc((array), sizeof(type) * (size))) +((array) = (type *) (realloc((array), sizeof(type) * (size)) ? : alloc_error())) #define copy_array(from, to, type, count) \ ((void) memcpy((to), (from), (count) * sizeof(type))) -- To UNSUBSCRIBE, email to debian-bugs-dist-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org