control: retitle -1 html2text: segfault because of broken utf8 support
control: tags -1 +patch

Dear Maintainers,

I've found that the segfaults reported here are caused by integer
overflows and other problems in the utf8_* functions in Area.C.
I rewrote them, and it doesn't segfault anymore. Here is the patch:

--- a/Area.C
+++ b/Area.C
@@ -86,49 +86,29 @@
 
 /*           utf_length() and utf_width()       
  *
- *     Very simplified algorithm of calculating length of UTF-8
- *   string. No check for errors. Counting only ASCII bytes and
- *   leading bytes of UTF-8 multibyte sequences. All bytes like
- *   10xxxxxx are dropped. If USE_UTF8 is false then returns
- *   usual length.               --YS
+ *   Easy and secure way: count "continuation"
+ *   (10xxxxxx) bytes and subtract that number.
+ *   If USE_UTF8 is false then returns usual length.
  */
 
-size_t utf8_aux_count(char ch)
-{
-       if((ch & 0xe0) == 0xc0)
-       {
-               return 1;
-       }
-       else if((ch & 0xf0) == 0xe0)
-       {
-               return 2;
-       }
-       else if ((ch & 0xf8) == 0xf0)
-       {
-               return 3;
-       }
-       else
-       {
-               return 0;
-       }
+int utf8_is_continuation(char ch) {
+  return (ch & 0xc0) == 0x80;
 }
 
 unsigned int
 Line::utf_length(size_type f, size_type t) const
 {
-       size_type m = (t < length_ ? t : length_);
-       size_type r = m - f;
-       if(USE_UTF8)
-       {
-               for (int i = f; i < m; i++)
-               {
-                       char& ch = cells_[i].character;
-                       size_type aux_count = utf8_aux_count(ch);
-                       r -= aux_count;
-                       i += aux_count;
-               }
-       }
-       return r;
+  size_type m = (t < length_ ? t : length_);
+  size_type i = (f < m ? f : m); //sanity check
+  size_type r = m - i;
+  if(USE_UTF8) {
+    for (; i < m; i++) {
+      if (utf8_is_continuation(cells_[i].character)) {
+        r--;
+      }
+    }
+  }
+  return r;
 }
 
 void
@@ -290,19 +270,20 @@
 Area::utf_width()
 {
   size_type r = width_;
-  if(USE_UTF8) { r = 0;
+  if (USE_UTF8) {
+    r = 0;
     for (size_type yy = 0; yy < height_; yy++) {
-         int i = width_ - 1;
-      while((i >= 0) && isspace(cells_[yy][i].character))
-         {
-                 --i;
-         }
-      size_type aux_count_sum = 0;
+      int i = width_ - 1; // what if width_ > maxint ? ;)
+      while ((i >= 0) && isspace(cells_[yy][i].character)) {
+        i--;
+      }
+      size_type r1 = width_;
       for (; i >= 0; i--) {
-               aux_count_sum += utf8_aux_count(cells_[yy][i].character);
+         if (utf8_is_continuation(cells_[yy][i].character)) {
+           r1--;
+         }
       }
-         size_type r1 = width_ - aux_count_sum;
-      if(r < r1) r = r1;
+      if (r < r1) r = r1;
     }
   }
   return r;


Also, I've noticed a place where (possible) errors from malloc and realloc
aren't caught. Please consider patching it, e.g. this way:


--- a/Area.C
+++ b/Area.C
@@ -30,7 +30,7 @@
 
  /***************************************************************************/
 
-
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <iostream>
@@ -45,10 +45,14 @@
 
 /* ------------------------------------------------------------------------- */
 
+static void* alloc_error() {
+  perror("html2text: error"); abort();
+}
+
 #define malloc_array(type, size)\
-((type *) malloc(sizeof(type) * (size)))
+((type *) (malloc(sizeof(type) * (size)) ? : alloc_error()))
 #define realloc_array(array, type, size) \
-((array) = (type *) realloc((array), sizeof(type) * (size)))
+((array) = (type *) (realloc((array), sizeof(type) * (size)) ? : alloc_error()))
 #define copy_array(from, to, type, count) \
 ((void) memcpy((to), (from), (count) * sizeof(type)))
 


-- 
To UNSUBSCRIBE, email to debian-bugs-dist-requ...@lists.debian.org
with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org

Reply via email to