Hi,

hopefully, third time's a charm.

As found by tsg@ with afl(1), the realloc code lacked adjustment
of auxiliary pointers, causing crashes.

Only the file fold.c changes, so i'm only resending that.

Yours,
  Ingo


Index: fold.c
===================================================================
RCS file: /cvs/src/usr.bin/fold/fold.c,v
retrieving revision 1.17
diff -u -p -r1.17 fold.c
--- fold.c      9 Oct 2015 01:37:07 -0000       1.17
+++ fold.c      22 May 2016 15:15:30 -0000
@@ -33,19 +33,22 @@
  * SUCH DAMAGE.
  */
 
+#include <ctype.h>
+#include <err.h>
+#include <limits.h>
+#include <locale.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-#include <ctype.h>
-#include <err.h>
-#include <limits.h>
+#include <wchar.h>
 
 #define        DEFLINEWIDTH    80
 
 static void fold(unsigned int);
-static unsigned int new_column_position(unsigned int, int);
+static int isu8cont(unsigned char);
 static __dead void usage(void);
+
 int count_bytes = 0;
 int split_words = 0;
 
@@ -56,6 +59,8 @@ main(int argc, char *argv[])
        unsigned int width;
        const char *errstr;
 
+       setlocale(LC_CTYPE, "");
+
        if (pledge("stdio rpath", NULL) == -1)
                err(1, "pledge");
 
@@ -110,12 +115,11 @@ main(int argc, char *argv[])
                for (; *argv; ++argv) {
                        if (!freopen(*argv, "r", stdin))
                                err(1, "%s", *argv);
-                               /* NOTREACHED */
                        else
                                fold(width);
                }
        }
-       exit(0);
+       return 0;
 }
 
 /*
@@ -130,100 +134,135 @@ main(int argc, char *argv[])
  * returns embedded in the input stream.
  */
 static void
-fold(unsigned int width)
+fold(unsigned int max_width)
 {
-       static char *buf = NULL;
-       static int   buf_max = 0;
-       int ch;
-       unsigned int col, indx;
-
-       col = indx = 0;
-       while ((ch = getchar()) != EOF) {
-               if (ch == '\n') {
-                       if (indx != 0)
-                               fwrite(buf, 1, indx, stdout);
-                       putchar('\n');
-                       col = indx = 0;
-                       continue;
-               }
+       static char     *buf = NULL;
+       static size_t    bufsz = 2048;
+       char            *cp;    /* Current mb character. */
+       char            *np;    /* Next mb character. */
+       char            *sp;    /* To search for the last space. */
+       char            *nbuf;  /* For buffer reallocation. */
+       wchar_t          wc;    /* Current wide character. */
+       int              ch;    /* Last byte read. */
+       int              len;   /* Bytes in the current mb character. */
+       unsigned int     col;   /* Current display position. */
+       int              width; /* Display width of wc. */
+
+       if (buf == NULL && (buf = malloc(bufsz)) == NULL)
+               err(1, NULL);
+
+       np = cp = buf;
+       ch = 0;
+       col = 0;
+
+       while (ch != EOF) {  /* Loop on input characters. */
+               while ((ch = getchar()) != EOF) {  /* Loop on input bytes. */
+                       if (np + 1 == buf + bufsz) {
+                               nbuf = reallocarray(buf, 2, bufsz);
+                               if (nbuf == NULL)
+                                       err(1, NULL);
+                               bufsz *= 2;
+                               cp = nbuf + (cp - buf);
+                               np = nbuf + (np - buf);
+                               buf = nbuf;
+                       }
+                       *np++ = ch;
 
-               col = new_column_position(col, ch);
-               if (col > width) {
-                       unsigned int i, last_space;
-
-                       if (split_words) {
-                               for (i = 0, last_space = -1; i < indx; i++)
-                                       if(buf[i] == ' ')
-                                               last_space = i;
+                       /*
+                        * Read up to and including the first byte of
+                        * the next character, such that we are sure
+                        * to have a complete character in the buffer.
+                        * There is no need to read more than five bytes
+                        * ahead, since UTF-8 characters are four bytes
+                        * long at most.
+                        */
+
+                       if (np - cp > 4 || (np - cp > 1 && !isu8cont(ch)))
+                               break;
+               }
+
+               while (cp < np) {  /* Loop on output characters. */
+
+                       /* Handle end of line and backspace. */
+
+                       if (*cp == '\n' || (*cp == '\r' && !count_bytes)) {
+                               fwrite(buf, 1, ++cp - buf, stdout);
+                               memmove(buf, cp, np - cp);
+                               np = buf + (np - cp);
+                               cp = buf;
+                               col = 0;
+                               continue;
+                       }
+                       if (*cp == '\b' && !count_bytes) {
+                               if (col)
+                                       col--;
+                               cp++;
+                               continue;
                        }
 
-                       if (split_words && last_space != -1) {
-                               last_space++;
+                       /*
+                        * Measure display width.
+                        * Process the last byte only if
+                        * end of file was reached.
+                        */
+
+                       if (np - cp > (ch != EOF)) {
+                               len = 1;
+                               width = 1;
+
+                               if (*cp == '\t') {
+                                       if (count_bytes == 0)
+                                               width = 8 - (col & 7);
+                               } else if ((len = mbtowc(&wc, cp,
+                                   np - cp)) < 1)
+                                       len = 1;
+                               else if (count_bytes)
+                                       width = len;
+                               else if ((width = wcwidth(wc)) < 0)
+                                       width = 1;
+
+                               col += width;
+                               if (col <= max_width || cp == buf) {
+                                       cp += len;
+                                       continue;
+                               }
+                       }
 
-                               fwrite(buf, 1, last_space, stdout);
-                               memmove(buf, buf+last_space, indx-last_space);
+                       /* Line break required. */
 
-                               indx -= last_space;
-                               col = 0;
-                               for (i = 0; i < indx; i++) {
-                                       col = new_column_position(col, buf[i]);
+                       if (col > max_width) {
+                               if (split_words) {
+                                       for (sp = cp; sp > buf; sp--) {
+                                               if (sp[-1] == ' ') {
+                                                       cp = sp;
+                                                       break;
+                                               }
+                                       }
                                }
-                       } else {
-                               fwrite(buf, 1, indx, stdout);
-                               col = indx = 0;
+                               fwrite(buf, 1, cp - buf, stdout);
+                               putchar('\n');
+                               memmove(buf, cp, np - cp);
+                               np = buf + (np - cp);
+                               cp = buf;
+                               col = 0;
+                               continue;
                        }
-                       putchar('\n');
 
-                       /* calculate the column position for the next line. */
-                       col = new_column_position(col, ch);
-               }
+                       /* Need more input. */
 
-               if (indx + 1 > buf_max) {
-                       int newmax = buf_max + 2048;
-                       char *newbuf;
-
-                       /* Allocate buffer in LINE_MAX increments */
-                       if ((newbuf = realloc(buf, newmax)) == NULL) {
-                               err(1, NULL);
-                               /* NOTREACHED */
-                       }
-                       buf = newbuf;
-                       buf_max = newmax;
+                       break;
                }
-               buf[indx++] = ch;
        }
+       fwrite(buf, 1, np - buf, stdout);
 
-       if (indx != 0)
-               fwrite(buf, 1, indx, stdout);
+       if (ferror(stdin))
+               err(1, NULL);
 }
 
-/*
- * calculate the column position 
- */
-static unsigned int
-new_column_position(unsigned int col, int ch)
+static int
+isu8cont(unsigned char c)
 {
-       if (!count_bytes) {
-               switch (ch) {
-               case '\b':
-                       if (col > 0)
-                               --col;
-                       break;
-               case '\r':
-                       col = 0;
-                       break;
-               case '\t':
-                       col = (col + 8) & ~7;
-                       break;
-               default:
-                       ++col;
-                       break;
-               }
-       } else {
-               ++col;
-       }
-
-       return col;
+       return MB_CUR_MAX > 1 && (c & (0x80 | 0x40)) == 0x80;
 }
 
 static __dead void

Reply via email to