I have written a patch for this issue.

Logged in Ubuntu: https://bugs.launchpad.net/ubuntu/+source/procps/+bug/318221

I have tried to contact upstream, but without success: the
email addresses in the man page do not seem to be read, and the
mailing list contains only spam.



-- 
Jarrod Lowe
diff -du -r procps-3.2.7-orig/AUTHORS procps-3.2.7/AUTHORS
--- procps-3.2.7-orig/AUTHORS	2002-10-10 22:14:34.000000000 +0100
+++ procps-3.2.7/AUTHORS	2008-10-04 14:50:10.000000000 +0100
@@ -47,4 +47,5 @@
 watch:
 Tony Rems <re...@unisoft.com>
 Mike Coleman <m...@acm.org>
+Jarrod Lowe <pro...@rrod.net>
 
diff -du -r procps-3.2.7-orig/Makefile procps-3.2.7/Makefile
--- procps-3.2.7-orig/Makefile	2006-06-24 10:02:25.000000000 +0100
+++ procps-3.2.7/Makefile	2008-10-04 15:05:50.000000000 +0100
@@ -68,6 +68,7 @@
 _TARFILES := Makefile
 
 CURSES := -lncurses
+CURSESW := -lncursesw
 
 # This seems about right for the dynamic library stuff.
 # Something like this is probably needed to make the SE Linux
@@ -119,7 +120,7 @@
 # Unlike the kernel one, this check_gcc goes all the way to
 # producing an executable. There might be a -m64 that works
 # until you go looking for a 64-bit curses library.
-check_gcc = $(shell if $(CC) $(ALL_CPPFLAGS) $(ALL_CFLAGS) dummy.c $(ALL_LDFLAGS) $(1) -o /dev/null $(CURSES) > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi ;)
+check_gcc = $(shell if $(CC) $(ALL_CPPFLAGS) $(ALL_CFLAGS) dummy.c $(ALL_LDFLAGS) $(1) -o /dev/null $(CURSES) $(CURSESW) > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi ;)
 
 # Be 64-bit if at all possible. In a cross-compiling situation, one may
 # do "make m64=-m32 lib64=lib" to produce 32-bit executables. DO NOT
@@ -250,7 +251,7 @@
 	$(CC) $(ALL_CFLAGS) $^ $(ALL_LDFLAGS) -o $@ $(CURSES)
 
 watch: % : %.o
-	$(CC) $(ALL_CFLAGS) $^ $(ALL_LDFLAGS) -o $@ $(CURSES)
+	$(CC) $(ALL_CFLAGS) $^ $(ALL_LDFLAGS) -o $@ $(CURSESW)
 
 ############ progX --> progY
 
diff -du -r procps-3.2.7-orig/watch.1 procps-3.2.7/watch.1
--- procps-3.2.7-orig/watch.1	2003-02-09 07:05:25.000000000 +0000
+++ procps-3.2.7/watch.1	2008-10-04 14:46:46.000000000 +0100
@@ -79,9 +79,21 @@
 .PP
 Non-printing characters are stripped from program output.  Use "cat -v" as
 part of the command pipeline if you want to see them.
+.PP
+Combining characters that belong to the character in the last column of
+the screen may be displayed one column early, or may not be displayed
+at all.
+.PP
+Combining Characters never count as different in
+.I --differences
+mode. Only the base character counts.
+.PP
+Blank lines directly after a line which ends in the last column do not
+display.
 .SH AUTHORS
 The original
 .B watch
 was written by Tony Rems <re...@unisoft.com> in 1991, with mods and
 corrections by Francois Pinard.  It was reworked and new features added by
-Mike Coleman <m...@acm.org> in 1999.
+Mike Coleman <m...@acm.org> in 1999. Unicode support was added in 2009
+by Jarrod Lowe <pro...@rrod.net>.
diff -du -r procps-3.2.7-orig/watch.c procps-3.2.7/watch.c
--- procps-3.2.7-orig/watch.c	2006-06-17 10:18:38.000000000 +0100
+++ procps-3.2.7/watch.c	2008-10-04 15:06:09.000000000 +0100
@@ -8,14 +8,17 @@
  * Mike Coleman <m...@acm.org>.
  *
  * Changes by Albert Cahalan, 2002-2003.
+ *
+ * Unicode Support added by Jarrod Lowe <pro...@rrod.net> in 2009.
  */
 
-#define VERSION "0.2.0"
+#define VERSION "0.3.0"
 
+#include <wchar.h>
 #include <ctype.h>
 #include <getopt.h>
 #include <signal.h>
-#include <ncurses.h>
+#include <ncursesw/ncurses.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -25,6 +28,7 @@
 #include <termios.h>
 #include <locale.h>
 #include "proc/procps.h"
+#include <errno.h>
 
 #ifdef FORCE_8BIT
 #undef isprint
@@ -134,6 +138,32 @@
 	}
 }
 
+// Read one multibyte character from stream s (a popen'd stream) and return it as a wide character; returns WEOF when the EOF check below fires or when MAX_ENC_BYTES bytes yield no valid character.
+#define MAX_ENC_BYTES 16
+wint_t my_getwc(FILE *s);
+wint_t my_getwc(FILE *s) {
+	char i[MAX_ENC_BYTES]; // bytes collected so far for the current character; assumes no encoding ever needs more than MAX_ENC_BYTES (16) bytes
+	int byte = 0; // number of bytes currently stored in i
+	int convert; // result of the latest mbtowc attempt
+	int x; // receives errno after mbtowc; not read again within this function
+	wchar_t rval; // decoded wide character
+	while(1) {
+		i[byte] = getc(s); // NOTE(review): getc's int result is narrowed to char here
+		if (i[byte]==EOF) { return WEOF; } // NOTE(review): char-vs-EOF comparison; a 0xFF data byte can look like EOF when char is signed -- confirm
+		byte++;
+		errno = 0;
+		mbtowc(NULL, NULL, 0); // reset mbtowc's conversion state before re-parsing i from its first byte
+		convert = mbtowc(&rval, i, byte); // try to decode the bytes gathered so far
+		x = errno;
+		if(convert > 0) { return rval; } // a complete character was decoded; NOTE(review): a result of 0 (NUL character) is not returned here and the loop keeps reading
+		if(byte == MAX_ENC_BYTES) { // buffer is full and still nothing decoded: give up
+		while(byte > 1) { ungetc(i[--byte], s); } // push back all but the first byte, last-read first; NOTE(review): ISO C guarantees only one byte of pushback
+		errno = -EILSEQ; // NOTE(review): stores the negated EILSEQ; errno codes are normally positive -- confirm intent
+		return WEOF;
+		}
+	}
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -143,7 +173,10 @@
 	    option_help = 0, option_version = 0;
 	double interval = 2;
 	char *command;
+	wchar_t *wcommand = NULL;
 	int command_length = 0;	/* not including final \0 */
+	int wcommand_columns = 0;	/* not including final \0 */
+	int wcommand_characters = 0; /* not including final \0 */
 
 	setlocale(LC_ALL, "");
 	progname = argv[0];
@@ -216,6 +249,23 @@
 		command[command_length] = '\0';
 	}
 
+	// convert to wide for printing purposes
+	//mbstowcs(NULL, NULL, 0);
+	wcommand_characters = mbstowcs(NULL, command, 0);
+	if(wcommand_characters < 0) {
+		fprintf(stderr, "Unicode Handling Error\n");
+		exit(1);
+	}
+	wcommand = (wchar_t*)malloc((wcommand_characters+1) * sizeof(wcommand));
+	if(wcommand == NULL) {
+		fprintf(stderr, "Unicode Handling Error (malloc)\n");
+		exit(1);
+	}
+	mbstowcs(wcommand, command, wcommand_characters+1);
+	wcommand_columns = wcswidth(wcommand, -1);
+
+
+	
 	get_terminal_size();
 
 	/* Catch keyboard interrupts so we can put tty back in a sane state.  */
@@ -252,12 +302,44 @@
 		if (show_title) {
 			// left justify interval and command,
 			// right justify time, clipping all to fit window width
-			asprintf(&header, "Every %.1fs: %.*s",
-				interval, min(width - 1, command_length), command);
-			mvaddstr(0, 0, header);
-			if (strlen(header) > (size_t) (width - tsl - 1))
-				mvaddstr(0, width - tsl - 4, "...  ");
-			mvaddstr(0, width - tsl + 1, ts);
+
+			int hlen = asprintf(&header, "Every %.1fs: ", interval);
+
+			// the rules:
+			//   width < tsl : print nothing
+			//   width < tsl + hlen + 1: print ts
+			//   width = tsl + hlen + 1: print header, ts
+			//   width < tsl + hlen + 4: print header, ..., ts
+			//   width < tsl + hlen + wcommand_columns: print header, truncated wcommand, ..., ts
+			//   width >= tsl + hlen + wcommand_columns: print header, wcommand, ts
+			// this is slightly different from how it used to be
+			if(width >= tsl) {
+				if(width >= tsl + hlen + 1) {
+					mvaddstr(0, 0, header);
+					if(width >= tsl + hlen + 2) {
+						if(width < tsl + hlen + 4) {
+							mvaddstr(0, width - tsl - 4, "...  ");
+						}else{
+							if(width < tsl + hlen + wcommand_columns) {
+								// print truncated
+								int avail_columns = width - tsl - hlen;
+								int using_columns = wcommand_columns;
+								int using_characters = wcommand_characters;
+								while(using_columns > avail_columns - 4) {
+									using_characters--;
+								using_columns = wcswidth(wcommand, using_characters);
+								}
+								mvaddnwstr(0, hlen, wcommand, using_characters);
+								mvaddstr(0, width - tsl - 4, "... ");
+							}else{
+								mvaddwstr(0, hlen, wcommand);
+							}
+						}
+					}
+				}
+				mvaddstr(0, width - tsl + 1, ts);
+			}
+			
 			free(header);
 		}
 
@@ -268,47 +350,62 @@
 
 		for (y = show_title; y < height; y++) {
 			int eolseen = 0, tabpending = 0;
+			wint_t carry = WEOF;
 			for (x = 0; x < width; x++) {
-				int c = ' ';
+				wint_t c = L' ';
 				int attr = 0;
 
 				if (!eolseen) {
 					/* if there is a tab pending, just spit spaces until the
 					   next stop instead of reading characters */
 					if (!tabpending)
-						do
-							c = getc(p);
-						while (c != EOF && !isprint(c)
-						       && c != '\n'
-						       && c != '\t');
-					if (c == '\n')
+						do {
+							if(carry == WEOF) {
+								c = my_getwc(p);
+							}else{
+								c = carry;
+								carry = WEOF;
+							}
+						}while (c != WEOF && !isprint(c) && c<128
+						       && wcwidth(c) == 0
+						       && c != L'\n'
+						       && c != L'\t');
+					if (c == L'\n')
 						if (!oldeolseen && x == 0) {
 							x = -1;
 							continue;
 						} else
 							eolseen = 1;
-					else if (c == '\t')
+					else if (c == L'\t')
 						tabpending = 1;
-					if (c == EOF || c == '\n' || c == '\t')
-						c = ' ';
+					if (x==width-1 && wcwidth(c)==2) {
+						y++;
+						x = -1; //process this double-width
+						carry = c; //character on the next line
+						continue; //because it won't fit here
+					}
+					if (c == WEOF || c == L'\n' || c == L'\t')
+						c = L' ';
 					if (tabpending && (((x + 1) % 8) == 0))
 						tabpending = 0;
 				}
 				move(y, x);
 				if (option_differences) {
-					int oldch = inch();
-					char oldc = oldch & A_CHARTEXT;
+						cchar_t oldc;
+					in_wch(&oldc);
 					attr = !first_screen
-					    && (c != oldc
+					    && ((wchar_t)c != oldc.chars[0]
 						||
 						(option_differences_cumulative
-						 && (oldch & A_ATTRIBUTES)));
+						 && (oldc.attr & A_ATTRIBUTES)));
 				}
 				if (attr)
 					standout();
-				addch(c);
+				addnwstr((wchar_t*)&c,1);
 				if (attr)
 					standend();
+				if(wcwidth(c) == 0) { x--; }
+				if(wcwidth(c) == 2) { x++; }
 			}
 			oldeolseen = eolseen;
 		}
diff -du -r procps-3.2.8-orig/AUTHORS procps-3.2.8/AUTHORS
--- procps-3.2.8-orig/AUTHORS	2002-10-10 22:14:34.000000000 +0100
+++ procps-3.2.8/AUTHORS	2008-10-04 14:50:10.000000000 +0100
@@ -47,4 +47,5 @@
 watch:
 Tony Rems <re...@unisoft.com>
 Mike Coleman <m...@acm.org>
+Jarrod Lowe <pro...@rrod.net>
 
diff -du -r procps-3.2.8-orig/Makefile procps-3.2.8/Makefile
--- procps-3.2.8-orig/Makefile	2006-06-24 10:02:25.000000000 +0100
+++ procps-3.2.8/Makefile	2008-10-04 15:05:50.000000000 +0100
@@ -68,6 +68,7 @@
 _TARFILES := Makefile
 
 CURSES := -lncurses
+CURSESW := -lncursesw
 
 # This seems about right for the dynamic library stuff.
 # Something like this is probably needed to make the SE Linux
@@ -119,7 +120,7 @@
 # Unlike the kernel one, this check_gcc goes all the way to
 # producing an executable. There might be a -m64 that works
 # until you go looking for a 64-bit curses library.
-check_gcc = $(shell if $(CC) $(ALL_CPPFLAGS) $(ALL_CFLAGS) dummy.c $(ALL_LDFLAGS) $(1) -o /dev/null $(CURSES) > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi ;)
+check_gcc = $(shell if $(CC) $(ALL_CPPFLAGS) $(ALL_CFLAGS) dummy.c $(ALL_LDFLAGS) $(1) -o /dev/null $(CURSES) $(CURSESW) > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi ;)
 
 # Be 64-bit if at all possible. In a cross-compiling situation, one may
 # do "make m64=-m32 lib64=lib" to produce 32-bit executables. DO NOT
@@ -250,7 +251,7 @@
 	$(CC) $(ALL_CFLAGS) $^ $(ALL_LDFLAGS) -o $@ $(CURSES)
 
 watch: % : %.o
-	$(CC) $(ALL_CFLAGS) $^ $(ALL_LDFLAGS) -o $@ $(CURSES)
+	$(CC) $(ALL_CFLAGS) $^ $(ALL_LDFLAGS) -o $@ $(CURSESW)
 
 ############ progX --> progY
 
diff -du -r procps-3.2.8-orig/watch.1 procps-3.2.8/watch.1
--- procps-3.2.8-orig/watch.1	2003-02-09 07:05:25.000000000 +0000
+++ procps-3.2.8/watch.1	2008-10-04 14:46:46.000000000 +0100
@@ -79,9 +79,21 @@
 .PP
 Non-printing characters are stripped from program output.  Use "cat -v" as
 part of the command pipeline if you want to see them.
+.PP
+Combining characters that belong to the character in the last column of
+the screen may be displayed one column early, or may not be displayed
+at all.
+.PP
+Combining Characters never count as different in
+.I --differences
+mode. Only the base character counts.
+.PP
+Blank lines directly after a line which ends in the last column do not
+display.
 .SH AUTHORS
 The original
 .B watch
 was written by Tony Rems <re...@unisoft.com> in 1991, with mods and
 corrections by Francois Pinard.  It was reworked and new features added by
-Mike Coleman <m...@acm.org> in 1999.
+Mike Coleman <m...@acm.org> in 1999. Unicode support was added in 2009
+by Jarrod Lowe <pro...@rrod.net>.
diff -du -r procps-3.2.8-orig/watch.c procps-3.2.8/watch.c
--- procps-3.2.8-orig/watch.c	2006-06-17 10:18:38.000000000 +0100
+++ procps-3.2.8/watch.c	2008-10-04 15:06:09.000000000 +0100
@@ -8,14 +8,17 @@
  * Mike Coleman <m...@acm.org>.
  *
  * Changes by Albert Cahalan, 2002-2003.
+ *
+ * Unicode Support added by Jarrod Lowe <pro...@rrod.net> in 2009.
  */
 
-#define VERSION "0.2.0"
+#define VERSION "0.3.0"
 
+#include <wchar.h>
 #include <ctype.h>
 #include <getopt.h>
 #include <signal.h>
-#include <ncurses.h>
+#include <ncursesw/ncurses.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -25,6 +28,7 @@
 #include <termios.h>
 #include <locale.h>
 #include "proc/procps.h"
+#include <errno.h>
 
 #ifdef FORCE_8BIT
 #undef isprint
@@ -134,6 +138,32 @@
 	}
 }
 
+// Read one multibyte character from stream s (a popen'd stream) and return it as a wide character; returns WEOF when the EOF check below fires or when MAX_ENC_BYTES bytes yield no valid character.
+#define MAX_ENC_BYTES 16
+wint_t my_getwc(FILE *s);
+wint_t my_getwc(FILE *s) {
+	char i[MAX_ENC_BYTES]; // bytes collected so far for the current character; assumes no encoding ever needs more than MAX_ENC_BYTES (16) bytes
+	int byte = 0; // number of bytes currently stored in i
+	int convert; // result of the latest mbtowc attempt
+	int x; // receives errno after mbtowc; not read again within this function
+	wchar_t rval; // decoded wide character
+	while(1) {
+		i[byte] = getc(s); // NOTE(review): getc's int result is narrowed to char here
+		if (i[byte]==EOF) { return WEOF; } // NOTE(review): char-vs-EOF comparison; a 0xFF data byte can look like EOF when char is signed -- confirm
+		byte++;
+		errno = 0;
+		mbtowc(NULL, NULL, 0); // reset mbtowc's conversion state before re-parsing i from its first byte
+		convert = mbtowc(&rval, i, byte); // try to decode the bytes gathered so far
+		x = errno;
+		if(convert > 0) { return rval; } // a complete character was decoded; NOTE(review): a result of 0 (NUL character) is not returned here and the loop keeps reading
+		if(byte == MAX_ENC_BYTES) { // buffer is full and still nothing decoded: give up
+		while(byte > 1) { ungetc(i[--byte], s); } // push back all but the first byte, last-read first; NOTE(review): ISO C guarantees only one byte of pushback
+		errno = -EILSEQ; // NOTE(review): stores the negated EILSEQ; errno codes are normally positive -- confirm intent
+		return WEOF;
+		}
+	}
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -143,7 +173,10 @@
 	    option_help = 0, option_version = 0;
 	double interval = 2;
 	char *command;
+	wchar_t *wcommand = NULL;
 	int command_length = 0;	/* not including final \0 */
+	int wcommand_columns = 0;	/* not including final \0 */
+	int wcommand_characters = 0; /* not including final \0 */
 
 	setlocale(LC_ALL, "");
 	progname = argv[0];
@@ -216,6 +249,23 @@
 		command[command_length] = '\0';
 	}
 
+	// convert to wide for printing purposes
+	//mbstowcs(NULL, NULL, 0);
+	wcommand_characters = mbstowcs(NULL, command, 0);
+	if(wcommand_characters < 0) {
+		fprintf(stderr, "Unicode Handling Error\n");
+		exit(1);
+	}
+	wcommand = (wchar_t*)malloc((wcommand_characters+1) * sizeof(wcommand));
+	if(wcommand == NULL) {
+		fprintf(stderr, "Unicode Handling Error (malloc)\n");
+		exit(1);
+	}
+	mbstowcs(wcommand, command, wcommand_characters+1);
+	wcommand_columns = wcswidth(wcommand, -1);
+
+
+	
 	get_terminal_size();
 
 	/* Catch keyboard interrupts so we can put tty back in a sane state.  */
@@ -252,12 +302,44 @@
 		if (show_title) {
 			// left justify interval and command,
 			// right justify time, clipping all to fit window width
-			asprintf(&header, "Every %.1fs: %.*s",
-				interval, min(width - 1, command_length), command);
-			mvaddstr(0, 0, header);
-			if (strlen(header) > (size_t) (width - tsl - 1))
-				mvaddstr(0, width - tsl - 4, "...  ");
-			mvaddstr(0, width - tsl + 1, ts);
+
+			int hlen = asprintf(&header, "Every %.1fs: ", interval);
+
+			// the rules:
+			//   width < tsl : print nothing
+			//   width < tsl + hlen + 1: print ts
+			//   width = tsl + hlen + 1: print header, ts
+			//   width < tsl + hlen + 4: print header, ..., ts
+			//   width < tsl + hlen + wcommand_columns: print header, truncated wcommand, ..., ts
+			//   width >= tsl + hlen + wcommand_columns: print header, wcommand, ts
+			// this is slightly different from how it used to be
+			if(width >= tsl) {
+				if(width >= tsl + hlen + 1) {
+					mvaddstr(0, 0, header);
+					if(width >= tsl + hlen + 2) {
+						if(width < tsl + hlen + 4) {
+							mvaddstr(0, width - tsl - 4, "...  ");
+						}else{
+							if(width < tsl + hlen + wcommand_columns) {
+								// print truncated
+								int avail_columns = width - tsl - hlen;
+								int using_columns = wcommand_columns;
+								int using_characters = wcommand_characters;
+								while(using_columns > avail_columns - 4) {
+									using_characters--;
+								using_columns = wcswidth(wcommand, using_characters);
+								}
+								mvaddnwstr(0, hlen, wcommand, using_characters);
+								mvaddstr(0, width - tsl - 4, "... ");
+							}else{
+								mvaddwstr(0, hlen, wcommand);
+							}
+						}
+					}
+				}
+				mvaddstr(0, width - tsl + 1, ts);
+			}
+			
 			free(header);
 		}
 
@@ -268,47 +350,62 @@
 
 		for (y = show_title; y < height; y++) {
 			int eolseen = 0, tabpending = 0;
+			wint_t carry = WEOF;
 			for (x = 0; x < width; x++) {
-				int c = ' ';
+				wint_t c = L' ';
 				int attr = 0;
 
 				if (!eolseen) {
 					/* if there is a tab pending, just spit spaces until the
 					   next stop instead of reading characters */
 					if (!tabpending)
-						do
-							c = getc(p);
-						while (c != EOF && !isprint(c)
-						       && c != '\n'
-						       && c != '\t');
-					if (c == '\n')
+						do {
+							if(carry == WEOF) {
+								c = my_getwc(p);
+							}else{
+								c = carry;
+								carry = WEOF;
+							}
+						}while (c != WEOF && !isprint(c) && c<128
+						       && wcwidth(c) == 0
+						       && c != L'\n'
+						       && c != L'\t');
+					if (c == L'\n')
 						if (!oldeolseen && x == 0) {
 							x = -1;
 							continue;
 						} else
 							eolseen = 1;
-					else if (c == '\t')
+					else if (c == L'\t')
 						tabpending = 1;
-					if (c == EOF || c == '\n' || c == '\t')
-						c = ' ';
+					if (x==width-1 && wcwidth(c)==2) {
+						y++;
+						x = -1; //process this double-width
+						carry = c; //character on the next line
+						continue; //because it won't fit here
+					}
+					if (c == WEOF || c == L'\n' || c == L'\t')
+						c = L' ';
 					if (tabpending && (((x + 1) % 8) == 0))
 						tabpending = 0;
 				}
 				move(y, x);
 				if (option_differences) {
-					chtype oldch = inch();
-					char oldc = oldch & A_CHARTEXT;
+						cchar_t oldc;
+					in_wch(&oldc);
 					attr = !first_screen
-					    && ((char)c != oldc
+					    && ((wchar_t)c != oldc.chars[0]
 						||
 						(option_differences_cumulative
-						 && (oldch & A_ATTRIBUTES)));
+						 && (oldc.attr & A_ATTRIBUTES)));
 				}
 				if (attr)
 					standout();
-				addch(c);
+				addnwstr((wchar_t*)&c,1);
 				if (attr)
 					standend();
+				if(wcwidth(c) == 0) { x--; }
+				if(wcwidth(c) == 2) { x++; }
 			}
 			oldeolseen = eolseen;
 		}

Reply via email to