10 дек. 2015 г. 18:51 пользователь "Ingo Schwarze" <schwa...@usta.de> написал: > > Hi, > > here is a simple one. The uniq(1) utility only needs UTF-8 support > to distinguish blank and non-blank characters with -f and to skip > characters with -s. The former is easy to implement with mbtowc(3) > and iswblank(3), the latter with mblen(3). There is no need for > wrapper functions or a separate utf8.c file. > > OK? > Ingo > > > Index: uniq.1 > =================================================================== > RCS file: /cvs/src/usr.bin/uniq/uniq.1,v > retrieving revision 1.17 > diff -u -p -r1.17 uniq.1 > --- uniq.1 3 Sep 2010 11:09:29 -0000 1.17 > +++ uniq.1 10 Dec 2015 15:37:02 -0000 > @@ -114,6 +114,14 @@ A file name of > .Ql - > denotes the standard input or the standard output > .Pq depending on its position on the command line . > +.Sh ENVIRONMENT > +.Bl -tag -width LC_CTYPE > +.It Ev LC_CTYPE > +The character set > +.Xr locale 1 . > +Determines which groups of bytes are treated as characters > +and which characters are considered blank. 
> +.El > .Sh EXIT STATUS > .Ex -std uniq > .Sh SEE ALSO > Index: uniq.c > =================================================================== > RCS file: /cvs/src/usr.bin/uniq/uniq.c,v > retrieving revision 1.23 > diff -u -p -r1.23 uniq.c > --- uniq.c 2 Nov 2015 20:25:42 -0000 1.23 > +++ uniq.c 10 Dec 2015 15:37:02 -0000 > @@ -37,10 +37,13 @@ > #include <err.h> > #include <errno.h> > #include <limits.h> > +#include <locale.h> > #include <stdio.h> > #include <stdlib.h> > #include <string.h> > #include <unistd.h> > +#include <wchar.h> > +#include <wctype.h> > > #define MAXLINELEN (8 * 1024) > > @@ -61,6 +64,8 @@ main(int argc, char *argv[]) > int ch; > char *prevline, *thisline; > > + setlocale(LC_CTYPE, ""); > + > if (pledge("stdio rpath wpath cpath", NULL) == -1) > err(1, "pledge"); > > @@ -176,16 +181,38 @@ show(FILE *ofp, char *str) > char * > skip(char *str) > { > + wchar_t wc; > int nchars, nfields; > + int len; > > for (nfields = numfields; nfields && *str; nfields--) { > - while (isblank((unsigned char)*str)) > - str++; > - while (*str && !isblank((unsigned char)*str)) > - str++; > + > + /* Skip blanks before the next field. */ > + do { > + if ((len = mbtowc(&wc, str, MB_CUR_MAX)) == -1) { > + (void)mbtowc(NULL, NULL, MB_CUR_MAX); > + wc = L'?'; > + len = 1; > + } > + str += len; > + } while (*str != '\0' && iswblank(wc)); > + > + /* Skip one field. */ > + while (*str != '\0' && !iswblank(wc)) { > + if ((len = mbtowc(&wc, str, MB_CUR_MAX)) == -1) { > + (void)mbtowc(NULL, NULL, MB_CUR_MAX); > + wc = L'?'; > + len = 1; > + } > + str += len; > + } > } > - for (nchars = numchars; nchars-- && *str && *str != '\n'; ++str) > - ; > + > + /* Skip some additional characters. */ > + for (nchars = numchars; nchars-- && *str != '\0'; str += len) > + if ((len = mblen(str, MB_CUR_MAX)) == -1) > + len = 1; > + > return (str); > }
Reads well to me, okay zhuk@. -- Vadim Zhukov