On Thu, Sep 29, 2022 at 08:57:04AM +0000, Job Snijders wrote:
> Hi all,
> 
> I often find myself piping data through ... | awk '{print length}' | ...
> I figured there should be a more direct way that requires less typing.
> Perhaps other developers have a similar itch? 
> 
> The FreeBSD, NetBSD, Dragonfly, and GNU variants of the wc(1) utility
> have a similar -L feature.

That isn't an argument for merit or good taste.  Choice of flag, sure.

wc counts items in files.  Finding the longest item indeed sounds
like a task better suited to awk.

> 
> Kind regards,
> 
> Job
> 
> Index: wc.1
> ===================================================================
> RCS file: /cvs/src/usr.bin/wc/wc.1,v
> retrieving revision 1.27
> diff -u -p -r1.27 wc.1
> --- wc.1      24 Oct 2016 13:46:58 -0000      1.27
> +++ wc.1      21 Sep 2022 15:47:29 -0000
> @@ -41,7 +41,7 @@
>  .Sh SYNOPSIS
>  .Nm wc
>  .Op Fl c | m
> -.Op Fl hlw
> +.Op Fl hLlw
>  .Op Ar
>  .Sh DESCRIPTION
>  The
> @@ -68,6 +68,14 @@ is written to the standard output.
>  Use unit suffixes: Byte, Kilobyte, Megabyte, Gigabyte, Terabyte,
>  Petabyte, and Exabyte in order to reduce the number of digits to four or fewer
>  using powers of 2 for sizes (K=1024, M=1048576, etc.).
> +.It Fl L
> +Write the length of the longest line to the standard output.
> +Length is the number of bytes counted, or the number of characters if the
> +.Fl m
> +flag is specified.
> +If more than one input file is specified,
> +the length of the longest line of all files is reported as the value of
> +.Qq total .
>  .It Fl l
>  The number of lines in each input file
>  is written to the standard output.
> @@ -128,9 +136,9 @@ utility is compliant with the
>  .St -p1003.1-2008
>  specification.
>  .Pp
> -The flag
> -.Op Fl h
> -is an extension to that specification.
> +The flags
> +.Op Fl Lh
> +are extensions to that specification.
>  .Sh HISTORY
>  A
>  .Nm
> Index: wc.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/wc/wc.c,v
> retrieving revision 1.30
> diff -u -p -r1.30 wc.c
> --- wc.c      2 Sep 2022 15:21:40 -0000       1.30
> +++ wc.c      21 Sep 2022 15:47:29 -0000
> @@ -44,12 +44,12 @@
>  
>  #define      _MAXBSIZE (64 * 1024)
>  
> -int64_t      tlinect, twordct, tcharct;
> -int  doline, doword, dochar, humanchar, multibyte;
> +int64_t      tlinect, twordct, tcharct, tlongest;
> +int  doline, doword, dochar, dolongest, humanchar, multibyte;
>  int  rval;
>  extern char *__progname;
>  
> -static void print_counts(int64_t, int64_t, int64_t, const char *);
> +static void print_counts(int64_t, int64_t, int64_t, int64_t, const char *);
>  static void format_and_print(int64_t);
>  static void cnt(const char *);
>  
> @@ -63,8 +63,11 @@ main(int argc, char *argv[])
>       if (pledge("stdio rpath", NULL) == -1)
>               err(1, "pledge");
>  
> -     while ((ch = getopt(argc, argv, "lwchm")) != -1)
> +     while ((ch = getopt(argc, argv, "Llwchm")) != -1)
>               switch(ch) {
> +             case 'L':
> +                     dolongest = 1;
> +                     break;
>               case 'l':
>                       doline = 1;
>                       break;
> @@ -84,7 +87,7 @@ main(int argc, char *argv[])
>               case '?':
>               default:
>                       fprintf(stderr,
> -                         "usage: %s [-c | -m] [-hlw] [file ...]\n",
> +                         "usage: %s [-c | -m] [-hLlw] [file ...]\n",
>                           __progname);
>                       return 1;
>               }
> @@ -96,7 +99,7 @@ main(int argc, char *argv[])
>        * if you don't get any arguments, you have to turn them
>        * all on.
>        */
> -     if (!doline && !doword && !dochar)
> +     if (!doline && !doword && !dochar && !dolongest)
>               doline = doword = dochar = 1;
>  
>       if (!*argv) {
> @@ -109,7 +112,8 @@ main(int argc, char *argv[])
>               } while(*++argv);
>  
>               if (dototal)
> -                     print_counts(tlinect, twordct, tcharct, "total");
> +                     print_counts(tlinect, twordct, tcharct, tlongest,
> +                         "total");
>       }
>  
>       return rval;
> @@ -127,11 +131,11 @@ cnt(const char *path)
>       wchar_t wc;
>       short gotsp;
>       ssize_t len;
> -     int64_t linect, wordct, charct;
> +     uint64_t linect, wordct, charct, longct, tmpll;
>       struct stat sbuf;
>       int fd;
>  
> -     linect = wordct = charct = 0;
> +     linect = wordct = charct = longct = tmpll = 0;
>       stream = NULL;
>       if (path != NULL) {
>               file = path;
> @@ -180,12 +184,19 @@ cnt(const char *path)
>                * faster to get lines than to get words, since
>                * the word count requires some logic.
>                */
> -             else if (doline) {
> +             else if (doline || dolongest) {
>                       while ((len = read(fd, buf, _MAXBSIZE)) > 0) {
>                               charct += len;
> -                             for (C = buf; len--; ++C)
> -                                     if (*C == '\n')
> +                             for (C = buf; len--; ++C) {
> +                                     if (*C == '\n') {
> +                                             if (tmpll > longct)
> +                                                     longct = tmpll;
> +                                             tmpll = 0;
>                                               ++linect;
> +                                     } else {
> +                                             ++tmpll;
> +                                     }
> +                             }
>                       }
>                       if (len == -1) {
>                               warn("%s", file);
> @@ -243,10 +254,16 @@ cnt(const char *path)
>                                       wc = L'?';
>                               } else if (len == 0)
>                                       len = 1;
> +                             if (wc != L'\n')
> +                                     ++tmpll;
>                               if (iswspace(wc)) {
>                                       gotsp = 1;
> -                                     if (wc == L'\n')
> +                                     if (wc == L'\n') {
> +                                             if (tmpll > longct)
> +                                                     longct = tmpll;
> +                                             tmpll = 0;
>                                               ++linect;
> +                                     }
>                               } else if (gotsp) {
>                                       gotsp = 0;
>                                       ++wordct;
> @@ -259,7 +276,7 @@ cnt(const char *path)
>               }
>       }
>  
> -     print_counts(linect, wordct, charct, path);
> +     print_counts(linect, wordct, charct, longct, path);
>  
>       /*
>        * Don't bother checking doline, doword, or dochar -- speeds
> @@ -268,6 +285,8 @@ cnt(const char *path)
>       tlinect += linect;
>       twordct += wordct;
>       tcharct += charct;
> +     if (dolongest && longct > tlongest)
> +             tlongest = longct;
>  
>       if ((stream == NULL ? close(fd) : fclose(stream)) != 0) {
>               warn("%s", file);
> @@ -289,7 +308,8 @@ format_and_print(int64_t v)
>  }
>  
>  static void
> -print_counts(int64_t lines, int64_t words, int64_t chars, const char *name)
> +print_counts(int64_t lines, int64_t words, int64_t chars, int64_t longest,
> +    const char *name)
>  {
>       if (doline)
>               format_and_print(lines);
> @@ -297,6 +317,8 @@ print_counts(int64_t lines, int64_t word
>               format_and_print(words);
>       if (dochar)
>               format_and_print(chars);
> +     if (dolongest)
> +             format_and_print(longest);
>  
>       if (name)
>               printf(" %s\n", name);
> 
> 

Reply via email to