In tr(1), if we have already generated the set of characters for a
given character class, we don't need to generate it again.

Further, we don't need to keep all the memory we allocate for the set
of characters in the class. NCHARS + 1 is just an upper bound on the
length.  We should return whatever we don't need.

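This plugs a small leak: until now every call to genclass() allocated a
fresh NCHARS + 1 array and overwrote cp->set, so the array from any
earlier call for the same class was lost.  You can see the difference
with time(1):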

# Unpatched.
$ command time -l tr -d $(jot -s'' -b[:graph:] 20000) < /dev/null
        0.11 real         0.09 user         0.02 sys
     43632  maximum resident set size
         0  average shared memory size
         0  average unshared data size
         0  average unshared stack size
     10668  minor page faults
         0  major page faults
         0  swaps
         0  block input operations
         0  block output operations
         0  messages sent
         0  messages received
         0  signals received
         0  voluntary context switches
         0  involuntary context switches

# Patched.  Note the huge difference in maximum RSS.
$ command time -l tr -d $(jot -s'' -b[:graph:] 20000) < /dev/null
        0.01 real         0.01 user         0.00 sys
      1740  maximum resident set size
         0  average shared memory size
         0  average unshared data size
         0  average unshared stack size
       394  minor page faults
         0  major page faults
         0  swaps
         0  block input operations
         0  block output operations
         0  messages sent
         0  messages received
         0  signals received
         0  voluntary context switches
         0  involuntary context switches

Personally I think using OOBCH as a sentinel value to mark the end of
the array is silly.  We can trivially track the length of the set in
the CLASS struct.  I'll save that for a later patch, though.  That
involves changing code elsewhere in str.c.

While here, switch to an ANSI function definition.

ok?

Index: str.c
===================================================================
RCS file: /cvs/src/usr.bin/tr/str.c,v
retrieving revision 1.13
diff -u -p -r1.13 str.c
--- str.c       31 Oct 2021 21:34:16 -0000      1.13
+++ str.c       31 Oct 2021 22:12:44 -0000
@@ -164,24 +164,38 @@ static CLASS classes[] = {
 };
 
 static void
-genclass(s)
-       STR *s;
+genclass(STR *s)
 {
-       int cnt, (*func)(int);
        CLASS *cp, tmp;
-       int *p;
+       size_t len;
+       int i;
 
        tmp.name = (char *)s->str;
        if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
            sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
                errx(1, "unknown class %s", s->str);
 
-       if ((cp->set = p = calloc(NCHARS + 1, sizeof(int))) == NULL)
-               errx(1, "no memory for a class");
-       for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
-               if ((func)(cnt))
-                       *p++ = cnt;
-       *p = OOBCH;
+       /*
+        * Generate the set of characters in the class if we haven't
+        * already done so.
+        */
+       if (cp->set == NULL) {
+               cp->set = reallocarray(NULL, NCHARS + 1, sizeof(*cp->set));
+               if (cp->set == NULL)
+                       err(1, NULL);
+               len = 0;
+               for (i = 0; i < NCHARS; i++) {
+                       if (cp->func(i)) {
+                               cp->set[len] = i;
+                               len++;
+                       }
+               }
+               cp->set[len] = OOBCH;
+               len++;
+               cp->set = reallocarray(cp->set, len, sizeof(*cp->set));
+               if (cp->set == NULL)
+                       err(1, NULL);
+       }
 
        s->cnt = 0;
        s->state = SET;

Reply via email to