On Sat, Dec 30, 2017 at 06:53:44AM +0000, kshe wrote:

> Hi,
> 
> Looking at this diff and the previous one, I found some more possible
> cleanups for malloc.c (the patch below is to be applied after both of
> them, even if the second one has not been committed yet):
> 
> 1.  In malloc_bytes(), use ffs(3) instead of manual loops, which on many
> architectures boils down to merely one or two instructions (for example,
> see src/lib/libc/arch/amd64/string/ffs.S).

I remember doing some measurements using ffs a long time ago, and it
turned out that is was slower in some cases. Likely due to the
function call overhead or maybe a non-optimal implementation. So I'm
only willing to consider this after seeing benchmarks on a handfull of
architectures.

> 
> 2.  Slightly reorder find_chunksize() to avoid checking twice for the
> same condition.
> 
> 3.  Remove the outdated comment above omalloc_poolinit().
> 
> 4.  Remove extra braces enclosing a switch case, made unnecessary by
> revision 1.233.
> 
> 5.  Some miscellaneous style and whitespace fixes.

The other chunks I'll consider after the diff below (or a succcesor)
has been committed.

        -Otto

> 
> --- malloc.c.orig     Fri Dec 29 00:06:24 2017
> +++ malloc.c  Fri Dec 29 06:02:04 2017
> @@ -376,12 +376,11 @@ omalloc_parseopt(char opt)
>       case 'X':
>               mopts.malloc_xmalloc = 1;
>               break;
> -     default: {
> +     default:
>               dprintf(STDERR_FILENO, "malloc() warning: "
> -                    "unknown char in MALLOC_OPTIONS\n");
> +                 "unknown char in MALLOC_OPTIONS\n");
>               break;
>       }
> -     }
>  }
>  
>  static void
> @@ -448,9 +447,6 @@ omalloc_init(void)
>               ;
>  }
>  
> -/*
> - * Initialize a dir_info, which should have been cleared by caller
> - */
>  static void
>  omalloc_poolinit(struct dir_info **dp)
>  {
> @@ -502,7 +498,7 @@ omalloc_grow(struct dir_info *d)
>       size_t i;
>       struct region_info *p;
>  
> -     if (d->regions_total > SIZE_MAX / sizeof(struct region_info) / 2 )
> +     if (d->regions_total > SIZE_MAX / sizeof(struct region_info) / 2)
>               return 1;
>  
>       newtotal = d->regions_total * 2;
> @@ -828,7 +824,7 @@ alloc_chunk_info(struct dir_info *d, int bits)
>                       return NULL;
>               STATS_ADD(d->malloc_used, MALLOC_PAGESIZE);
>               count = MALLOC_PAGESIZE / size;
> -             
> +
>               if (bits == 0) {
>                       shift = 1;
>                       i = MALLOC_MINSIZE - 1;
> @@ -903,23 +899,21 @@ err:
>  static int
>  find_chunksize(size_t size)
>  {
> -     int             i, j;
> +     int r;
>  
> -     /* Don't bother with anything less than this */
> -     /* unless we have a malloc(0) requests */
> -     if (size != 0 && size < MALLOC_MINSIZE)
> +     /* malloc(0) is special */
> +     if (size == 0)
> +             return 0;
> +
> +     if (size < MALLOC_MINSIZE)
>               size = MALLOC_MINSIZE;
> +     size--;
>  
> -     /* Find the right bucket */
> -     if (size == 0)
> -             j = 0;
> -     else {
> -             j = MALLOC_MINSHIFT;
> -             i = (size - 1) >> (MALLOC_MINSHIFT - 1);
> -             while (i >>= 1)
> -                     j++;
> -     }
> -     return j;
> +     r = MALLOC_MINSHIFT;
> +     while (size >> r != 0)
> +             r++;
> +
> +     return r;
>  }
>  
>  static void
> @@ -941,7 +935,7 @@ malloc_bytes(struct dir_info *d, size_t size, void *f)
>       u_int i, r;
>       int j, listnum;
>       size_t k;
> -     u_short u, b, *lp;
> +     u_short *lp;
>       struct chunk_info *bp;
>       void *p;
>  
> @@ -970,15 +964,12 @@ malloc_bytes(struct dir_info *d, size_t size, void *f)
>       /* start somewhere in a short */
>       lp = &bp->bits[i / MALLOC_BITS];
>       if (*lp) {
> -             b = *lp;
> -             k = i % MALLOC_BITS;
> -             u = 1 << k;
> -             while (k < MALLOC_BITS) {
> -                     if (b & u)
> -                             goto found;
> -                     k++;
> -                     u <<= 1;
> -             } 
> +             j = i % MALLOC_BITS;
> +             k = ffs(*lp >> j);
> +             if (k != 0) {
> +                     k += j - 1;
> +                     goto found;
> +             }
>       }
>       /* no bit halfway, go to next full short */
>       i /= MALLOC_BITS;
> @@ -986,16 +977,10 @@ malloc_bytes(struct dir_info *d, size_t size, void *f)
>               if (++i >= bp->total / MALLOC_BITS)
>                       i = 0;
>               lp = &bp->bits[i];
> -             if (*lp) {
> -                     b = *lp;
> -                     k = 0;
> -                     u = 1;
> -                     for (;;) {
> -                             if (b & u)
> -                                     goto found;
> -                             k++;
> -                             u <<= 1;
> -                     }
> +             k = ffs(*lp);
> +             if (k != 0) {
> +                     k--;
> +                     break;
>               }
>       }
>  found:
> @@ -1006,10 +991,10 @@ found:
>       }
>  #endif
>  
> -     *lp ^= u;
> +     *lp ^= 1 << k;
>  
>       /* If there are no more free, remove from free-list */
> -     if (!--bp->free)
> +     if (--bp->free == 0)
>               LIST_REMOVE(bp, entries);
>  
>       /* Adjust to the real offset of that chunk */
> @@ -1045,7 +1030,7 @@ validate_canary(struct dir_info *d, u_char *ptr, size_
>               if (*p != SOME_JUNK) {
>                       wrterror(d, "chunk canary corrupted %p %#tx@%#zx%s",
>                           ptr, p - ptr, sz, *p == SOME_FREEJUNK ?
> -                             " (double free?)" : "");
> +                         " (double free?)" : "");
>               }
>               p++;
>       }
> @@ -1169,8 +1154,7 @@ omalloc(struct dir_info *pool, size_t sz, int zero_fil
>                               else
>                                       memset(p, SOME_JUNK,
>                                           psz - mopts.malloc_guard);
> -                     }
> -                     else if (mopts.chunk_canaries)
> +                     } else if (mopts.chunk_canaries)
>                               fill_canary(p, sz - mopts.malloc_guard,
>                                   psz - mopts.malloc_guard);
>               }
> @@ -1221,7 +1205,7 @@ _malloc_init(int from_rthreads)
>       max = from_rthreads ? _MALLOC_MUTEXES : 1;
>       if (((uintptr_t)&malloc_readonly & MALLOC_PAGEMASK) == 0)
>               mprotect(&malloc_readonly, sizeof(malloc_readonly),
> -                  PROT_READ | PROT_WRITE);
> +                 PROT_READ | PROT_WRITE);
>       for (i = 0; i < max; i++) {
>               if (mopts.malloc_pool[i])
>                       continue;
> @@ -2026,8 +2010,7 @@ omemalign(struct dir_info *pool, size_t alignment, siz
>                           SOME_JUNK, psz - sz);
>               else
>                       memset(p, SOME_JUNK, psz - mopts.malloc_guard);
> -     }
> -     else if (mopts.chunk_canaries)
> +     } else if (mopts.chunk_canaries)
>               fill_canary(p, sz - mopts.malloc_guard,
>                   psz - mopts.malloc_guard);
>  
> 
> Regards,
> 
> kshe

Reply via email to