On Sat, Dec 30, 2017 at 06:53:44AM +0000, kshe wrote: > Hi, > > Looking at this diff and the previous one, I found some more possible > cleanups for malloc.c (the patch below is to be applied after both of > them, even if the second one has not been committed yet): > > 1. In malloc_bytes(), use ffs(3) instead of manual loops, which on many > architectures boils down to merely one or two instructions (for example, > see src/lib/libc/arch/amd64/string/ffs.S).
I remember doing some measurements using ffs a long time ago, and it turned out that is was slower in some cases. Likely due to the function call overhead or maybe a non-optimal implementation. So I'm only willing to consider this after seeing benchmarks on a handfull of architectures. > > 2. Slightly reorder find_chunksize() to avoid checking twice for the > same condition. > > 3. Remove the outdated comment above omalloc_poolinit(). > > 4. Remove extra braces enclosing a switch case, made unnecessary by > revision 1.233. > > 5. Some miscellaneous style and whitespace fixes. The other chunks I'll consider after the diff below (or a succcesor) has been committed. -Otto > > --- malloc.c.orig Fri Dec 29 00:06:24 2017 > +++ malloc.c Fri Dec 29 06:02:04 2017 > @@ -376,12 +376,11 @@ omalloc_parseopt(char opt) > case 'X': > mopts.malloc_xmalloc = 1; > break; > - default: { > + default: > dprintf(STDERR_FILENO, "malloc() warning: " > - "unknown char in MALLOC_OPTIONS\n"); > + "unknown char in MALLOC_OPTIONS\n"); > break; > } > - } > } > > static void > @@ -448,9 +447,6 @@ omalloc_init(void) > ; > } > > -/* > - * Initialize a dir_info, which should have been cleared by caller > - */ > static void > omalloc_poolinit(struct dir_info **dp) > { > @@ -502,7 +498,7 @@ omalloc_grow(struct dir_info *d) > size_t i; > struct region_info *p; > > - if (d->regions_total > SIZE_MAX / sizeof(struct region_info) / 2 ) > + if (d->regions_total > SIZE_MAX / sizeof(struct region_info) / 2) > return 1; > > newtotal = d->regions_total * 2; > @@ -828,7 +824,7 @@ alloc_chunk_info(struct dir_info *d, int bits) > return NULL; > STATS_ADD(d->malloc_used, MALLOC_PAGESIZE); > count = MALLOC_PAGESIZE / size; > - > + > if (bits == 0) { > shift = 1; > i = MALLOC_MINSIZE - 1; > @@ -903,23 +899,21 @@ err: > static int > find_chunksize(size_t size) > { > - int i, j; > + int r; > > - /* Don't bother with anything less than this */ > - /* unless we have a malloc(0) requests */ > - if (size != 0 && size < MALLOC_MINSIZE) > + /* malloc(0) is special */ > + if (size == 0) > + return 0; > + > + if (size < MALLOC_MINSIZE) > size = MALLOC_MINSIZE; > + size--; > > - /* Find the right bucket */ > - if (size == 0) > - j = 0; > - else { > - j = MALLOC_MINSHIFT; > - i = (size - 1) >> (MALLOC_MINSHIFT - 1); > - while (i >>= 1) > - j++; > - } > - return j; > + r = MALLOC_MINSHIFT; > + while (size >> r != 0) > + r++; > + > + return r; > } > > static void > @@ -941,7 +935,7 @@ malloc_bytes(struct dir_info *d, size_t size, void *f) > u_int i, r; > int j, listnum; > size_t k; > - u_short u, b, *lp; > + u_short *lp; > struct chunk_info *bp; > void *p; > > @@ -970,15 +964,12 @@ malloc_bytes(struct dir_info *d, size_t size, void *f) > /* start somewhere in a short */ > lp = &bp->bits[i / MALLOC_BITS]; > if (*lp) { > - b = *lp; > - k = i % MALLOC_BITS; > - u = 1 << k; > - while (k < MALLOC_BITS) { > - if (b & u) > - goto found; > - k++; > - u <<= 1; > - } > + j = i % MALLOC_BITS; > + k = ffs(*lp >> j); > + if (k != 0) { > + k += j - 1; > + goto found; > + } > } > /* no bit halfway, go to next full short */ > i /= MALLOC_BITS; > @@ -986,16 +977,10 @@ malloc_bytes(struct dir_info *d, size_t size, void *f) > if (++i >= bp->total / MALLOC_BITS) > i = 0; > lp = &bp->bits[i]; > - if (*lp) { > - b = *lp; > - k = 0; > - u = 1; > - for (;;) { > - if (b & u) > - goto found; > - k++; > - u <<= 1; > - } > + k = ffs(*lp); > + if (k != 0) { > + k--; > + break; > } > } > found: > @@ -1006,10 +991,10 @@ found: > } > #endif > > - *lp ^= u; > + *lp ^= 1 << k; > > /* If there are no more free, remove from free-list */ > - if (!--bp->free) > + if (--bp->free == 0) > LIST_REMOVE(bp, entries); > > /* Adjust to the real offset of that chunk */ > @@ -1045,7 +1030,7 @@ validate_canary(struct dir_info *d, u_char *ptr, size_ > if (*p != SOME_JUNK) { > wrterror(d, "chunk canary corrupted %p %#tx@%#zx%s", > ptr, p - ptr, sz, *p == SOME_FREEJUNK ? > - " (double free?)" : ""); > + " (double free?)" : ""); > } > p++; > } > @@ -1169,8 +1154,7 @@ omalloc(struct dir_info *pool, size_t sz, int zero_fil > else > memset(p, SOME_JUNK, > psz - mopts.malloc_guard); > - } > - else if (mopts.chunk_canaries) > + } else if (mopts.chunk_canaries) > fill_canary(p, sz - mopts.malloc_guard, > psz - mopts.malloc_guard); > } > @@ -1221,7 +1205,7 @@ _malloc_init(int from_rthreads) > max = from_rthreads ? _MALLOC_MUTEXES : 1; > if (((uintptr_t)&malloc_readonly & MALLOC_PAGEMASK) == 0) > mprotect(&malloc_readonly, sizeof(malloc_readonly), > - PROT_READ | PROT_WRITE); > + PROT_READ | PROT_WRITE); > for (i = 0; i < max; i++) { > if (mopts.malloc_pool[i]) > continue; > @@ -2026,8 +2010,7 @@ omemalign(struct dir_info *pool, size_t alignment, siz > SOME_JUNK, psz - sz); > else > memset(p, SOME_JUNK, psz - mopts.malloc_guard); > - } > - else if (mopts.chunk_canaries) > + } else if (mopts.chunk_canaries) > fill_canary(p, sz - mopts.malloc_guard, > psz - mopts.malloc_guard); > > > Regards, > > kshe