Hi, apart from the random page addresses obtained from mmap(2), malloc(3) itself also randomizes cache and chunk operations. It uses a nibble of randomness per call, so optimize that to avoid wasting half of the random bits.
Please test, should be a bit faster. -Otto Index: malloc.c =================================================================== RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v retrieving revision 1.121 diff -u -p -r1.121 malloc.c --- malloc.c 27 Nov 2009 20:11:01 -0000 1.121 +++ malloc.c 30 Nov 2009 19:40:47 -0000 @@ -64,7 +64,7 @@ #define MALLOC_MAXCHUNK (1 << (MALLOC_PAGESHIFT-1)) #define MALLOC_MAXCACHE 256 -#define MALLOC_DELAYED_CHUNKS 16 /* should be power of 2 */ +#define MALLOC_DELAYED_CHUNKS 15 /* max of getrnibble() */ /* * When the P option is active, we move allocations between half a page * and a whole page towards the end, subject to alignment constraints. @@ -110,7 +110,7 @@ struct dir_info { /* free pages cache */ struct region_info free_regions[MALLOC_MAXCACHE]; /* delayed free chunk slots */ - void *delayed_chunks[MALLOC_DELAYED_CHUNKS]; + void *delayed_chunks[MALLOC_DELAYED_CHUNKS + 1]; #ifdef MALLOC_STATS size_t inserts; size_t insert_collisions; @@ -183,9 +183,9 @@ static int malloc_active; /* status of static size_t malloc_guarded; /* bytes used for guards */ static size_t malloc_used; /* bytes allocated */ -static size_t rbytesused; /* random bytes used */ +static size_t rnibblesused; /* random nibbles used */ static u_char rbytes[512]; /* random bytes */ -static u_char getrbyte(void); +static u_char getrnibble(void); extern char *__progname; @@ -378,6 +378,26 @@ wrterror(char *p) abort(); } +static void +rbytes_init(void) +{ + arc4random_buf(rbytes, sizeof(rbytes)); + rnibblesused = 0; +} + +static inline u_char +getrnibble(void) +{ + u_char x; + + if (rnibblesused >= 2 * sizeof(rbytes)) + rbytes_init(); + x = rnibblesused < sizeof(rbytes) ? (rbytes[rnibblesused] & 0xf) : + (rbytes[rnibblesused - sizeof(rbytes)] >> 4); + rnibblesused++; + return x; +} + /* * Cache maintenance. We keep at most malloc_cache pages cached. 
* If the cache is becoming full, unmap pages in the cache for real, @@ -408,7 +428,7 @@ unmap(struct dir_info *d, void *p, size_ rsz = mopts.malloc_cache - d->free_regions_size; if (psz > rsz) tounmap = psz - rsz; - offset = getrbyte(); + offset = getrnibble(); for (i = 0; tounmap > 0 && i < mopts.malloc_cache; i++) { r = &d->free_regions[(i + offset) & (mopts.malloc_cache - 1)]; if (r->p != NULL) { @@ -489,7 +509,7 @@ map(struct dir_info *d, size_t sz, int z /* zero fill not needed */ return p; } - offset = getrbyte(); + offset = getrnibble(); for (i = 0; i < mopts.malloc_cache; i++) { r = &d->free_regions[(i + offset) & (mopts.malloc_cache - 1)]; if (r->p != NULL) { @@ -536,21 +556,6 @@ map(struct dir_info *d, size_t sz, int z return p; } -static void -rbytes_init(void) -{ - arc4random_buf(rbytes, sizeof(rbytes)); - rbytesused = 0; -} - -static u_char -getrbyte(void) -{ - if (rbytesused >= sizeof(rbytes)) - rbytes_init(); - return rbytes[rbytesused++]; -} - /* * Initialize a dir_info, which should have been cleared by caller */ @@ -1010,7 +1015,7 @@ malloc_bytes(struct dir_info *d, size_t } /* advance a random # of positions */ - i = (getrbyte() & (MALLOC_DELAYED_CHUNKS - 1)) % bp->free; + i = getrnibble() % bp->free; while (i > 0) { u += u; k++; @@ -1273,7 +1278,7 @@ ofree(void *p) if (mopts.malloc_junk && sz > 0) memset(p, SOME_FREEJUNK, sz); if (!mopts.malloc_freeprot) { - i = getrbyte() & (MALLOC_DELAYED_CHUNKS - 1); + i = getrnibble(); tmp = p; p = g_pool->delayed_chunks[i]; g_pool->delayed_chunks[i] = tmp;