Hi,

apart from the random page addresses obtained from mmap(2), malloc(3)
itself also randomizes cache and chunk operations. It uses a nibble of
randomness per call, so optimize that to not waste half the random
bits.

Please test, should be a bit faster.

        -Otto
        
Index: malloc.c
===================================================================
RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.121
diff -u -p -r1.121 malloc.c
--- malloc.c    27 Nov 2009 20:11:01 -0000      1.121
+++ malloc.c    30 Nov 2009 19:40:47 -0000
@@ -64,7 +64,7 @@
 
 #define MALLOC_MAXCHUNK                (1 << (MALLOC_PAGESHIFT-1))
 #define MALLOC_MAXCACHE                256
-#define MALLOC_DELAYED_CHUNKS  16      /* should be power of 2 */
+#define MALLOC_DELAYED_CHUNKS  15      /* max of getrnibble() */
 /*
  * When the P option is active, we move allocations between half a page
  * and a whole page towards the end, subject to alignment constraints.
@@ -110,7 +110,7 @@ struct dir_info {
                                        /* free pages cache */
        struct region_info free_regions[MALLOC_MAXCACHE];
                                        /* delayed free chunk slots */
-       void *delayed_chunks[MALLOC_DELAYED_CHUNKS];
+       void *delayed_chunks[MALLOC_DELAYED_CHUNKS + 1];
 #ifdef MALLOC_STATS
        size_t inserts;
        size_t insert_collisions;
@@ -183,9 +183,9 @@ static int  malloc_active;          /* status of 
 static size_t  malloc_guarded;         /* bytes used for guards */
 static size_t  malloc_used;            /* bytes allocated */
 
-static size_t rbytesused;              /* random bytes used */
+static size_t rnibblesused;            /* random nibbles used */
 static u_char rbytes[512];             /* random bytes */
-static u_char getrbyte(void);
+static u_char getrnibble(void);
 
 extern char    *__progname;
 
@@ -378,6 +378,26 @@ wrterror(char *p)
                abort();
 }
 
+static void
+rbytes_init(void)
+{
+       arc4random_buf(rbytes, sizeof(rbytes));
+       rnibblesused = 0;
+}
+
+static inline u_char
+getrnibble(void)
+{
+       u_char x;
+
+       if (rnibblesused >= 2 * sizeof(rbytes))
+               rbytes_init();
+       x = rnibblesused < sizeof(rbytes) ? (rbytes[rnibblesused] & 0xf) :
+           (rbytes[rnibblesused - sizeof(rbytes)] >> 4);
+       rnibblesused++;
+       return x;
+}
+
 /*
  * Cache maintenance. We keep at most malloc_cache pages cached.
  * If the cache is becoming full, unmap pages in the cache for real,
@@ -408,7 +428,7 @@ unmap(struct dir_info *d, void *p, size_
        rsz = mopts.malloc_cache - d->free_regions_size;
        if (psz > rsz)
                tounmap = psz - rsz;
-       offset = getrbyte();
+       offset = getrnibble();
        for (i = 0; tounmap > 0 && i < mopts.malloc_cache; i++) {
                r = &d->free_regions[(i + offset) & (mopts.malloc_cache - 1)];
                if (r->p != NULL) {
@@ -489,7 +509,7 @@ map(struct dir_info *d, size_t sz, int z
                /* zero fill not needed */
                return p;
        }
-       offset = getrbyte();
+       offset = getrnibble();
        for (i = 0; i < mopts.malloc_cache; i++) {
                r = &d->free_regions[(i + offset) & (mopts.malloc_cache - 1)];
                if (r->p != NULL) {
@@ -536,21 +556,6 @@ map(struct dir_info *d, size_t sz, int z
        return p;
 }
 
-static void
-rbytes_init(void)
-{
-       arc4random_buf(rbytes, sizeof(rbytes));
-       rbytesused = 0;
-}
-
-static u_char
-getrbyte(void)
-{
-       if (rbytesused >= sizeof(rbytes))
-               rbytes_init();
-       return rbytes[rbytesused++];
-}
-
 /*
  * Initialize a dir_info, which should have been cleared by caller
  */
@@ -1010,7 +1015,7 @@ malloc_bytes(struct dir_info *d, size_t 
        }
 
        /* advance a random # of positions */
-       i = (getrbyte() & (MALLOC_DELAYED_CHUNKS - 1)) % bp->free;
+       i = getrnibble() % bp->free;
        while (i > 0) {
                u += u;
                k++;
@@ -1273,7 +1278,7 @@ ofree(void *p)
                if (mopts.malloc_junk && sz > 0)
                        memset(p, SOME_FREEJUNK, sz);
                if (!mopts.malloc_freeprot) {
-                       i = getrbyte() & (MALLOC_DELAYED_CHUNKS - 1);
+                       i = getrnibble();
                        tmp = p;
                        p = g_pool->delayed_chunks[i];
                        g_pool->delayed_chunks[i] = tmp;

Reply via email to