Hi,

This diff implements a more flexible approach for the number of pools
malloc uses in the multi-threaded case. At the momemt I do not intend
to commit this as-is, I first need this to get some feedback on what
the proper default should be.

Currently the number of pools is fixed at 4. More pools mean less
contention for allocations, but free becomes more expensive since a
thread might need to check other pools increasing contention.

I'd like to know how this diff behaves using your favorite
mutli-threaded application. Often this will be a web-browser I guess.

Test instructions:

0. Make sure you are running current.

1. Do a baseline test of your application.

2. Apply diff, build and install userland.

3. Run your test application with MALLOC_OPTIONS=value, where value is: 
"", +, -, ++, -- and +++.

e.g. 

        MALLOC_OPTIONS=++ chrome

Note performance. Do multiple tests to get better statistics.

If you're not able to do full tests, at least general observations are
welcome. Tell a bit about the system you tested on (e.g. number of
cores). Note that due to randomization, different runs might show
different performance numbers since the pools shared by subsets of
threads can turn out differently.

Thanks,

        -Otto

Index: include/thread_private.h
===================================================================
RCS file: /cvs/src/lib/libc/include/thread_private.h,v
retrieving revision 1.33
diff -u -p -r1.33 thread_private.h
--- include/thread_private.h    5 Dec 2017 13:45:31 -0000       1.33
+++ include/thread_private.h    19 Dec 2018 06:52:07 -0000
@@ -7,7 +7,7 @@
 
 #include <stdio.h>             /* for FILE and __isthreaded */
 
-#define _MALLOC_MUTEXES 4
+#define _MALLOC_MUTEXES 32
 void _malloc_init(int);
 #ifdef __LIBC__
 PROTO_NORMAL(_malloc_init);
Index: stdlib/malloc.c
===================================================================
RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.257
diff -u -p -r1.257 malloc.c
--- stdlib/malloc.c     10 Dec 2018 07:57:49 -0000      1.257
+++ stdlib/malloc.c     19 Dec 2018 06:52:07 -0000
@@ -143,6 +143,8 @@ struct dir_info {
        size_t cheap_reallocs;
        size_t malloc_used;             /* bytes allocated */
        size_t malloc_guarded;          /* bytes used for guards */
+       size_t pool_searches;           /* searches for pool */
+       size_t other_pool;              /* searches in other pool */
 #define STATS_ADD(x,y) ((x) += (y))
 #define STATS_SUB(x,y) ((x) -= (y))
 #define STATS_INC(x)   ((x)++)
@@ -179,7 +181,9 @@ struct chunk_info {
 };
 
 struct malloc_readonly {
-       struct dir_info *malloc_pool[_MALLOC_MUTEXES];  /* Main bookkeeping 
information */
+                                       /* Main bookkeeping information */
+       struct dir_info *malloc_pool[_MALLOC_MUTEXES];
+       u_int   malloc_mutexes;         /* how much in actual use? */
        int     malloc_mt;              /* multi-threaded mode? */
        int     malloc_freecheck;       /* Extensive double free check */
        int     malloc_freeunmap;       /* mprotect free pages PROT_NONE? */
@@ -267,7 +271,7 @@ getpool(void)
                return mopts.malloc_pool[0];
        else
                return mopts.malloc_pool[TIB_GET()->tib_tid &
-                   (_MALLOC_MUTEXES - 1)];
+                   (mopts.malloc_mutexes - 1)];
 }
 
 static __dead void
@@ -316,6 +320,16 @@ static void
 omalloc_parseopt(char opt)
 {
        switch (opt) {
+       case '+':
+               mopts.malloc_mutexes <<= 1;
+               if (mopts.malloc_mutexes > _MALLOC_MUTEXES)
+                       mopts.malloc_mutexes = _MALLOC_MUTEXES;
+               break;
+       case '-':
+               mopts.malloc_mutexes >>= 1;
+               if (mopts.malloc_mutexes < 1)
+                       mopts.malloc_mutexes = 1;
+               break;
        case '>':
                mopts.malloc_cache <<= 1;
                if (mopts.malloc_cache > MALLOC_MAXCACHE)
@@ -395,6 +409,7 @@ omalloc_init(void)
        /*
         * Default options
         */
+       mopts.malloc_mutexes = 4;
        mopts.malloc_junk = 1;
        mopts.malloc_cache = MALLOC_DEFAULT_CACHE;
 
@@ -485,7 +500,7 @@ omalloc_poolinit(struct dir_info **dp)
                for (j = 0; j < MALLOC_CHUNK_LISTS; j++)
                        LIST_INIT(&d->chunk_dir[i][j]);
        }
-       STATS_ADD(d->malloc_used, regioninfo_size);
+       STATS_ADD(d->malloc_used, regioninfo_size + 3 * MALLOC_PAGESIZE);
        d->canary1 = mopts.malloc_canary ^ (u_int32_t)(uintptr_t)d;
        d->canary2 = ~d->canary1;
 
@@ -1196,7 +1211,7 @@ _malloc_init(int from_rthreads)
        if (!mopts.malloc_canary)
                omalloc_init();
 
-       max = from_rthreads ? _MALLOC_MUTEXES : 1;
+       max = from_rthreads ? mopts.malloc_mutexes : 1;
        if (((uintptr_t)&malloc_readonly & MALLOC_PAGEMASK) == 0)
                mprotect(&malloc_readonly, sizeof(malloc_readonly),
                    PROT_READ | PROT_WRITE);
@@ -1281,16 +1296,19 @@ findpool(void *p, struct dir_info *argpo
        struct dir_info *pool = argpool;
        struct region_info *r = find(pool, p);
 
+       STATS_INC(pool->pool_searches);
        if (r == NULL) {
                if (mopts.malloc_mt)  {
                        int i;
 
-                       for (i = 0; i < _MALLOC_MUTEXES; i++) {
-                               if (i == argpool->mutex)
-                                       continue;
+                       STATS_INC(pool->other_pool);
+                       for (i = 1; i < mopts.malloc_mutexes; i++) {
+                               int j = (argpool->mutex + i) &
+                                   (mopts.malloc_mutexes - 1);
+
                                pool->active--;
                                _MALLOC_UNLOCK(pool->mutex);
-                               pool = mopts.malloc_pool[i];
+                               pool = mopts.malloc_pool[j];
                                _MALLOC_LOCK(pool->mutex);
                                pool->active++;
                                r = find(pool, p);
@@ -2220,14 +2238,13 @@ malloc_dump1(int fd, int poolno, struct 
                return;
        dprintf(fd, "Region slots free %zu/%zu\n",
                d->regions_free, d->regions_total);
-       dprintf(fd, "Finds %zu/%zu\n", d->finds,
-           d->find_collisions);
-       dprintf(fd, "Inserts %zu/%zu\n", d->inserts,
-           d->insert_collisions);
-       dprintf(fd, "Deletes %zu/%zu\n", d->deletes,
-           d->delete_moves);
+       dprintf(fd, "Finds %zu/%zu\n", d->finds, d->find_collisions);
+       dprintf(fd, "Inserts %zu/%zu\n", d->inserts, d->insert_collisions);
+       dprintf(fd, "Deletes %zu/%zu\n", d->deletes, d->delete_moves);
        dprintf(fd, "Cheap reallocs %zu/%zu\n",
            d->cheap_reallocs, d->cheap_realloc_tries);
+       dprintf(fd, "Other pool searches %zu/%zu\n",
+           d->other_pool, d->pool_searches);
        dprintf(fd, "In use %zu\n", d->malloc_used);
        dprintf(fd, "Guarded %zu\n", d->malloc_guarded);
        dump_free_chunk_info(fd, d);
@@ -2289,7 +2306,7 @@ malloc_gdump(int fd)
        int i;
        int saved_errno = errno;
 
-       for (i = 0; i < _MALLOC_MUTEXES; i++)
+       for (i = 0; i < mopts.malloc_mutexes; i++)
                malloc_dump(fd, i, mopts.malloc_pool[i]);
 
        errno = saved_errno;
@@ -2305,15 +2322,15 @@ malloc_exit(void)
        if (fd != -1) {
                dprintf(fd, "******** Start dump %s *******\n", __progname);
                dprintf(fd,
-                   "MT=%d I=%d F=%d U=%d J=%d R=%d X=%d C=%d cache=%u G=%zu\n",
-                   mopts.malloc_mt, mopts.internal_funcs,
-                   mopts.malloc_freecheck,
+                   "MT=%d M=%u I=%d F=%d U=%d J=%d R=%d X=%d C=%d cache=%u 
G=%zu\n",
+                   mopts.malloc_mt, mopts.mallloc_mutexes,
+                   mopts.internal_funcs, mopts.malloc_freecheck,
                    mopts.malloc_freeunmap, mopts.malloc_junk,
                    mopts.malloc_realloc, mopts.malloc_xmalloc,
                    mopts.chunk_canaries, mopts.malloc_cache,
                    mopts.malloc_guard);
 
-               for (i = 0; i < _MALLOC_MUTEXES; i++)
+               for (i = 0; i < mopts.malloc_mutexes; i++)
                        malloc_dump(fd, i, mopts.malloc_pool[i]);
                dprintf(fd, "******** End dump %s *******\n", __progname);
                close(fd);
Index: thread/rthread_libc.c
===================================================================
RCS file: /cvs/src/lib/libc/thread/rthread_libc.c,v
retrieving revision 1.2
diff -u -p -r1.2 rthread_libc.c
--- thread/rthread_libc.c       5 Sep 2017 02:40:54 -0000       1.2
+++ thread/rthread_libc.c       19 Dec 2018 06:52:07 -0000
@@ -169,14 +169,69 @@ static struct pthread_mutex malloc_lock[
        MALLOC_LOCK_INITIALIZER(0),
        MALLOC_LOCK_INITIALIZER(1),
        MALLOC_LOCK_INITIALIZER(2),
-       MALLOC_LOCK_INITIALIZER(3)
+       MALLOC_LOCK_INITIALIZER(4),
+       MALLOC_LOCK_INITIALIZER(5),
+       MALLOC_LOCK_INITIALIZER(6),
+       MALLOC_LOCK_INITIALIZER(7),
+       MALLOC_LOCK_INITIALIZER(8),
+       MALLOC_LOCK_INITIALIZER(9),
+       MALLOC_LOCK_INITIALIZER(10),
+       MALLOC_LOCK_INITIALIZER(11),
+       MALLOC_LOCK_INITIALIZER(12),
+       MALLOC_LOCK_INITIALIZER(13),
+       MALLOC_LOCK_INITIALIZER(14),
+       MALLOC_LOCK_INITIALIZER(15),
+       MALLOC_LOCK_INITIALIZER(16),
+       MALLOC_LOCK_INITIALIZER(17),
+       MALLOC_LOCK_INITIALIZER(18),
+       MALLOC_LOCK_INITIALIZER(19),
+       MALLOC_LOCK_INITIALIZER(20),
+       MALLOC_LOCK_INITIALIZER(21),
+       MALLOC_LOCK_INITIALIZER(22),
+       MALLOC_LOCK_INITIALIZER(23),
+       MALLOC_LOCK_INITIALIZER(24),
+       MALLOC_LOCK_INITIALIZER(25),
+       MALLOC_LOCK_INITIALIZER(26),
+       MALLOC_LOCK_INITIALIZER(27),
+       MALLOC_LOCK_INITIALIZER(28),
+       MALLOC_LOCK_INITIALIZER(29),
+       MALLOC_LOCK_INITIALIZER(30),
+       MALLOC_LOCK_INITIALIZER(31)
 };
 
 static pthread_mutex_t malloc_mutex[_MALLOC_MUTEXES] = {
        &malloc_lock[0],
        &malloc_lock[1],
        &malloc_lock[2],
-       &malloc_lock[3]
+       &malloc_lock[3],
+       &malloc_lock[4],
+       &malloc_lock[5],
+       &malloc_lock[6],
+       &malloc_lock[7],
+       &malloc_lock[8],
+       &malloc_lock[9],
+       &malloc_lock[10],
+       &malloc_lock[11],
+       &malloc_lock[12],
+       &malloc_lock[13],
+       &malloc_lock[14],
+       &malloc_lock[15],
+       &malloc_lock[16],
+       &malloc_lock[17],
+       &malloc_lock[18],
+       &malloc_lock[19],
+       &malloc_lock[20],
+       &malloc_lock[21],
+       &malloc_lock[22],
+       &malloc_lock[23],
+       &malloc_lock[24],
+       &malloc_lock[25],
+       &malloc_lock[26],
+       &malloc_lock[27],
+       &malloc_lock[28],
+       &malloc_lock[29],
+       &malloc_lock[30],
+       &malloc_lock[31]
 };
 
 void




Reply via email to