Repository : ssh://darcs.haskell.org//srv/darcs/ghc

On branch  : ghc-7.6

http://hackage.haskell.org/trac/ghc/changeset/33e16f48649b3bed7baac146edc9f80e8d05257b

>---------------------------------------------------------------

commit 33e16f48649b3bed7baac146edc9f80e8d05257b
Author: Simon Marlow <marlo...@gmail.com>
Date:   Tue Aug 21 11:39:06 2012 +0100

    Reduce fragmentation when using +RTS -H (with or without a size)
    
    MERGED from commit a68df77ede928e6c7790dacb5925625792a904d3

>---------------------------------------------------------------

 rts/sm/BlockAlloc.c |   35 +++++++++++++++++++++++++++++++++++
 rts/sm/BlockAlloc.h |    2 ++
 rts/sm/Storage.c    |   10 ++++++++--
 3 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index 1c7bd38..5d1b1c0 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -389,6 +389,41 @@ finish:
     return bd;
 }
 
+//
+// Allocate a chunk of blocks that is at most a megablock in size.
+// This API is used by the nursery allocator that wants contiguous
+// memory preferably, but doesn't require it.  When memory is
+// fragmented we might have lots of large chunks that are less than a
+// full megablock, so allowing the nursery allocator to use these
+// reduces fragmentation considerably.  e.g. on a GHC build with +RTS
+// -H, I saw fragmentation go from 17MB down to 3MB on a single compile.
+//
+bdescr *
+allocLargeChunk (void)
+{
+    bdescr *bd;
+    nat ln;
+
+    ln = 5; // start in the 32-63 block bucket
+    while (ln < MAX_FREE_LIST && free_list[ln] == NULL) {
+        ln++;
+    }
+    if (ln == MAX_FREE_LIST) {
+        return allocGroup(BLOCKS_PER_MBLOCK);
+    }
+    bd = free_list[ln];
+
+    n_alloc_blocks += bd->blocks;
+    if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;
+
+    dbl_link_remove(bd, &free_list[ln]);
+    initGroup(bd);
+
+    IF_DEBUG(sanity, memset(bd->start, 0xaa, bd->blocks * BLOCK_SIZE));
+    IF_DEBUG(sanity, checkFreeListSanity());
+    return bd;
+}
+
 bdescr *
 allocGroup_lock(W_ n)
 {
diff --git a/rts/sm/BlockAlloc.h b/rts/sm/BlockAlloc.h
index c40f395..d40cc42 100644
--- a/rts/sm/BlockAlloc.h
+++ b/rts/sm/BlockAlloc.h
@@ -11,6 +11,8 @@
 
 #include "BeginPrivate.h"
 
+bdescr *allocLargeChunk (void);
+
 /* Debugging  -------------------------------------------------------------- */
 
 extern W_ countBlocks       (bdescr *bd);
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index a07a872..45b6b89 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -438,10 +438,16 @@ allocNursery (bdescr *tail, W_ blocks)
     // tiny optimisation (~0.5%), but it's free.
 
     while (blocks > 0) {
-        n = stg_min(blocks, BLOCKS_PER_MBLOCK);
+        if (blocks >= BLOCKS_PER_MBLOCK) {
+            bd = allocLargeChunk(); // see comment with allocLargeChunk()
+            n = bd->blocks;
+        } else {
+            bd = allocGroup(blocks);
+            n = blocks;
+        }
+
         blocks -= n;
 
-        bd = allocGroup(n);
         for (i = 0; i < n; i++) {
             initBdescr(&bd[i], g0, g0);
 



_______________________________________________
Cvs-ghc mailing list
Cvs-ghc@haskell.org
http://www.haskell.org/mailman/listinfo/cvs-ghc

Reply via email to