On Wed, Jul 31, 2024 at 04:02:22PM +0200, Richard Biener wrote:
> The following improves release_pages when using the madvise path
> to sort the freelist to get more page entries contiguous and possibly
> release them.  This populates the unused prev pointer so the reclaim
> can then easily unlink from the freelist without re-ordering it.
> The paths not having madvise do not keep the memory allocated, so
> I left them untouched.
> 
> Re-bootstrap and regtest running on x86_64-unknown-linux-gnu.
> 
> I've CCed people messing with release_pages;  This doesn't really
> address PR114563 but I thought I post this patch anyway - the
> actual issue we run into for the PR is the linear search of
> G.free_pages when that list becomes large but a requested allocation
> cannot be served from it.
> 
>       PR middle-end/114563
>       * ggc-page.cc (page_sort): New qsort comparator.
>       (release_pages): Sort the free_pages list entries after their
>       memory block virtual address to improve contiguous memory
>       chunk release.

I saw this in a profile some time ago and tried it with a slightly
different patch. Instead of a full sort it uses an array to keep
multiple free lists, one per page size. But I couldn't measure any
speedups in non-checking builds later.

My feeling is that an array is probably more efficient.

I guess we should compare both on that PR.

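To make the idea a bit more concrete, here is a minimal standalone
sketch of the bucketing (not the GCC code itself - the names
find_free_list, free_list and num_free_list mirror the patch below,
everything else is invented scaffolding for illustration): each list
only ever caches pages of one size, so the scan in alloc_page normally
finds a match at the list head instead of walking past entries of the
wrong size, which is the linear search PR114563 runs into with a single
large free_pages list.

#include <cstddef>
#include <cstdio>

struct page_entry
{
  size_t bytes;      /* Size of the backing memory block.  */
  page_entry *next;  /* Next entry on the same free list.  */
};

constexpr int num_free_list = 8;

/* A free list caching pages of a single size; entry 0 is the
   mixed-size fallback.  */
struct free_list
{
  size_t bytes;
  page_entry *free_pages;
};

static free_list free_lists[num_free_list];

/* Find (or claim) the list serving ENTRY_SIZE.  */
static free_list *
find_free_list (size_t entry_size)
{
  for (int i = 1; i < num_free_list; i++)
    {
      if (free_lists[i].bytes == entry_size)
        return &free_lists[i];
      if (free_lists[i].bytes == 0)
        {
          free_lists[i].bytes = entry_size;
          return &free_lists[i];
        }
    }
  /* All slots taken by other sizes; fall back to the shared list.  */
  return &free_lists[0];
}

int
main ()
{
  page_entry a = { 4096, nullptr };
  page_entry b = { 8192, nullptr };

  /* Cache both pages; they end up on different lists.  */
  free_list *l = find_free_list (a.bytes);
  a.next = l->free_pages;
  l->free_pages = &a;
  l = find_free_list (b.bytes);
  b.next = l->free_pages;
  l->free_pages = &b;

  /* A request for a 4096-byte page matches at the head of its own
     list; it never has to skip the 8192-byte entry.  */
  page_entry *p = find_free_list (4096)->free_pages;
  printf ("reused page of %zu bytes\n", p ? p->bytes : (size_t) 0);
  return 0;
}

Entry 0 stays as a mixed-size fallback once the other slots have been
claimed by distinct sizes, matching the patch below.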

diff --git a/gcc/ggc-page.cc b/gcc/ggc-page.cc
index 4245f843a29f..af1627b002c6 100644
--- a/gcc/ggc-page.cc
+++ b/gcc/ggc-page.cc
@@ -234,6 +234,8 @@ static struct
 }
 inverse_table[NUM_ORDERS];
 
+struct free_list;
+
 /* A page_entry records the status of an allocation page.  This
    structure is dynamically sized to fit the bitmap in_use_p.  */
 struct page_entry
@@ -251,6 +253,9 @@ struct page_entry
      of the host system page size.)  */
   size_t bytes;
 
+  /* Free list of this page size.  */
+  struct free_list *free_list;
+
   /* The address at which the memory is allocated.  */
   char *page;
 
@@ -368,6 +373,15 @@ struct free_object
 };
 #endif
 
+constexpr int num_free_list = 8;
+
+/* A free_list for pages with BYTES size.  */
+struct free_list
+{
+  size_t bytes;
+  page_entry *free_pages;
+};
+
 /* The rest of the global variables.  */
 static struct ggc_globals
 {
@@ -412,8 +426,8 @@ static struct ggc_globals
   int dev_zero_fd;
 #endif
 
-  /* A cache of free system pages.  */
-  page_entry *free_pages;
+  /* A cache of free system pages. Entry 0 is fallback.  */
+  struct free_list free_lists[num_free_list];
 
 #ifdef USING_MALLOC_PAGE_GROUPS
   page_group *page_groups;
@@ -754,6 +768,26 @@ clear_page_group_in_use (page_group *group, char *page)
 }
 #endif
 
+/* Find a free list for ENTRY_SIZE.  */
+
+static inline struct free_list *
+find_free_list (size_t entry_size)
+{
+  int i;
+  for (i = 1; i < num_free_list; i++)
+    {
+      if (G.free_lists[i].bytes == entry_size)
+       return &G.free_lists[i];
+      if (G.free_lists[i].bytes == 0)
+       {
+         G.free_lists[i].bytes = entry_size;
+         return &G.free_lists[i];
+       }
+    }
+  /* Fallback.  */
+  return &G.free_lists[0];
+}
+
 /* Allocate a new page for allocating objects of size 2^ORDER,
    and return an entry for it.  The entry is not added to the
    appropriate page_table list.  */
@@ -770,6 +804,7 @@ alloc_page (unsigned order)
 #ifdef USING_MALLOC_PAGE_GROUPS
   page_group *group;
 #endif
+  struct free_list *free_list;
 
   num_objects = OBJECTS_PER_PAGE (order);
   bitmap_size = BITMAP_SIZE (num_objects + 1);
@@ -782,8 +817,10 @@ alloc_page (unsigned order)
   entry = NULL;
   page = NULL;
 
+  free_list = find_free_list (entry_size);
+
   /* Check the list of free pages for one we can use.  */
-  for (pp = &G.free_pages, p = *pp; p; pp = &p->next, p = *pp)
+  for (pp = &free_list->free_pages, p = *pp; p; pp = &p->next, p = *pp)
     if (p->bytes == entry_size)
       break;
 
@@ -816,7 +853,7 @@ alloc_page (unsigned order)
       /* We want just one page.  Allocate a bunch of them and put the
         extras on the freelist.  (Can only do this optimization with
         mmap for backing store.)  */
-      struct page_entry *e, *f = G.free_pages;
+      struct page_entry *e, *f = free_list->free_pages;
       int i, entries = GGC_QUIRE_SIZE;
 
       page = alloc_anon (NULL, G.pagesize * GGC_QUIRE_SIZE, false);
@@ -833,12 +870,13 @@ alloc_page (unsigned order)
          e = XCNEWVAR (struct page_entry, page_entry_size);
          e->order = order;
          e->bytes = G.pagesize;
+         e->free_list = free_list;
          e->page = page + (i << G.lg_pagesize);
          e->next = f;
          f = e;
        }
 
-      G.free_pages = f;
+      free_list->free_pages = f;
     }
   else
     page = alloc_anon (NULL, entry_size, true);
@@ -904,12 +942,13 @@ alloc_page (unsigned order)
              e = XCNEWVAR (struct page_entry, page_entry_size);
              e->order = order;
              e->bytes = G.pagesize;
+             e->free_list = free_list;
              e->page = a;
              e->group = group;
              e->next = f;
              f = e;
            }
-         G.free_pages = f;
+         free_list->free_pages = f;
        }
     }
 #endif
@@ -918,6 +957,7 @@ alloc_page (unsigned order)
     entry = XCNEWVAR (struct page_entry, page_entry_size);
 
   entry->bytes = entry_size;
+  entry->free_list = free_list;
   entry->page = page;
   entry->context_depth = G.context_depth;
   entry->order = order;
@@ -1006,14 +1046,15 @@ free_page (page_entry *entry)
 
   adjust_depth ();
 
-  entry->next = G.free_pages;
-  G.free_pages = entry;
+  struct free_list *free_list = entry->free_list;
+  entry->next = free_list->free_pages;
+  free_list->free_pages = entry;
 }
 
-/* Release the free page cache to the system.  */
+/* Release the free page cache for FREE_LIST to the system.  */
 
 static void
-release_pages (void)
+do_release_pages (struct free_list *free_list)
 {
   size_t n1 = 0;
   size_t n2 = 0;
@@ -1031,7 +1072,7 @@ release_pages (void)
      This does not always work because the free_pages list is only
      approximately sorted. */
 
-  p = G.free_pages;
+  p = free_list->free_pages;
   prev = NULL;
   while (p)
     {
@@ -1060,7 +1101,7 @@ release_pages (void)
          if (prev)
            prev->next = p;
           else
-            G.free_pages = p;
+           free_list->free_pages = p;
           G.bytes_mapped -= mapped_len;
          n1 += len;
          continue;
@@ -1071,7 +1112,7 @@ release_pages (void)
   /* Now give back the fragmented pages to the OS, but keep the address 
      space to reuse it next time. */
 
-  for (p = G.free_pages; p; )
+  for (p = free_list->free_pages; p; )
     {
       if (p->discarded)
         {
@@ -1108,7 +1149,7 @@ release_pages (void)
   size_t len;
 
   /* Gather up adjacent pages so they are unmapped together.  */
-  p = G.free_pages;
+  p = free_list->free_pages;
 
   while (p)
     {
@@ -1131,14 +1172,14 @@ release_pages (void)
       G.bytes_mapped -= len;
     }
 
-  G.free_pages = NULL;
+  free_list->free_pages = NULL;
 #endif
 #ifdef USING_MALLOC_PAGE_GROUPS
   page_entry **pp, *p;
   page_group **gp, *g;
 
   /* Remove all pages from free page groups from the list.  */
-  pp = &G.free_pages;
+  pp = &free_list->free_pages;
   while ((p = *pp) != NULL)
     if (p->group->in_use == 0)
       {
@@ -1172,6 +1213,15 @@ release_pages (void)
     }
 }
 
+/* Release the free page cache to the system.  */
+
+static void
+release_pages ()
+{
+  for (int i = 0; i < num_free_list; i++)
+    do_release_pages (&G.free_lists[i]);
+}
+
 /* This table provides a fast way to determine ceil(log_2(size)) for
    allocation requests.  The minimum allocation size is eight bytes.  */
 #define NUM_SIZE_LOOKUP 512
@@ -1774,9 +1824,10 @@ init_ggc (void)
     /* We have a good page, might as well hold onto it...  */
     e = XCNEW (struct page_entry);
     e->bytes = G.pagesize;
+    e->free_list = find_free_list (G.pagesize);
     e->page = p;
-    e->next = G.free_pages;
-    G.free_pages = e;
+    e->next = e->free_list->free_pages;
+    e->free_list->free_pages = e;
   }
 #endif
 
@@ -2650,6 +2701,7 @@ ggc_pch_read (FILE *f, void *addr)
                                            - sizeof (long)
                                            + BITMAP_SIZE (num_objs + 1)));
       entry->bytes = bytes;
+      entry->free_list = find_free_list (bytes);
       entry->page = offs;
       entry->context_depth = 0;
       offs += bytes;

