On Wed, Jul 31, 2024 at 5:37 PM Andi Kleen <a...@linux.intel.com> wrote:
>
> On Wed, Jul 31, 2024 at 04:02:22PM +0200, Richard Biener wrote:
> > The following improves release_pages when using the madvise path
> > by sorting the freelist so that more page entries become contiguous
> > and can possibly be released.  It populates the unused prev pointer
> > so the reclaim can then easily unlink entries from the freelist
> > without re-ordering it.  The paths without madvise do not keep the
> > memory allocated, so I left them untouched.
> >
> > Re-bootstrap and regtest running on x86_64-unknown-linux-gnu.
> >
> > I've CCed people messing with release_pages; this doesn't really
> > address PR114563, but I thought I'd post this patch anyway.  The
> > actual issue we run into for the PR is the linear search of
> > G.free_pages when that list becomes large but a requested allocation
> > cannot be served from it.
> >
> >       PR middle-end/114563
> >       * ggc-page.cc (page_sort): New qsort comparator.
> >       (release_pages): Sort the free_pages list entries after their
> >       memory block virtual address to improve contiguous memory
> >       chunk release.
>
> I saw this in a profile some time ago and tried it with a slightly
> different patch. Instead of a full sort it uses an array to keep
> multiple free lists. But I couldn't find any speedups in non-checking
> builds later.
>
> My feeling is that an array is probably more efficient.
>
> I guess we should compare both on that PR.
>
>
> diff --git a/gcc/ggc-page.cc b/gcc/ggc-page.cc
> index 4245f843a29f..af1627b002c6 100644
> --- a/gcc/ggc-page.cc
> +++ b/gcc/ggc-page.cc
> @@ -234,6 +234,8 @@ static struct
>  }
>  inverse_table[NUM_ORDERS];
>
> +struct free_list;
> +
>  /* A page_entry records the status of an allocation page.  This
>     structure is dynamically sized to fit the bitmap in_use_p.  */
>  struct page_entry
> @@ -251,6 +253,9 @@ struct page_entry
>       of the host system page size.)  */
>    size_t bytes;
>
> +  /* Free list of this page size.  */
> +  struct free_list *free_list;
> +

?  This seems misplaced.

>    /* The address at which the memory is allocated.  */
>    char *page;
>
> @@ -368,6 +373,15 @@ struct free_object
>  };
>  #endif
>
> +constexpr int num_free_list = 8;
> +
> +/* A free_list for pages with BYTES size.  */
> +struct free_list
> +{
> +  size_t bytes;
> +  page_entry *free_pages;
> +};
> +
>  /* The rest of the global variables.  */
>  static struct ggc_globals
>  {
> @@ -412,8 +426,8 @@ static struct ggc_globals
>    int dev_zero_fd;
>  #endif
>
> -  /* A cache of free system pages.  */
> -  page_entry *free_pages;
> +  /* A cache of free system pages. Entry 0 is fallback.  */
> +  struct free_list free_lists[num_free_list];

I also thought of this, but ...

>  #ifdef USING_MALLOC_PAGE_GROUPS
>    page_group *page_groups;
> @@ -754,6 +768,26 @@ clear_page_group_in_use (page_group *group, char *page)
>  }
>  #endif
>
> +/* Find a free list for ENTRY_SIZE.  */
> +
> +static inline struct free_list *
> +find_free_list (size_t entry_size)
> +{
> +  int i;
> +  for (i = 1; i < num_free_list; i++)
> +    {
> +      if (G.free_lists[i].bytes == entry_size)
> +       return &G.free_lists[i];
> +      if (G.free_lists[i].bytes == 0)
> +       {
> +         G.free_lists[i].bytes = entry_size;
> +         return &G.free_lists[i];
> +       }
> +    }
> +  /* Fallback.  */
> +  return &G.free_lists[0];
> +}
> +
>  /* Allocate a new page for allocating objects of size 2^ORDER,
>     and return an entry for it.  The entry is not added to the
>     appropriate page_table list.  */
> @@ -770,6 +804,7 @@ alloc_page (unsigned order)
>  #ifdef USING_MALLOC_PAGE_GROUPS
>    page_group *group;
>  #endif
> +  struct free_list *free_list;
>
>    num_objects = OBJECTS_PER_PAGE (order);
>    bitmap_size = BITMAP_SIZE (num_objects + 1);
> @@ -782,8 +817,10 @@ alloc_page (unsigned order)
>    entry = NULL;
>    page = NULL;
>
> +  free_list = find_free_list (entry_size);

... this is what made me not consider it.  Looking closer, I think
'entry_size' depends only on 'order', and thus a direct mapping
indexed by the order would work here.

> +
>    /* Check the list of free pages for one we can use.  */
> -  for (pp = &G.free_pages, p = *pp; p; pp = &p->next, p = *pp)
> +  for (pp = &free_list->free_pages, p = *pp; p; pp = &p->next, p = *pp)
>      if (p->bytes == entry_size)

and this search should then go away; if this cannot become O(1),
adding any complexity elsewhere is not worth it.
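
Something like the following untested sketch on top of your patch is
what I have in mind (free_list_for_order is a made-up helper name; the
entry_size computation is the one alloc_page already does):

  /* Sketch: entry_size is a pure function of the order (and the fixed
     G.pagesize), so the bucket can be computed directly instead of
     comparing bytes while walking a list.  */
  static inline struct free_list *
  free_list_for_order (unsigned order)
  {
    size_t entry_size = OBJECTS_PER_PAGE (order) * OBJECT_SIZE (order);
    if (entry_size < G.pagesize)
      entry_size = G.pagesize;
    entry_size = PAGE_ALIGN (entry_size);
    return find_free_list (entry_size);
  }

Every entry on a non-fallback bucket then has the same bytes, so
alloc_page could simply pop the head instead of walking the list; only
the fallback bucket would still need the p->bytes check.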

I also saw that we only use "pages" for low-order allocations but
malloc for higher orders.  With allocation done in GGC_QUIRE_SIZE
granularity (2MB, to get hugepages) it seems it would be interesting
to change that and not immediately split up the 2MB into page-size
chunks.
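
Roughly what I mean, as an untested sketch (carve_from_quire,
quire_base and quire_avail are made-up names just for illustration;
error handling and putting the tail of a partially used mapping back
on a free list are omitted):

  /* Carve entries lazily out of one GGC_QUIRE_SIZE (2MB) mapping
     instead of splitting it into page-size chunks up front; larger
     orders could then be served from the same mapping instead of
     going through malloc.  ENTRY_SIZE is assumed page-aligned, as
     alloc_page already guarantees via PAGE_ALIGN.  */
  static char *quire_base;
  static size_t quire_avail;

  static char *
  carve_from_quire (size_t entry_size)
  {
    size_t quire_bytes = G.pagesize * GGC_QUIRE_SIZE;
    if (entry_size > quire_bytes)
      /* Too large for the quire; use a dedicated mapping.  */
      return alloc_anon (NULL, entry_size, false);
    if (quire_avail < entry_size)
      {
        quire_base = alloc_anon (NULL, quire_bytes, false);
        if (!quire_base)
          return NULL;
        quire_avail = quire_bytes;
      }
    char *p = quire_base;
    quire_base += entry_size;
    quire_avail -= entry_size;
    return p;
  }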

In the PR you'll also find a patch attached that adds a skip-list to
the single free_list; this improved the testcase, but picking from it
is still not O(1).

>        break;
>
> @@ -816,7 +853,7 @@ alloc_page (unsigned order)
>        /* We want just one page.  Allocate a bunch of them and put the
>          extras on the freelist.  (Can only do this optimization with
>          mmap for backing store.)  */
> -      struct page_entry *e, *f = G.free_pages;
> +      struct page_entry *e, *f = free_list->free_pages;
>        int i, entries = GGC_QUIRE_SIZE;
>
>        page = alloc_anon (NULL, G.pagesize * GGC_QUIRE_SIZE, false);
> @@ -833,12 +870,13 @@ alloc_page (unsigned order)
>           e = XCNEWVAR (struct page_entry, page_entry_size);
>           e->order = order;
>           e->bytes = G.pagesize;
> +         e->free_list = free_list;
>           e->page = page + (i << G.lg_pagesize);
>           e->next = f;
>           f = e;
>         }
>
> -      G.free_pages = f;
> +      free_list->free_pages = f;
>      }
>    else
>      page = alloc_anon (NULL, entry_size, true);
> @@ -904,12 +942,13 @@ alloc_page (unsigned order)
>               e = XCNEWVAR (struct page_entry, page_entry_size);
>               e->order = order;
>               e->bytes = G.pagesize;
> +             e->free_list = free_list;
>               e->page = a;
>               e->group = group;
>               e->next = f;
>               f = e;
>             }
> -         G.free_pages = f;
> +         free_list->free_pages = f;
>         }
>      }
>  #endif
> @@ -918,6 +957,7 @@ alloc_page (unsigned order)
>      entry = XCNEWVAR (struct page_entry, page_entry_size);
>
>    entry->bytes = entry_size;
> +  entry->free_list = free_list;
>    entry->page = page;
>    entry->context_depth = G.context_depth;
>    entry->order = order;
> @@ -1006,14 +1046,15 @@ free_page (page_entry *entry)
>
>    adjust_depth ();
>
> -  entry->next = G.free_pages;
> -  G.free_pages = entry;
> +  struct free_list *free_list = entry->free_list;
> +  entry->next = free_list->free_pages;
> +  free_list->free_pages = entry;
>  }
>
> -/* Release the free page cache to the system.  */
> +/* Release the free page cache for FREE_LIST to the system.  */
>
>  static void
> -release_pages (void)
> +do_release_pages (struct free_list *free_list)
>  {
>    size_t n1 = 0;
>    size_t n2 = 0;
> @@ -1031,7 +1072,7 @@ release_pages (void)
>       This does not always work because the free_pages list is only
>       approximately sorted. */
>
> -  p = G.free_pages;
> +  p = free_list->free_pages;
>    prev = NULL;
>    while (p)
>      {
> @@ -1060,7 +1101,7 @@ release_pages (void)
>           if (prev)
>             prev->next = p;
>            else
> -            G.free_pages = p;
> +           free_list->free_pages = p;
>            G.bytes_mapped -= mapped_len;
>           n1 += len;
>           continue;
> @@ -1071,7 +1112,7 @@ release_pages (void)
>    /* Now give back the fragmented pages to the OS, but keep the address
>       space to reuse it next time. */
>
> -  for (p = G.free_pages; p; )
> +  for (p = free_list->free_pages; p; )
>      {
>        if (p->discarded)
>          {
> @@ -1108,7 +1149,7 @@ release_pages (void)
>    size_t len;
>
>    /* Gather up adjacent pages so they are unmapped together.  */
> -  p = G.free_pages;
> +  p = free_list->free_pages;
>
>    while (p)
>      {
> @@ -1131,14 +1172,14 @@ release_pages (void)
>        G.bytes_mapped -= len;
>      }
>
> -  G.free_pages = NULL;
> +  free_list->free_pages = NULL;
>  #endif
>  #ifdef USING_MALLOC_PAGE_GROUPS
>    page_entry **pp, *p;
>    page_group **gp, *g;
>
>    /* Remove all pages from free page groups from the list.  */
> -  pp = &G.free_pages;
> +  pp = &free_list->free_pages;
>    while ((p = *pp) != NULL)
>      if (p->group->in_use == 0)
>        {
> @@ -1172,6 +1213,15 @@ release_pages (void)
>      }
>  }
>
> +/* Release the free page cache to the system.  */
> +
> +static void
> +release_pages ()
> +{
> +  for (int i = 0; i < num_free_list; i++)
> +    do_release_pages (&G.free_lists[i]);
> +}
> +
>  /* This table provides a fast way to determine ceil(log_2(size)) for
>     allocation requests.  The minimum allocation size is eight bytes.  */
>  #define NUM_SIZE_LOOKUP 512
> @@ -1774,9 +1824,10 @@ init_ggc (void)
>      /* We have a good page, might as well hold onto it...  */
>      e = XCNEW (struct page_entry);
>      e->bytes = G.pagesize;
> +    e->free_list = find_free_list (G.pagesize);
>      e->page = p;
> -    e->next = G.free_pages;
> -    G.free_pages = e;
> +    e->next = e->free_list->free_pages;
> +    e->free_list->free_pages = e;
>    }
>  #endif
>
> @@ -2650,6 +2701,7 @@ ggc_pch_read (FILE *f, void *addr)
>                                             - sizeof (long)
>                                             + BITMAP_SIZE (num_objs + 1)));
>        entry->bytes = bytes;
> +      entry->free_list = find_free_list (bytes);
>        entry->page = offs;
>        entry->context_depth = 0;
>        offs += bytes;
>
>
